thi-ng/umbrella

View on GitHub
packages/sexpr/src/tokenize.ts

Summary

Maintainability
A
0 mins
Test Coverage
import { DEFAULT_SYNTAX, type SyntaxOpts, type Token } from "./api.js";

/**
 * Yields iterator of {@link Token}s (incl. zero-based line & column
 * info) from `src` string (or from a **characterwise** iterable). Scope,
 * string and comment delimiters and whitespace characters can be
 * configured via given `opts`. By default {@link DEFAULT_SYNTAX} is used.
 *
 * @remarks
 * - Whitespace, scope delimiters, string delimiters and comment
 *   delimiters all terminate the token currently being collected.
 * - Comments run from the comment delimiter up to (and including) the
 *   next newline and produce no tokens.
 * - String tokens are always emitted wrapped in `"` characters, even if
 *   a different string delimiter has been configured. Their content is
 *   kept verbatim (escape sequences are NOT resolved). A backslash
 *   inside a string escapes the next character, so an escaped delimiter
 *   does not close the string, whereas an escaped backslash directly
 *   before the closing delimiter does not keep the string open.
 * - An unterminated string is emitted as-is when the input ends.
 *
 * Also see:
 *
 * - {@link SyntaxOpts}
 * - {@link Token}
 *
 * @param src - source string or iterable of single characters
 * @param opts - partial syntax overrides, merged over {@link DEFAULT_SYNTAX}
 */
export function* tokenize(
    src: Iterable<string>,
    opts?: Partial<SyntaxOpts>
): IterableIterator<Token> {
    const {
        scopes: rawScopes,
        whiteSpace,
        string,
        comment,
    } = {
        ...DEFAULT_SYNTAX,
        ...opts,
    };
    // flatten scope delimiter pairs into a single lookup string
    const scopes = rawScopes
        .reduce((acc, x) => acc.concat(<any>x), [])
        .join("");
    let token = "";
    let isString = false;
    // true if the previous char inside a string was an unescaped backslash
    let isEscaped = false;
    let isComment = false;
    // location of the first character of the token being collected
    let tokenLine = 0;
    let tokenCol = 0;
    // location of the character currently being processed
    let line = 0;
    let col = -1;
    const $ = (value: string): Token => ({
        value,
        line: tokenLine,
        col: tokenCol,
    });
    for (const c of src) {
        if (c === "\n") {
            line++;
            col = -1;
        } else {
            col++;
        }
        if (isComment) {
            // comments extend to the end of the current line
            if (c === "\n") isComment = false;
        } else if (isString) {
            if (isEscaped) {
                // escaped char is taken verbatim (incl. the delimiter)
                token += c;
                isEscaped = false;
            } else if (c === string) {
                token += '"';
                yield $(token);
                token = "";
                isString = false;
            } else {
                isEscaped = c === "\\";
                token += c;
            }
        } else if (whiteSpace.test(c)) {
            token && (yield $(token));
            token = "";
        } else if (scopes.includes(c)) {
            token && (yield $(token));
            tokenLine = line;
            tokenCol = col;
            yield $(c);
            token = "";
        } else if (c === string) {
            token && (yield $(token));
            tokenLine = line;
            tokenCol = col;
            token = '"';
            isString = true;
            isEscaped = false;
        } else if (c === comment) {
            // a comment terminates the pending token, otherwise it would be
            // merged with the first token of the following line
            token && (yield $(token));
            token = "";
            isComment = true;
        } else {
            if (!token) {
                tokenLine = line;
                tokenCol = col;
            }
            token += c;
        }
    }
    // flush trailing token (incl. any unterminated string)
    token && (yield $(token));
}