packages/postcss-ekscss/src/tokenize.ts
// Based on https://github.com/postcss/postcss-scss/blob/e57f9bdfdfaf49ae72f379f968d43c441fd77d18/lib/scss-tokenize.js
// TODO: Tokenize xcss tagged template literals within XCSS expressions
/* eslint-disable unicorn/prefer-code-point */
import type { Input } from 'postcss';
const SINGLE_QUOTE = 39; // '
const DOUBLE_QUOTE = 34; // "
const BACKSLASH = 92; // \
const SLASH = 47; // /
const NEWLINE = 10; // \n
const SPACE = 32; // " " (space)
const FEED = 12; // \f
const TAB = 9; // \t
const CR = 13; // \r
const OPEN_SQUARE = 91; // [
const CLOSE_SQUARE = 93; // ]
const OPEN_PARENTHESES = 40; // (
const CLOSE_PARENTHESES = 41; // )
const OPEN_CURLY = 123; // {
const CLOSE_CURLY = 125; // }
const SEMICOLON = 59; // ;
const ASTERISK = 42; // *
const COLON = 58; // :
const AT = 64; // @
const COMMA = 44; // ,
const DOLLAR_SIGN = 36; // $
const RE_AT_END = /[\t\n\f\r "#'()/;[\\\]{}]/g;
const RE_WORD_END = /[\t\n\f\r !"#'(),:;@[\\\]{}]|\/(?=\*)/g;
const RE_BAD_BRACKET = /.[\n\r"'(/\\]/;
const RE_HEX_ESCAPE = /[\da-f]/i;
const RE_NEW_LINE = /[\n\f\r]/g;
interface TokenizeOptions {
ignoreErrors?: boolean;
}
export type Token = readonly [
type: string,
content: string,
pos?: number,
next?: number,
scope?: string,
];
export interface Tokenizer {
back: (token: Token) => void;
nextToken: (opts?: { ignoreUnclosed?: boolean }) => Token | void;
endOfFile: () => boolean;
position: () => number;
}
export function tokenize(
input: Input & { error: (message: string, pos?: number) => void },
options: TokenizeOptions = {},
): Tokenizer {
const css = input.css.valueOf();
const ignore = options.ignoreErrors;
const len = css.length;
const buffer: Token[] = [];
const returned: Token[] = [];
let pos = 0;
let code: number;
let next: number;
let quote: typeof SINGLE_QUOTE | typeof DOUBLE_QUOTE;
let content: string;
// biome-ignore lint/suspicious/noShadowRestrictedNames: TODO:!
let escape: boolean;
let escaped: boolean;
let prev: string;
let n: number;
let currentToken: Token;
let brackets: 0 | 1;
function position() {
return pos;
}
function unclosed(what: string): never {
throw input.error(`Unclosed ${what}`, pos);
}
function endOfFile() {
return returned.length === 0 && pos >= len;
}
function interpolation() {
let depth = 1;
let stringQuote: boolean | number = false;
let stringEscaped = false;
while (depth > 0) {
next += 1;
if (css.length <= next) unclosed('interpolation');
code = css.charCodeAt(next);
n = css.charCodeAt(next + 1);
if (stringQuote) {
if (!stringEscaped && code === stringQuote) {
stringQuote = false;
stringEscaped = false;
} else if (code === BACKSLASH) {
stringEscaped = !stringEscaped;
} else if (stringEscaped) {
stringEscaped = false;
}
} else if (code === SINGLE_QUOTE || code === DOUBLE_QUOTE) {
stringQuote = code;
} else if (code === CLOSE_CURLY) {
depth -= 1;
} else if (n === OPEN_CURLY) {
depth += 1;
}
}
}
function nextToken(opts?: { ignoreUnclosed?: boolean }): Token | void {
if (returned.length > 0) return returned.pop();
if (pos >= len) return undefined;
const ignoreUnclosed = opts ? opts.ignoreUnclosed : false;
code = css.charCodeAt(pos);
switch (code) {
case NEWLINE:
case SPACE:
case TAB:
case CR:
case FEED:
next = pos;
do {
next += 1;
code = css.charCodeAt(next);
} while (
code === SPACE ||
code === NEWLINE ||
code === TAB ||
code === CR ||
code === FEED
);
currentToken = ['space', css.slice(pos, next)];
pos = next - 1;
break;
case OPEN_SQUARE:
case CLOSE_SQUARE:
case OPEN_CURLY:
case CLOSE_CURLY:
case COLON:
case SEMICOLON:
case CLOSE_PARENTHESES: {
const controlChar = String.fromCharCode(code);
currentToken = [controlChar, controlChar, pos];
break;
}
case COMMA:
currentToken = ['word', ',', pos, pos + 1];
break;
case OPEN_PARENTHESES:
prev = buffer.length > 0 ? buffer.pop()![1] : '';
n = css.charCodeAt(pos + 1);
if (prev === 'url' && n !== SINGLE_QUOTE && n !== DOUBLE_QUOTE) {
brackets = 1;
escaped = false;
next = pos + 1;
while (next <= css.length - 1) {
n = css.charCodeAt(next);
// eslint-disable-next-line unicorn/prefer-switch
if (n === BACKSLASH) {
escaped = !escaped;
} else if (n === OPEN_PARENTHESES) {
brackets += 1;
} else if (n === CLOSE_PARENTHESES) {
brackets -= 1;
if (brackets === 0) break;
}
next += 1;
}
content = css.slice(pos, next + 1);
currentToken = ['brackets', content, pos, next];
pos = next;
} else {
next = css.indexOf(')', pos + 1);
content = css.slice(pos, next + 1);
if (next === -1 || RE_BAD_BRACKET.test(content)) {
currentToken = ['(', '(', pos];
} else {
currentToken = ['brackets', content, pos, next];
pos = next;
}
}
break;
case SINGLE_QUOTE:
case DOUBLE_QUOTE:
quote = code;
next = pos;
escaped = false;
while (next < len) {
next++;
if (next === len) unclosed('string');
code = css.charCodeAt(next);
n = css.charCodeAt(next + 1);
if (!escaped && code === quote) {
break;
}
if (code === BACKSLASH) {
escaped = !escaped;
} else if (escaped) {
escaped = false;
} else if (code === DOLLAR_SIGN && n === OPEN_CURLY) {
interpolation();
}
}
currentToken = ['string', css.slice(pos, next + 1), pos, next];
pos = next;
break;
case AT:
RE_AT_END.lastIndex = pos + 1;
RE_AT_END.test(css);
next =
RE_AT_END.lastIndex === 0 ? css.length - 1 : RE_AT_END.lastIndex - 2;
currentToken = ['at-word', css.slice(pos, next + 1), pos, next];
pos = next;
break;
case BACKSLASH:
next = pos;
escape = true;
while (css.charCodeAt(next + 1) === BACKSLASH) {
next += 1;
escape = !escape;
}
code = css.charCodeAt(next + 1);
if (
escape &&
code !== SLASH &&
code !== SPACE &&
code !== NEWLINE &&
code !== TAB &&
code !== CR &&
code !== FEED
) {
next += 1;
if (RE_HEX_ESCAPE.test(css.charAt(next))) {
while (RE_HEX_ESCAPE.test(css.charAt(next + 1))) {
next += 1;
}
if (css.charCodeAt(next + 1) === SPACE) {
next += 1;
}
}
}
currentToken = ['word', css.slice(pos, next + 1), pos, next];
pos = next;
break;
default:
n = css.charCodeAt(pos + 1);
if (code === DOLLAR_SIGN && n === OPEN_CURLY) {
next = pos;
interpolation();
content = css.slice(pos, next + 1);
currentToken = ['root', content, pos, next];
pos = next;
} else if (code === SLASH && n === ASTERISK) {
next = css.indexOf('*/', pos + 2) + 1;
if (next === 0) {
if (ignore ?? ignoreUnclosed) {
next = css.length;
} else {
unclosed('comment');
}
}
currentToken = ['comment', css.slice(pos, next + 1), pos, next];
pos = next;
} else if (code === SLASH && n === SLASH) {
RE_NEW_LINE.lastIndex = pos + 1;
RE_NEW_LINE.test(css);
next =
RE_NEW_LINE.lastIndex === 0
? css.length - 1
: RE_NEW_LINE.lastIndex - 2;
content = css.slice(pos, next + 1);
currentToken = ['comment', content, pos, next, 'inline'];
pos = next;
} else {
RE_WORD_END.lastIndex = pos + 1;
RE_WORD_END.test(css);
next =
RE_WORD_END.lastIndex === 0
? css.length - 1
: RE_WORD_END.lastIndex - 2;
currentToken = ['word', css.slice(pos, next + 1), pos, next];
buffer.push(currentToken);
pos = next;
}
break;
}
pos++;
return currentToken;
}
function back(token: Token) {
returned.push(token);
}
return {
back,
endOfFile,
nextToken,
position,
};
}