import type { Token } from "./types"

/**
 * Error thrown when an expression cannot be tokenized.
 *
 * The error message is the supplied message followed by ` at <position>`;
 * the same offset is exposed as the `position` property.
 */
export class ExpressionSyntaxError extends Error {
  /**
   * @param message - Description of the problem, without the position suffix.
   * @param position - Zero-based offset into the input the error refers to.
   */
  constructor(
    message: string,
    readonly position: number
  ) {
    super(`${message} at ${position}`)
    this.name = "ExpressionSyntaxError"
  }
}

// Two-character operators are matched before one-character ones so that
// e.g. ">=" is not split into ">" followed by an unexpected "=".
const twoCharOperators = new Set(["==", "!=", ">=", "<=", "&&", "||", "??"])
const oneCharOperators = new Set([">", "<", "!"])
const punctuation = new Set([".", "(", ")", "[", "]"])

// Character-class patterns hoisted out of the scanning loop. None of them
// carries the `g` or `y` flag, so `test` keeps no state between calls.
const whitespacePattern = /\s/
const digitPattern = /[0-9]/
const identifierStartPattern = /[A-Za-z_$]/
const identifierPartPattern = /[A-Za-z0-9_$-]/

// Escape sequences with a dedicated decoding; any other escaped character
// decodes to itself.
const escapeSequences = new Map<string, string>([
  ["n", "\n"],
  ["r", "\r"],
  ["t", "\t"],
  ["\"", "\""],
  ["\\", "\\"]
])

/**
 * Splits an expression into tokens.
 *
 * Whitespace is skipped. Recognized tokens are double-quoted strings (with
 * backslash escapes), numbers (digits with an optional fractional part),
 * identifiers, the word `contains` (emitted as an operator), the one- and
 * two-character operators, and punctuation. Every token's `position` is the
 * zero-based offset of its first character in `input`.
 *
 * @param input - The expression source text.
 * @returns The tokens in source order, always terminated by an `eof` token
 *   whose position is `input.length`.
 * @throws ExpressionSyntaxError On an unexpected character, an unterminated
 *   string literal, or a backslash at the very end of the input inside a string.
 */
export function tokenizeExpression(input: string): Token[] {
  const tokens: Token[] = []
  let position = 0

  while (position < input.length) {
    const char = input[position]
    if (char === undefined) {
      break
    }

    if (whitespacePattern.test(char)) {
      position += 1
      continue
    }

    if (char === "\"") {
      // Resume from the offset reported by the reader. The decoded value is
      // shorter than the source text whenever the literal contains an escape,
      // so its length cannot be used to find the end of the literal.
      const { token, end } = readString(input, position)
      tokens.push(token)
      position = end
      continue
    }

    if (digitPattern.test(char)) {
      const start = position
      position = skipDigits(input, position + 1)
      // A "." directly after the integer part is consumed even when no digit
      // follows it, so "1." is a single number token.
      if (input[position] === ".") {
        position = skipDigits(input, position + 1)
      }
      tokens.push({ type: "number", value: input.slice(start, position), position: start })
      continue
    }

    if (isIdentifierStart(char)) {
      const start = position
      position += 1
      while (position < input.length && isIdentifierPart(input[position] ?? "")) {
        position += 1
      }
      const value = input.slice(start, position)
      // `contains` is the only word operator; every other word is an identifier.
      tokens.push({
        type: value === "contains" ? "operator" : "identifier",
        value,
        position: start
      })
      continue
    }

    const twoChars = input.slice(position, position + 2)
    if (twoCharOperators.has(twoChars)) {
      tokens.push({ type: "operator", value: twoChars, position })
      position += 2
      continue
    }

    if (oneCharOperators.has(char)) {
      tokens.push({ type: "operator", value: char, position })
      position += 1
      continue
    }

    if (punctuation.has(char)) {
      tokens.push({ type: "punctuation", value: char, position })
      position += 1
      continue
    }

    throw new ExpressionSyntaxError(`Unexpected character "${char}"`, position)
  }

  tokens.push({ type: "eof", value: "", position: input.length })
  return tokens
}

/** Returns the offset of the first non-digit character at or after `from`. */
function skipDigits(input: string, from: number): number {
  let position = from
  while (position < input.length && digitPattern.test(input[position] ?? "")) {
    position += 1
  }
  return position
}

/**
 * Reads a double-quoted string literal whose opening quote is at `start`.
 *
 * @param input - The full expression source text.
 * @param start - Offset of the opening `"`.
 * @returns The string token (its `value` is the decoded content and its
 *   `position` is `start`) together with `end`, the offset just past the
 *   closing quote.
 * @throws ExpressionSyntaxError If the input ends before the closing quote
 *   (reported at `start`) or immediately after a backslash (reported at the
 *   backslash).
 */
function readString(input: string, start: number): { token: Token; end: number } {
  let value = ""
  let position = start + 1

  while (position < input.length) {
    const char = input[position]

    if (char === "\"") {
      return {
        token: { type: "string", value, position: start },
        end: position + 1
      }
    }

    if (char === "\\") {
      const escaped = input[position + 1]
      if (escaped === undefined) {
        throw new ExpressionSyntaxError("Unterminated escape sequence", position)
      }
      value += decodeEscape(escaped)
      // Skip both the backslash and the escaped character.
      position += 2
      continue
    }

    value += char
    position += 1
  }

  throw new ExpressionSyntaxError("Unterminated string literal", start)
}

/**
 * Decodes the character that follows a backslash inside a string literal.
 * `n`, `r` and `t` map to newline, carriage return and tab; every other
 * character (including `"` and `\`) is returned as-is.
 */
function decodeEscape(char: string): string {
  return escapeSequences.get(char) ?? char
}

/** Whether `char` may begin an identifier: an ASCII letter, `_` or `$`. */
function isIdentifierStart(char: string): boolean {
  return identifierStartPattern.test(char)
}

/** Whether `char` may continue an identifier: an ASCII letter or digit, `_`, `$` or `-`. */
function isIdentifierPart(char: string): boolean {
  return identifierPartPattern.test(char)
}