Files
blockflow-workbench/src/core/expression/tokenizer.ts
gamewhale 589ff15213 chore: 初始化 BlockFlow Workbench 仓库
建立前端与 Tauri 桌面端的首个版本提交,包含核心编辑器、项目文件读写、测试与构建配置。

补充 Git 忽略规则和换行规范,排除依赖、构建产物、本地运行日志与临时验证文件,方便在其他电脑继续开发。
2026-05-29 17:23:43 +08:00

141 lines
3.4 KiB
TypeScript

import type { Token } from "./types"
/**
 * Error thrown when an expression cannot be tokenized.
 *
 * The final `message` is the supplied text followed by ` at <position>`;
 * the same position is also kept on the instance so callers can read it
 * without parsing the message.
 */
export class ExpressionSyntaxError extends Error {
  /** Position passed by the thrower (the tokenizer passes offsets into its input). */
  readonly position: number

  constructor(message: string, position: number) {
    super(`${message} at ${position}`)
    this.position = position
    this.name = "ExpressionSyntaxError"
  }
}
/** Operators spelled with exactly two characters. */
const twoCharOperators = new Set<string>([
  "==",
  "!=",
  ">=",
  "<=",
  "&&",
  "||",
  "??"
])

/** Operators spelled with a single character. */
const oneCharOperators = new Set<string>([
  ">",
  "<",
  "!"
])

/** Single-character punctuation: member access, grouping and indexing. */
const punctuation = new Set<string>([
  ".",
  "(",
  ")",
  "[",
  "]"
])
// Single-character classes used by the scanner; hoisted so the patterns are
// built once instead of on every loop iteration.
const whitespacePattern = /\s/
const digitPattern = /[0-9]/

/**
 * Converts an expression string into a flat list of tokens.
 *
 * At each position the following lexemes are tried, in this order:
 * - whitespace, which is skipped;
 * - double-quoted string literals with backslash escapes;
 * - numbers: a run of digits, optionally followed by `.` and further digits
 *   (the `.` is consumed even when no digit follows it);
 * - identifiers, except that the word `contains` is emitted as an operator;
 * - two-character operators, then one-character operators;
 * - punctuation.
 *
 * The returned list always ends with an `eof` token positioned at
 * `input.length`.
 *
 * @param input Expression source text.
 * @returns Tokens in source order; each `position` is the offset in `input`
 *   of the token's first character.
 * @throws ExpressionSyntaxError On an unexpected character, an unterminated
 *   string literal, or a backslash that is the last character of the input.
 */
export function tokenizeExpression(input: string): Token[] {
  const tokens: Token[] = []
  let position = 0
  while (position < input.length) {
    const char = input[position]
    if (char === undefined) {
      break
    }
    if (whitespacePattern.test(char)) {
      position += 1
      continue
    }
    if (char === "\"") {
      const literal = readString(input, position)
      tokens.push(literal.token)
      // Resume after the raw literal. The decoded value's length must not be
      // used here: escape sequences make the source text longer than the value.
      position = literal.end
      continue
    }
    if (digitPattern.test(char)) {
      const start = position
      position = skipDigits(input, position + 1)
      if (input[position] === ".") {
        position = skipDigits(input, position + 1)
      }
      tokens.push({ type: "number", value: input.slice(start, position), position: start })
      continue
    }
    if (isIdentifierStart(char)) {
      const start = position
      position += 1
      while (position < input.length && isIdentifierPart(input[position] ?? "")) {
        position += 1
      }
      const value = input.slice(start, position)
      tokens.push({
        // `contains` is the only word operator; every other word is an identifier.
        type: value === "contains" ? "operator" : "identifier",
        value,
        position: start
      })
      continue
    }
    // Two-character operators are tried first so that, for example, `>=` is
    // not split into `>` followed by an unexpected `=`.
    const twoChars = input.slice(position, position + 2)
    if (twoCharOperators.has(twoChars)) {
      tokens.push({ type: "operator", value: twoChars, position })
      position += 2
      continue
    }
    if (oneCharOperators.has(char)) {
      tokens.push({ type: "operator", value: char, position })
      position += 1
      continue
    }
    if (punctuation.has(char)) {
      tokens.push({ type: "punctuation", value: char, position })
      position += 1
      continue
    }
    throw new ExpressionSyntaxError(`Unexpected character "${char}"`, position)
  }
  tokens.push({ type: "eof", value: "", position: input.length })
  return tokens
}

/** Returns the offset of the first non-digit character at or after `from`. */
function skipDigits(input: string, from: number): number {
  let position = from
  while (position < input.length && digitPattern.test(input[position] ?? "")) {
    position += 1
  }
  return position
}

/**
 * Reads the double-quoted string literal whose opening quote is at `start`.
 *
 * @param input Expression source text.
 * @param start Offset of the opening `"`.
 * @returns `token`, the string token holding the decoded value and positioned
 *   at `start`, and `end`, the offset just past the closing quote. `end` is
 *   reported separately because the raw literal is longer than the decoded
 *   value whenever it contains an escape sequence.
 * @throws ExpressionSyntaxError When the input ends before the closing quote,
 *   or directly after a backslash.
 */
function readString(input: string, start: number): { token: Token; end: number } {
  let value = ""
  let position = start + 1
  while (position < input.length) {
    const char = input[position]
    if (char === "\"") {
      return { token: { type: "string", value, position: start }, end: position + 1 }
    }
    if (char === "\\") {
      const escaped = input[position + 1]
      if (escaped === undefined) {
        throw new ExpressionSyntaxError("Unterminated escape sequence", position)
      }
      value += decodeEscape(escaped)
      // Skip both the backslash and the character it escapes.
      position += 2
      continue
    }
    value += char
    position += 1
  }
  throw new ExpressionSyntaxError("Unterminated string literal", start)
}
/** Characters that may follow a backslash, mapped to the text they stand for. */
const escapeReplacements = new Map<string, string>([
  ["n", "\n"],
  ["r", "\r"],
  ["t", "\t"],
  ["\"", "\""],
  ["\\", "\\"]
])

/**
 * Translates the character following a backslash into the text it denotes.
 *
 * `n`, `r` and `t` become newline, carriage return and tab. A double quote,
 * a backslash, and any other input are returned unchanged.
 */
function decodeEscape(char: string): string {
  return escapeReplacements.get(char) ?? char
}
/** ASCII letters, underscore and dollar sign. */
const identifierStartPattern = /[A-Za-z_$]/

/**
 * Reports whether `char` may begin an identifier: an ASCII letter, `_` or `$`.
 */
function isIdentifierStart(char: string): boolean {
  const allowed = identifierStartPattern.test(char)
  return allowed
}
/** ASCII letters, digits, underscore, dollar sign and hyphen. */
const identifierPartPattern = /[A-Za-z0-9_$-]/

/**
 * Reports whether `char` may continue an identifier: an ASCII letter or
 * digit, `_`, `$` or `-`.
 */
function isIdentifierPart(char: string): boolean {
  const allowed = identifierPartPattern.test(char)
  return allowed
}