Skip to content

Instantly share code, notes, and snippets.

@brecert
Created December 19, 2022 03:57
Show Gist options
  • Save brecert/768bcac33f9bb1ba68533d4d6a2377f5 to your computer and use it in GitHub Desktop.
Save brecert/768bcac33f9bb1ba68533d4d6a2377f5 to your computer and use it in GitHub Desktop.
// deno-lint-ignore-file no-unused-vars prefer-const no-cond-assign no-empty
import {
assert,
assertEquals,
assertExists,
assertInstanceOf,
equal,
} from "https://deno.land/std@0.168.0/testing/asserts.ts";
type LiteralT =
| ["Integer", bigint]
| ["String", string];
type TokenT =
| { type: "Name" | "Symbol" | "Open" | "Close"; value: string }
| { type: "Separator"; value: undefined }
| { type: "Literal"; value: Literal };
const isSeparator = (char: string) => /[\s;]/.test(char);
const isSymbol = (char: string) => /[~!@#$%^&*+=.\/`:<>?'-]/.test(char);
const isDigit = (char: string) => /[1-9]/.test(char);
const isAlpha = (char: string) => /[a-z]/.test(char);
// oop time!!!!
class Token {
constructor(
public readonly type: TokenT["type"],
public readonly value: TokenT["value"],
) {}
static Separator = () => new Token("Separator", undefined);
static Name = (name: string) => new Token("Name", name);
static Symbol = (symbol: string) => new Token("Symbol", symbol);
static Literal = (literal: Literal) => new Token("Literal", literal);
static Open = (delimiter: string) => new Token("Open", delimiter);
static Close = (delimiter: string) => new Token("Close", delimiter);
// Utility Literals
static Integer = (int: bigint) => Token.Literal(Literal.Integer(int));
static String = (string: string) => Token.Literal(Literal.String(string));
matches(other: Token) {
return other.type === this.type &&
(this.type === "Literal" || this.value === other.value);
}
toString() {
return `Token.${this.type}(${this.value})`;
}
get [Symbol.toStringTag]() {
return `Token.${this.type}`;
}
}
class Literal {
constructor(public type: LiteralT["0"], public value: LiteralT["1"]) {}
static Integer = (int: bigint) => new Literal("Integer", int);
static String = (string: string) => new Literal("String", string);
matches(other: Literal) {
return other.type === this.type && other.value === this.value;
}
toString() {
return `Literal.${this.type}(${this.value})`;
}
get [Symbol.toStringTag]() {
return `Literal.${this.type}`;
}
}
// class Separator extends Token {
// constructor() {
// super('Separator')
// }
// }
// class Name extends Token {
// constructor(public name: string) {
// super('Name')
// }
// }
// class Symbol extends Token {
// constructor(public symbol: string) {
// super('Symbol')
// }
// }
// class Open extends Token {
// constructor(delimiter: string) {
// super('Open')
// }
// }
// class Close extends Token {
// constructor(delimiter: string) {
// super('Close')
// }
// }
// class Literal extends Token {
// constructor(public literal: Literal) {
// super('Literal')
// }
// }
class SeekingReader<T> {
#pos = 0;
seekingPos = 0;
get pos() {
return this.#pos;
}
set pos(pos: number) {
this.#pos = pos;
this.seekingPos = pos;
}
constructor(public list: T[]) {}
next() {
return this.list[this.pos++];
}
*reading() {
let val: T;
while (val = this.next()) {
yield val;
}
}
seek(n = 0) {
let val = this.list[this.seekingPos];
this.seekingPos += n;
return val;
}
*seeking(offset = 0, by = 1) {
let val: T;
this.seekingPos += offset;
while (val = this.seek(by)) {
yield val;
}
}
peek(n = 1) {
return this.list[this.pos + n - 1];
}
}
class Lexer {
#pos = 0;
peekingPos = 0;
chars: string[];
get pos() {
return this.#pos;
}
set pos(pos: number) {
this.#pos = pos;
this.peekingPos = pos;
}
constructor(source: string) {
this.chars = [...source];
}
nextChar() {
return this.chars[this.pos++];
}
*nextChars() {
let char: string;
while (char = this.nextChar()) {
yield char;
}
}
seekChar(n = 0) {
let char = this.chars[this.peekingPos];
this.peekingPos += n;
return char;
}
*seekChars(offset = 0, by = 1) {
let char: string;
this.peekingPos += offset;
while (char = this.seekChar(by)) {
yield char;
}
}
skipSeparators() {
let len = 0;
for (let char of this.seekChars()) {
if (!isSeparator(char)) break;
len += 1;
}
this.pos += len;
}
*tokens(): Generator<Token> {
loop:
for (let char of this.seekChars(0, 0)) {
let len: number, token: Token;
switch (char) {
case " ": {
this.pos += 1;
continue;
}
case ";":
case "\n": {
this.skipSeparators();
yield Token.Separator();
continue loop;
}
case '"': {
let [len, token] = this.parseString();
yield token;
this.pos += len;
break;
}
default: {
if (isDigit(char)) {
let [len, token] = this.parseInteger();
yield token;
this.pos += len;
} else if (isAlpha(char)) {
let [len, token] = this.parseName();
yield token;
this.pos += len;
} else if (isSymbol(char)) {
let [len, token] = this.parseSymbol();
yield token;
this.pos += len;
} else {
throw new Error(`Invalid character '${char}' at ${this.pos}`);
}
}
}
}
}
parseSymbol(): [size: number, token: Token] {
let len = 0;
let symbol = "";
for (let char of this.seekChars()) {
if (!isSymbol(char)) break;
len += 1;
symbol += char;
}
return [len, Token.Symbol(symbol)];
}
parseName(): [size: number, token: Token] {
let len = 0;
let name = "";
for (let char of this.seekChars()) {
if (!isAlpha(char)) break;
len += 1;
name += char;
}
return [len, Token.Name(name)];
}
// todo: add radix
parseInteger(): [size: number, token: Token] {
let int = BigInt(0);
let len = 0;
for (let char of this.seekChars()) {
if (!isDigit(char)) break;
len += 1;
int = int * 10n + BigInt(char);
}
return [len, Token.Integer(int)];
}
parseString(): [size: number, token: Token] {
let start = this.peekingPos;
let char: string;
let result = "";
let len = 1;
for (let char of this.seekChars(1)) {
len += 1;
switch (char) {
case '"': {
return [len, Token.String(result)];
}
default: {
result += char;
}
}
}
// todo: real syntax error + span not js lol
throw new SyntaxError(`string was not closed at ${start}`);
}
}
const Precedence = [
"None", //
"AddSub", // +, -
"MulDiv", // *, /
];
class BindingPower {
constructor(public lbp?: number | null, public rbp?: number | null) {}
static r(rbp: number) {
return new this(undefined, rbp);
}
static l(lbp: number) {
return new this(lbp, undefined);
}
static i(lbp: number, rbp: number) {
return new this(lbp, rbp);
}
}
const BP = (lbp?: number | null, rbp?: number | null) =>
new BindingPower(lbp, rbp);
type BP = BindingPower;
// Prefix Pat <Expr>
// Infix: <Expr> Pat.. <Expr>
// Postfix <Expr> Pat
// ?
// these would be useful but I'll just pratt them
type TokenTrees = TokenTree[];
type TokenTree =
| ["Block", TokenTrees] // { ... }
| ["List", TokenTrees] // [ ... ]
| ["Name", string] // name Name
| ["Symbol", string] // < + -
| ["Integer", number] // 123 0
| ["String", string]; // "string with spaces"
type PatternElement =
| BindingPower
| Token;
type Pattern = PatternElement[];
type PrefixFn = (token: Token) => Output;
type InfixFn = (token: Token, lhs: Output) => Output;
type Output = unknown;
function* filterFrom<T>(
iter: IterableIterator<T>,
predicate: (value: T) => unknown,
) {
for (let value of iter) {
if (predicate(value)) {
yield value;
}
}
}
function findIn<T>(
iter: IterableIterator<T>,
predicate: (value: T) => boolean,
) {
return filterFrom(iter, predicate).next().value;
}
type TokenPredicate = (tok: Token) => boolean;
class Parser {
tokens: SeekingReader<Token>;
patterns = {
prefix: new Map<Token | TokenPredicate, PrefixFn>(),
infix: new Map<Token, InfixFn>(),
};
bindingPower = {
prefix: new Map<Token, BP>(),
infix: new Map<Token | TokenPredicate, BP>(),
};
// todo: Generator<Token>
constructor(tokens: Token[]) {
this.tokens = new SeekingReader(tokens);
}
getPrefix(token: Token) {
let value = findIn(
this.patterns.prefix.entries(),
([tok]) => typeof tok === "function" ? tok(token) : token.matches(tok),
)?.[1](token);
assertExists(value, `Invalid prefix: ${token}`);
return value;
}
getInfix(token: Token, lhs: Output) {
let value = findIn(
this.patterns.infix.entries(),
([tok]) => token.matches(tok),
)?.[1](token, lhs);
assertExists(value, `Invalid infix: ${token}`);
return value;
}
infixBindingPower(token: Token | undefined) {
return findIn(
this.bindingPower.infix.entries(),
([tok]) =>
typeof tok === "function"
? token && tok(token) || false
: token?.matches(tok) ?? false,
)?.[1];
}
prefixBindingPower(token: Token | undefined) {
return findIn(
this.bindingPower.prefix.entries(),
([tok]) => token?.matches(tok) ?? false,
)?.[1];
}
parse(mbp = 0) {
let token = this.tokens.next();
if (token == null) {
// end
return;
}
let lhs = this.getPrefix(token);
while (true) {
let token = this.tokens.peek();
// todo: check if valid infix/postfix start
// todo: error?
let ibp = this.infixBindingPower(token);
console.log("ibp", [this.tokens.pos], ibp, token);
if (ibp !== undefined) {
// infix requires lbp
if (ibp.lbp! < mbp) break;
this.tokens.next();
lhs = this.getInfix(token, lhs);
// console.log(this.tokens.pos, token, lhs);
continue;
} else {
break;
}
}
return lhs;
}
// maybe should be not be part of the class
parsePattern(pattern: Pattern) {
return pattern.map((pat) => {
if (pat instanceof BindingPower) {
return this.parse(pat.rbp ?? 0);
} else {
let token = this.tokens.next();
assertEquals(
token,
pat,
`unexpected ${token} at ${this.tokens.pos}. expected ${pat}`,
);
return token;
}
});
}
}
// syn '+ b
// syn '- b
// syn 'is 'not b
// syn 'is b
// syn 'not b
// syn a '= b
// syn a '? b ': c
// syn a '> b
// syn a '< b
// syn a '+ b
// syn a '- b
// syn a '* b
// syn a '/ b
let text = `
syn '+ a => "+{a}"
syn '- a => "-{a}"
syn a '+ b => "{a}+{b}"
syn a '- b => "{a}-{b}"
syn a '/ b => "{a}-{b}"
syn a '* b => "{a}-{b}"
syn a '** b => "{a}-{b}"
syn a '> b => "{a}>{b}"
syn a '< b => "{a}<{b}"
syn a '>= b => "{a}>={b}"
syn a '<= b => "{a}<={b}"
syn a '> b '> c => "{a} > {b} && {b} > {c}"
syn a '= b => "{a} = {b}"
syn 'if a 'then b 'else c => ""
syn a '? b ': c => if a then b else c
syn a 'is 'not b => "{a} = {b}"
1 > 2
`;
let lexer = new Lexer(text);
let parser = new Parser([...lexer.tokens()]);
let bp = 0;
parser.bindingPower.prefix.set(Token.Separator(), BP(null, 0));
parser.bindingPower.infix.set(Token.Separator(), BP(0, 0));
// parser.bindingPower.prefix.set(name => )
// parser.bindingPower.prefix.set(Token.Integer(0n), BP(null, 0));
parser.patterns.prefix.set(
Token.Integer(0n),
(tok) => (tok.value as Literal).value!,
);
parser.bindingPower.prefix.set(
Token.Name("syn"),
BP(null, bp++),
);
parser.patterns.prefix.set(
Token.Separator(),
(tok) => {
return parser.parse(0)!;
},
);
parser.patterns.infix.set(
Token.Separator(),
(tok, lhs) => {
return [lhs, parser.parse(1)];
},
);
parser.patterns.prefix.set(
Token.Name("syn"),
(tok) => {
let parts: Pattern = [];
let output: Output;
let substitutions: { at: number; symbol: string }[] = [];
let i = 0;
for (const tok of parser.tokens.reading()) {
if ((tok.matches(Token.Symbol("=>")))) {
// parser.tokens.pos -= 1;
// console.log("p", parser.parse(1));
output = parser.parse(1);
break;
}
if (tok.matches(Token.Symbol("'"))) {
const next = parser.tokens.next();
assert(
next.type === "Name" || next.type === "Symbol",
`Invalid token in pattern part at ${parser.tokens.pos}`,
);
parts.push(next);
} else if (
tok.type === "Symbol" && (tok.value as string).startsWith("'")
) {
parts.push(Token.Symbol((tok.value as string).slice(1)));
} else if (tok.type === "Name") {
parts.push(BP(++bp, ++bp));
substitutions.push({ at: i, symbol: tok.value as string });
} else {
throw new Error(`Invalid pattern part ${tok} at ${parser.tokens.pos}`);
}
i += 1;
}
const [start, ...pattern] = parts;
if (start instanceof Token) {
parser.bindingPower.prefix.set(
start,
BP(null, ++bp),
);
parser.patterns.prefix.set(start, (_tok) => {
return [start.value, ...parser.parsePattern(pattern)];
});
} else if (start instanceof BindingPower) {
console.log({ start, pattern });
assertInstanceOf(pattern[0], Token);
parser.bindingPower.infix.set(
pattern[0],
start,
);
parser.patterns.infix.set(pattern[0], (_tok, lhs) => {
let values = [0, lhs, ...parser.parsePattern(pattern.slice(1))];
if (typeof output === "string") {
console.log(values)
return substitutions.reduce(
(prev, { at, symbol }) =>
prev.replaceAll(`{${symbol}}`, values[at]!.toString()),
output,
);
} else {
return output;
}
for (let i = 0; i < parts.length; i++) {
// let part = parts[i]
// let value = values[i]
}
return [
(pattern[0] as Token).value,
lhs,
parser.parsePattern(pattern.slice(1)),
];
const parsed = parser.parsePattern(pattern.slice(1));
// return (output as string).replaceAll("{a}", `${lhs}`)
});
}
return ["syn", parts, output];
},
);
// after syn
parser.patterns.prefix.set(
(tok) => tok.type === "Name" && parser.prefixBindingPower(tok) == null,
(tok) => {
return tok.value;
},
);
// parser.patterns.prefix.set(
// Token.Integer(0n),
// (tok) => ["int", tok.value],
// );
// parser.bindingPower.infix.set(
// Token.Symbol("+"),
// new BindingPower(3, 4),
// );
// parser.bindingPower.infix.set(
// Token.Symbol("?"),
// new BindingPower(0, 1)
// )
// parser.patterns.infix.set(
// Token.Symbol("+"),
// (tok, lhs) => {
// const [rhs] = parser.parsePattern([BP(0, 1)]);
// return ["+", lhs, rhs];
// },
// );
// parser.patterns.infix.set(
// Token.Symbol("?"),
// (tok, lhs) => {
// const [then_, _, else_] = parser.parsePattern([
// BP(0, 1),
// Token.Symbol(":"),
// BP(2, 3),
// ]);
// return ["if", lhs, "then", then_, "else", else_];
// },
// );
console.log(Deno.inspect(parser.parse(), { colors: true, depth: Infinity }));
// console.log(parser.parse());
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment