Skip to content

Instantly share code, notes, and snippets.

@nopeless
Created November 30, 2021 22:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nopeless/f3dc2e6888475b051e484f72dd6c846d to your computer and use it in GitHub Desktop.
Save nopeless/f3dc2e6888475b051e484f72dd6c846d to your computer and use it in GitHub Desktop.
i speed run arithmetic operation tokenization in 90 minutes
/* eslint-disable consistent-return */
class TokenType {
constructor(type, regex) {
this.type = type;
this.regex = regex;
}
match(s) {
const isMatch = this.regex.exec(s);
console.log(isMatch);
if (isMatch) {
return {
name: `Token`,
type: this,
content: isMatch[0],
length: isMatch[0].length,
};
}
return;
}
}
// Apparently there isn't a way to make it anchor start and end by default (didn't know that)
const INT = new TokenType(`int`, /^\d+/);
const MUL = new TokenType(`mul`, /^\*/);
const ADD = new TokenType(`add`, /^\+/);
const OPEN_PAREN = new TokenType(`(`, /^\(/);
const CLOSE_PAREN = new TokenType(`)`, /^\)/);
const TOKEN_LIST = [INT, MUL, ADD, OPEN_PAREN, CLOSE_PAREN];
/**
* Splits by whitespace
*/
function splitter(s) {
return s.split(/\s/g);
}
// I forgot what this was called, but ill just call it context for now
class Context {
constructor(name, count) {
this.name = name;
this.count = count;
}
}
class Grammar {
/**
* Executor should return self
*/
constructor(executor) {
this.executor = executor;
}
/**
* Some other implementations write this as "start" and "end"
*/
exec(tokens, entry) {
return this.executor(tokens, entry);
}
}
class IExpression {
value() {
throw new Error(`Not implemented`);
}
length() {
throw new Error(`Not implemented`);
}
}
class IntegerExpression extends IExpression {
constructor(val) {
super();
this.val = val;
}
value() {
return this.val;
}
length() {
return 1;
}
}
class AddExpression extends IExpression {
constructor(left, right) {
super();
this.left = left;
this.right = right;
}
value() {
return this.left.value() + this.right.value();
}
length() {
return 1 + this.left.length() + this.right.length();
}
}
class MultiplyExpression extends IExpression {
constructor(left, right) {
super();
this.left = left;
this.right = right;
}
value() {
return this.left.value() * this.right.value();
}
length() {
return 1 + this.left.length() + this.right.length();
}
}
class ParenthesizedExpression extends IExpression {
constructor(expression) {
super();
this.expression = expression;
}
value() {
return this.expression.value();
}
length() {
return 2 + this.expression.length();
}
}
class TermExpression extends IExpression {
constructor(expression) {
super();
this.expression = expression;
}
value() {
return this.expression.value();
}
length() {
return this.expression.length();
}
}
class ExpressionExpression extends IExpression {
constructor(expression) {
super();
this.expression = expression;
}
value() {
return this.expression.value();
}
length() {
return this.expression.length();
}
}
// Keep it simple just keep track of a global
// entry point is Exp
const Exp = new Grammar(function(tokens, entry) {
const res = (() => {
const term = Grammar.Term.exec(tokens);
if (!term) throw new Error(`cannot parse`);
tokens = tokens.slice(term.length());
if (term instanceof TermExpression) {
if (tokens.length) {
// attempt to parse +
if (tokens[0].type.type === `add`) {
tokens = tokens.slice(1);
const right = Grammar.Term.exec(tokens);
if (!right) throw new Error(`cannot parse`);
tokens = tokens.slice(right.length());
return new AddExpression(term, right);
}
// attempt to parse *
if (tokens[0].type.type === `mul`) {
tokens = tokens.slice(1);
const right = Grammar.Term.exec(tokens);
if (!right) throw new Error(`cannot parse`);
tokens = tokens.slice(term.length());
return new MultiplyExpression(term, right);
}
throw new Error(`cannot parse anything other than add`);
}
}
return new ExpressionExpression(term);
})();
if (entry && tokens.length) {
throw new Error(`EXPECTED EOF, got ${tokens[0].content}`);
}
return res;
});
const Term = new Grammar(function(tokens, entry) {
const res = (() => {
if (tokens[0] && tokens[0].type.type === `(`) {
tokens = tokens.slice(1);
const exp = Grammar.Exp.exec(tokens);
tokens = tokens.slice(exp.length());
if (tokens[0].type.type !== `)`) {
throw new Error(`expected closing paren`);
}
tokens = tokens.slice(1);
return new ParenthesizedExpression(exp);
}
if (tokens[0] && tokens[0].type.type === `int`) {
const num = Number(tokens[0].content);
tokens = tokens.slice(1);
if (tokens.length && tokens[0].type.type === `mul`) {
tokens = tokens.slice(1);
const right = Grammar.Term.exec(tokens);
if (!right) {
throw new Error(`cannot parse right side`);
}
return new TermExpression(new MultiplyExpression(new IntegerExpression(num), right));
}
return new TermExpression(new IntegerExpression(num));
}
})();
if (entry && tokens.length) {
throw new Error(`EXPECTED EOF, got ${tokens[0].content}`);
}
return res;
});
Grammar.Exp = Exp;
Grammar.Term = Term;
function tokenize(s) {
s = splitter(s);
const res = [];
for (const t of s) {
let pointer = 0;
for (;;) {
let hasMatch = false;
const target = t.substring(pointer);
if (!target) break;
for (const token of TOKEN_LIST) {
const match = token.match(target);
if (match) {
pointer += match.length;
res.push(match);
hasMatch = true;
break;
}
}
if (!hasMatch) {
throw new Error(`Cannot parse '${target}'`);
}
}
if (pointer < t.length) {
throw new Error(`Expected EOF, got '${t.substring(pointer)}'`);
}
}
return res;
}
const tokenized = tokenize(`1 + 2`);
console.log(`tokenized`, tokenized);
const res = Grammar.Exp.exec(tokenized, true);
console.log(res);
console.log(res.value());
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment