Skip to content

Instantly share code, notes, and snippets.

@kartiknair
Created April 30, 2023 07:38
Show Gist options
  • Save kartiknair/31f3d61b5ac5ceb56f951e0ea278363c to your computer and use it in GitHub Desktop.
Save kartiknair/31f3d61b5ac5ceb56f951e0ea278363c to your computer and use it in GitHub Desktop.
Little interpreter
function isDigit(str) {
return /^\d+$/.test(str);
}
function isAlpha(str) {
return /[a-z]/i.test(str);
}
function isWhitespace(str) {
return /\s/.test(str);
}
// old: code -> syntax tree
// new: code (string) -> tokens -> syntax tree
/*
let foo = 23;
-- lexing (lexical) or tokenizing
// has no concept of invalid syntax
// only exists to make white-space irrelevant
tokens: [
{type: 'keyword', keyword: 'let'}
{type: 'identifier', name: 'foo'}
{type: 'symbol', symbol: '='}
{type: 'numberLiteral', value: 23}
{type: 'symbol', symbol: ';'}
]
-- parsing
tree: {
type: 'stmt',
stmtType: 'let',
name: {type: 'identifier', name: 'foo'}, // a token
initializer: {
type: 'expr',
exprType: 'literal',
token: {type: 'numberLiteral', value: 23},
}
}
*/
function tokenize(code) {
let cursor = 0;
let keywords = ["let", "return", "true", "false"];
let symbols = [";", ",", "=", "(", ")", "{", "}"];
function token() {
// [ ] identifiers (names): e.g. foo, bar
// [ ] keywords: e.g. let, if, else, while
// [*] symbols: e.g. ;, ', ", ., =, (, )
// [*] literals: e.g. 24, 52, "hello", 'a'
if (isWhitespace(code[cursor])) {
cursor++;
return null;
}
if (symbols.includes(code[cursor])) {
cursor++;
return {
type: "token",
tokenType: "symbol",
symbol: code[cursor - 1],
};
} else if (code[cursor] === "-") {
cursor++;
if (code[cursor] === ">") {
cursor++;
return {
type: "token",
tokenType: "symbol",
symbol: "->",
};
} else {
throw `Lexer error: unexpected symbol '-'`;
}
}
if (isDigit(code[cursor])) {
// number
let numberBeginIdx = cursor;
while (isDigit(code[cursor])) {
cursor++;
}
let numberEndIdx = cursor;
let numberString = code.substring(numberBeginIdx, numberEndIdx);
let numberValue = parseInt(numberString);
return {
type: "token",
tokenType: "number",
value: numberValue,
};
}
if (code[cursor] === `'` || code[cursor] === `"`) {
// string
let quote = code[cursor];
cursor++;
let stringBeginIdx = cursor;
while (code[cursor] !== quote) {
cursor++;
}
let stringEndIdx = cursor;
let stringValue = code.substring(stringBeginIdx, stringEndIdx);
cursor++; // skip the closing quote
return {
typ: "token",
tokenType: "string",
value: stringValue,
};
}
if (isAlpha(code[cursor])) {
let identBeginIdx = cursor;
while (
cursor < code.length &&
(isAlpha(code[cursor]) || isDigit(code[cursor]))
) {
cursor++;
}
let identEndIdx = cursor;
let name = code.substring(identBeginIdx, identEndIdx); // "let", "bar"
if (keywords.includes(name)) {
return {
type: "token",
tokenType: "keyword",
keyword: name,
};
} else {
return {
type: "token",
tokenType: "identifier",
name: name,
};
}
}
throw `Lexer error: unknown character '${code[cursor]}'`;
}
let tokens = [];
while (cursor < code.length) {
let t = token();
if (t !== null) {
tokens.push(t);
}
}
return tokens;
}
function parse(tokens) {
let cursor = 0;
function parseExpr() {
let expr;
if (
tokens[cursor].tokenType === "number" ||
tokens[cursor].tokenType === "string"
) {
cursor++;
expr = {
type: "expr",
exprType: "literal",
token: tokens[cursor - 1],
};
} else if (
tokens[cursor].tokenType === "keyword" &&
(tokens[cursor].keyword === "true" || tokens[cursor].keyword === "false")
) {
cursor++;
expr = {
type: "expr",
exprType: "literal",
token: {
...tokens[cursor - 1],
// add in the JavaScript value
value: tokens[cursor - 1].keyword === "true",
},
};
} else if (tokens[cursor].tokenType === "identifier") {
cursor++;
expr = {
type: "expr",
exprType: "identifier",
name: tokens[cursor - 1],
};
} else if (isSymbol(tokens[cursor], "(")) {
cursor++;
let params = [];
if (!isSymbol(tokens[cursor], ")")) {
while (true) {
let param = expect(
"identifier",
"Expected parameter name in function literal."
);
params.push(param);
if (isSymbol(tokens[cursor], ",")) {
cursor++;
} else {
break; // if we get anything except for a comma we stop looping
}
}
}
expectSymbol(
")",
"Expected closing `)` after parameters in function literal."
);
expectSymbol(
"->",
"Expected '->' after parameter list in function literal."
);
expectSymbol(
"{",
"Expected opening '{' after arrow in function literal."
);
let body = [];
while (cursor < tokens.length && !isSymbol(tokens[cursor], "}")) {
body.push(parseStmt());
if (!isSymbol(tokens[cursor], ";")) {
parseErrorAtCursor("Expected ';'.");
}
cursor++; // skip the ;
}
expectSymbol("}", "Expected closing '{' after body in function literal.");
expr = {
type: "expr",
exprType: "funLit",
params: params,
body: body,
};
} else {
parseErrorAtCursor("Expected expression.");
}
while (cursor < tokens.length && isSymbol(tokens[cursor], "(")) {
cursor++;
let args = [];
if (!isSymbol(tokens[cursor], ")")) {
while (true) {
args.push(parseExpr());
if (isSymbol(tokens[cursor], ",")) {
cursor++;
} else {
break; // if we get anything except for a comma we stop looping
}
}
}
expectSymbol(")", "Expect closing `)` in function call.");
expr = {
type: "expr",
exprType: "call",
callee: expr,
args: args,
};
}
return expr;
}
function parseErrorAtCursor(msg) {
throw `Parse error at token ${cursor}: ${msg}`;
}
function expect(expectedType, errorMsg) {
if (tokens[cursor].tokenType === expectedType) {
// we got the expected type
cursor++;
return tokens[cursor - 1];
} else {
parseErrorAtCursor(errorMsg);
}
}
function isSymbol(token, symbol) {
return token.tokenType === "symbol" && token.symbol === symbol;
}
function expectSymbol(symbolType, errorMsg) {
if (isSymbol(tokens[cursor], symbolType)) {
cursor++;
return tokens[cursor - 1];
} else {
parseErrorAtCursor(errorMsg);
}
}
function parseStmt() {
if (
tokens[cursor].tokenType === "keyword" &&
tokens[cursor].keyword === "let"
) {
cursor++;
let name = expect("identifier", "Expected variable name.");
expectSymbol("=", "Expected `=` after variable name");
let initializer = parseExpr();
return {
type: "stmt",
stmtType: "binding",
name: name,
initializer: initializer,
};
} else if (
tokens[cursor].tokenType === "keyword" &&
tokens[cursor].keyword === "return"
) {
cursor++;
let value = parseExpr();
return {
type: "stmt",
stmtType: "return",
value: value,
};
} else if (
tokens[cursor].tokenType === "identifier" &&
cursor + 1 < tokens.length &&
isSymbol(tokens[cursor + 1], "=")
) {
let name = tokens[cursor];
cursor += 2;
let value = parseExpr();
return {
type: "stmt",
stmtType: "assign",
name,
value,
};
} else {
let expr = parseExpr();
return {
type: "stmt",
stmtType: "exprStmt",
expr: expr,
};
}
}
let stmts = [];
while (cursor < tokens.length) {
stmts.push(parseStmt());
if (cursor === tokens.length || !isSymbol(tokens[cursor], ";")) {
parseErrorAtCursor("Expected ';'.");
}
cursor++; // skip the ;
}
return stmts;
}
class Return {
constructor(value) {
this.value = value;
}
}
function execute(stmts) {
let values = {
println: {
call: (args) => {
console.log(evaluateExpr(args[0]));
},
},
add: {
call: (args) => {
return evaluateExpr(args[0]) + evaluateExpr(args[1]);
},
},
lt: {
call: (args) => {
return evaluateExpr(args[0]) < evaluateExpr(args[1]);
},
},
while: {
call: (args) => {
while (evaluateExpr(args[0])) {
let func = evaluateExpr(args[1]);
func.call([]);
}
},
},
};
function evaluateExpr(expr) {
switch (expr.exprType) {
case "literal":
return expr.token.value;
case "identifier":
if (expr.name.name in values) {
return values[expr.name.name];
} else {
throw `Runtime Error: Undefined variable '${expr.name.name}'.`;
}
case "funLit":
return {
call: (args) => {
if (args.length !== expr.params.length) {
throw `Runtime Error: Function called with invalid number of arguments.`;
}
let shadowed = {};
expr.params.forEach((param, i) => {
if (param.name in values) {
shadowed[param.name] = values[param.name];
}
values[param.name] = evaluateExpr(args[i]);
});
function resetShadowed() {
Object.entries(shadowed).forEach(([name, value]) => {
values[name] = value;
});
}
for (let stmt of expr.body) {
try {
evaluateStmt(stmt);
} catch (e) {
if (e instanceof Return) {
resetShadowed();
return e.value;
} else {
throw e;
}
}
}
resetShadowed();
},
};
case "call":
let callee = evaluateExpr(expr.callee);
return callee.call(expr.args);
}
}
function evaluateStmt(stmt) {
switch (stmt.stmtType) {
case "binding":
let name = stmt.name.name; // the first name is the token the second is the string version of it
values[name] = evaluateExpr(stmt.initializer);
break;
case "assign":
values[stmt.name.name] = evaluateExpr(stmt.value);
break;
case "exprStmt":
evaluateExpr(stmt.expr);
break;
case "return":
throw new Return(evaluateExpr(stmt.value));
}
}
for (let stmt of stmts) {
evaluateStmt(stmt);
}
}
let code = `
let n = 0;
while(lt(n, 5), () -> {
println(n);
n = add(n, 1);
});
`;
let tokens = tokenize(code);
// console.log(tokens);
let ast = parse(tokens);
// console.log(ast);
execute(ast);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment