-
-
Save kartiknair/31f3d61b5ac5ceb56f951e0ea278363c to your computer and use it in GitHub Desktop.
Little interpreter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function isDigit(str) { | |
return /^\d+$/.test(str); | |
} | |
function isAlpha(str) { | |
return /[a-z]/i.test(str); | |
} | |
function isWhitespace(str) { | |
return /\s/.test(str); | |
} | |
// old: code -> syntax tree | |
// new: code (string) -> tokens -> syntax tree | |
/* | |
let foo = 23; | |
-- lexing (lexical) or tokenizing | |
// has no concept of invalid syntax | |
// only exists to make white-space irrelevant | |
tokens: [ | |
{type: 'keyword', keyword: 'let'} | |
{type: 'identifier', name: 'foo'} | |
{type: 'symbol', symbol: '='} | |
{type: 'numberLiteral', value: 23} | |
{type: 'symbol', symbol: ';'} | |
] | |
-- parsing | |
tree: { | |
type: 'stmt', | |
stmtType: 'let', | |
name: {type: 'identifier', name: 'foo'}, // a token | |
initializer: { | |
type: 'expr', | |
exprType: 'literal', | |
token: {type: 'numberLiteral', value: 23}, | |
} | |
} | |
*/ | |
function tokenize(code) { | |
let cursor = 0; | |
let keywords = ["let", "return", "true", "false"]; | |
let symbols = [";", ",", "=", "(", ")", "{", "}"]; | |
function token() { | |
// [ ] identifiers (names): e.g. foo, bar | |
// [ ] keywords: e.g. let, if, else, while | |
// [*] symbols: e.g. ;, ', ", ., =, (, ) | |
// [*] literals: e.g. 24, 52, "hello", 'a' | |
if (isWhitespace(code[cursor])) { | |
cursor++; | |
return null; | |
} | |
if (symbols.includes(code[cursor])) { | |
cursor++; | |
return { | |
type: "token", | |
tokenType: "symbol", | |
symbol: code[cursor - 1], | |
}; | |
} else if (code[cursor] === "-") { | |
cursor++; | |
if (code[cursor] === ">") { | |
cursor++; | |
return { | |
type: "token", | |
tokenType: "symbol", | |
symbol: "->", | |
}; | |
} else { | |
throw `Lexer error: unexpected symbol '-'`; | |
} | |
} | |
if (isDigit(code[cursor])) { | |
// number | |
let numberBeginIdx = cursor; | |
while (isDigit(code[cursor])) { | |
cursor++; | |
} | |
let numberEndIdx = cursor; | |
let numberString = code.substring(numberBeginIdx, numberEndIdx); | |
let numberValue = parseInt(numberString); | |
return { | |
type: "token", | |
tokenType: "number", | |
value: numberValue, | |
}; | |
} | |
if (code[cursor] === `'` || code[cursor] === `"`) { | |
// string | |
let quote = code[cursor]; | |
cursor++; | |
let stringBeginIdx = cursor; | |
while (code[cursor] !== quote) { | |
cursor++; | |
} | |
let stringEndIdx = cursor; | |
let stringValue = code.substring(stringBeginIdx, stringEndIdx); | |
cursor++; // skip the closing quote | |
return { | |
typ: "token", | |
tokenType: "string", | |
value: stringValue, | |
}; | |
} | |
if (isAlpha(code[cursor])) { | |
let identBeginIdx = cursor; | |
while ( | |
cursor < code.length && | |
(isAlpha(code[cursor]) || isDigit(code[cursor])) | |
) { | |
cursor++; | |
} | |
let identEndIdx = cursor; | |
let name = code.substring(identBeginIdx, identEndIdx); // "let", "bar" | |
if (keywords.includes(name)) { | |
return { | |
type: "token", | |
tokenType: "keyword", | |
keyword: name, | |
}; | |
} else { | |
return { | |
type: "token", | |
tokenType: "identifier", | |
name: name, | |
}; | |
} | |
} | |
throw `Lexer error: unknown character '${code[cursor]}'`; | |
} | |
let tokens = []; | |
while (cursor < code.length) { | |
let t = token(); | |
if (t !== null) { | |
tokens.push(t); | |
} | |
} | |
return tokens; | |
} | |
function parse(tokens) { | |
let cursor = 0; | |
function parseExpr() { | |
let expr; | |
if ( | |
tokens[cursor].tokenType === "number" || | |
tokens[cursor].tokenType === "string" | |
) { | |
cursor++; | |
expr = { | |
type: "expr", | |
exprType: "literal", | |
token: tokens[cursor - 1], | |
}; | |
} else if ( | |
tokens[cursor].tokenType === "keyword" && | |
(tokens[cursor].keyword === "true" || tokens[cursor].keyword === "false") | |
) { | |
cursor++; | |
expr = { | |
type: "expr", | |
exprType: "literal", | |
token: { | |
...tokens[cursor - 1], | |
// add in the JavaScript value | |
value: tokens[cursor - 1].keyword === "true", | |
}, | |
}; | |
} else if (tokens[cursor].tokenType === "identifier") { | |
cursor++; | |
expr = { | |
type: "expr", | |
exprType: "identifier", | |
name: tokens[cursor - 1], | |
}; | |
} else if (isSymbol(tokens[cursor], "(")) { | |
cursor++; | |
let params = []; | |
if (!isSymbol(tokens[cursor], ")")) { | |
while (true) { | |
let param = expect( | |
"identifier", | |
"Expected parameter name in function literal." | |
); | |
params.push(param); | |
if (isSymbol(tokens[cursor], ",")) { | |
cursor++; | |
} else { | |
break; // if we get anything except for a comma we stop looping | |
} | |
} | |
} | |
expectSymbol( | |
")", | |
"Expected closing `)` after parameters in function literal." | |
); | |
expectSymbol( | |
"->", | |
"Expected '->' after parameter list in function literal." | |
); | |
expectSymbol( | |
"{", | |
"Expected opening '{' after arrow in function literal." | |
); | |
let body = []; | |
while (cursor < tokens.length && !isSymbol(tokens[cursor], "}")) { | |
body.push(parseStmt()); | |
if (!isSymbol(tokens[cursor], ";")) { | |
parseErrorAtCursor("Expected ';'."); | |
} | |
cursor++; // skip the ; | |
} | |
expectSymbol("}", "Expected closing '{' after body in function literal."); | |
expr = { | |
type: "expr", | |
exprType: "funLit", | |
params: params, | |
body: body, | |
}; | |
} else { | |
parseErrorAtCursor("Expected expression."); | |
} | |
while (cursor < tokens.length && isSymbol(tokens[cursor], "(")) { | |
cursor++; | |
let args = []; | |
if (!isSymbol(tokens[cursor], ")")) { | |
while (true) { | |
args.push(parseExpr()); | |
if (isSymbol(tokens[cursor], ",")) { | |
cursor++; | |
} else { | |
break; // if we get anything except for a comma we stop looping | |
} | |
} | |
} | |
expectSymbol(")", "Expect closing `)` in function call."); | |
expr = { | |
type: "expr", | |
exprType: "call", | |
callee: expr, | |
args: args, | |
}; | |
} | |
return expr; | |
} | |
function parseErrorAtCursor(msg) { | |
throw `Parse error at token ${cursor}: ${msg}`; | |
} | |
function expect(expectedType, errorMsg) { | |
if (tokens[cursor].tokenType === expectedType) { | |
// we got the expected type | |
cursor++; | |
return tokens[cursor - 1]; | |
} else { | |
parseErrorAtCursor(errorMsg); | |
} | |
} | |
function isSymbol(token, symbol) { | |
return token.tokenType === "symbol" && token.symbol === symbol; | |
} | |
function expectSymbol(symbolType, errorMsg) { | |
if (isSymbol(tokens[cursor], symbolType)) { | |
cursor++; | |
return tokens[cursor - 1]; | |
} else { | |
parseErrorAtCursor(errorMsg); | |
} | |
} | |
function parseStmt() { | |
if ( | |
tokens[cursor].tokenType === "keyword" && | |
tokens[cursor].keyword === "let" | |
) { | |
cursor++; | |
let name = expect("identifier", "Expected variable name."); | |
expectSymbol("=", "Expected `=` after variable name"); | |
let initializer = parseExpr(); | |
return { | |
type: "stmt", | |
stmtType: "binding", | |
name: name, | |
initializer: initializer, | |
}; | |
} else if ( | |
tokens[cursor].tokenType === "keyword" && | |
tokens[cursor].keyword === "return" | |
) { | |
cursor++; | |
let value = parseExpr(); | |
return { | |
type: "stmt", | |
stmtType: "return", | |
value: value, | |
}; | |
} else if ( | |
tokens[cursor].tokenType === "identifier" && | |
cursor + 1 < tokens.length && | |
isSymbol(tokens[cursor + 1], "=") | |
) { | |
let name = tokens[cursor]; | |
cursor += 2; | |
let value = parseExpr(); | |
return { | |
type: "stmt", | |
stmtType: "assign", | |
name, | |
value, | |
}; | |
} else { | |
let expr = parseExpr(); | |
return { | |
type: "stmt", | |
stmtType: "exprStmt", | |
expr: expr, | |
}; | |
} | |
} | |
let stmts = []; | |
while (cursor < tokens.length) { | |
stmts.push(parseStmt()); | |
if (cursor === tokens.length || !isSymbol(tokens[cursor], ";")) { | |
parseErrorAtCursor("Expected ';'."); | |
} | |
cursor++; // skip the ; | |
} | |
return stmts; | |
} | |
class Return { | |
constructor(value) { | |
this.value = value; | |
} | |
} | |
function execute(stmts) { | |
let values = { | |
println: { | |
call: (args) => { | |
console.log(evaluateExpr(args[0])); | |
}, | |
}, | |
add: { | |
call: (args) => { | |
return evaluateExpr(args[0]) + evaluateExpr(args[1]); | |
}, | |
}, | |
lt: { | |
call: (args) => { | |
return evaluateExpr(args[0]) < evaluateExpr(args[1]); | |
}, | |
}, | |
while: { | |
call: (args) => { | |
while (evaluateExpr(args[0])) { | |
let func = evaluateExpr(args[1]); | |
func.call([]); | |
} | |
}, | |
}, | |
}; | |
function evaluateExpr(expr) { | |
switch (expr.exprType) { | |
case "literal": | |
return expr.token.value; | |
case "identifier": | |
if (expr.name.name in values) { | |
return values[expr.name.name]; | |
} else { | |
throw `Runtime Error: Undefined variable '${expr.name.name}'.`; | |
} | |
case "funLit": | |
return { | |
call: (args) => { | |
if (args.length !== expr.params.length) { | |
throw `Runtime Error: Function called with invalid number of arguments.`; | |
} | |
let shadowed = {}; | |
expr.params.forEach((param, i) => { | |
if (param.name in values) { | |
shadowed[param.name] = values[param.name]; | |
} | |
values[param.name] = evaluateExpr(args[i]); | |
}); | |
function resetShadowed() { | |
Object.entries(shadowed).forEach(([name, value]) => { | |
values[name] = value; | |
}); | |
} | |
for (let stmt of expr.body) { | |
try { | |
evaluateStmt(stmt); | |
} catch (e) { | |
if (e instanceof Return) { | |
resetShadowed(); | |
return e.value; | |
} else { | |
throw e; | |
} | |
} | |
} | |
resetShadowed(); | |
}, | |
}; | |
case "call": | |
let callee = evaluateExpr(expr.callee); | |
return callee.call(expr.args); | |
} | |
} | |
function evaluateStmt(stmt) { | |
switch (stmt.stmtType) { | |
case "binding": | |
let name = stmt.name.name; // the first name is the token the second is the string version of it | |
values[name] = evaluateExpr(stmt.initializer); | |
break; | |
case "assign": | |
values[stmt.name.name] = evaluateExpr(stmt.value); | |
break; | |
case "exprStmt": | |
evaluateExpr(stmt.expr); | |
break; | |
case "return": | |
throw new Return(evaluateExpr(stmt.value)); | |
} | |
} | |
for (let stmt of stmts) { | |
evaluateStmt(stmt); | |
} | |
} | |
let code = ` | |
let n = 0; | |
while(lt(n, 5), () -> { | |
println(n); | |
n = add(n, 1); | |
}); | |
`; | |
let tokens = tokenize(code); | |
// console.log(tokens); | |
let ast = parse(tokens); | |
// console.log(ast); | |
execute(ast); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment