Created
September 18, 2018 22:19
-
-
Save wylieconlon/6c4c98e021e7206b5ef4fde702dbab34 to your computer and use it in GitHub Desktop.
Lisp Parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const TOKENS = { | |
START: 'start', | |
END: 'end', | |
}; | |
function tokenize(code) { | |
let current = 0; | |
let startOfToken; | |
let tokens = []; | |
// Match the first character of any token | |
while (current < code.length) { | |
startOfToken = current; | |
switch(code[current]) { | |
case '(': | |
tokens.push(TOKENS.START); | |
current++; | |
break; | |
case ')': | |
tokens.push(TOKENS.END); | |
current++; | |
break; | |
case '"': | |
case '\'': | |
_consumeString(code[current]); | |
break; | |
case (code[current].match(/[0-9]/) || {}).input: | |
_consumeNumber(); | |
break; | |
case (code[current].match(/\S/) || {}).input: | |
// Non white space character | |
_consumeOperator(); | |
break; | |
default: | |
// All white space is ignored between tokens | |
current++; | |
break; | |
} | |
} | |
function _consumeOperator() { | |
while (/\S/.test(code[current]) && current < code.length) { | |
current++; | |
} | |
tokens.push(code.substring(startOfToken, current)); | |
} | |
function _consumeString(stringMarker) { | |
startOfToken++; | |
current++; | |
while (code[current] !== stringMarker && current < code.length) { | |
current++; | |
} | |
const stringToken = '"' + code.substring(startOfToken, current) + '"'; | |
tokens.push(stringToken); | |
current++; | |
} | |
function _consumeNumber() { | |
// This is not valid for exponentiated numbers such as 0.5e10 | |
while (/[0-9\.]/.test(code[current]) && current < code.length) { | |
current++; | |
} | |
tokens.push(parseFloat(code.substring(startOfToken, current))); | |
} | |
return tokens; | |
} | |
function parse(code) { | |
const tokens = tokenize(code); | |
let globalIndex = 0; | |
let errors = []; | |
let result = _consumeExpression(); | |
/** | |
* expression: | |
* start | |
* operator | |
* (repeated) literal or expression | |
* end | |
*/ | |
function _consumeExpression() { | |
let ast = []; | |
if (tokens[globalIndex] !== TOKENS.START) { | |
errors.push('Missing opening parentheses'); | |
} | |
globalIndex++; | |
if (globalIndex < tokens.length) { | |
const operator = tokens[globalIndex]; | |
if (typeof operator !== 'string') { | |
errors.push('Invalid operator ' + operator); | |
} else if (operator === TOKENS.START || operator === TOKENS.END) { | |
errors.push('Invalid operator ' + operator); | |
} else if (operator.indexOf('"') === 0) { | |
errors.push('Invalid operator ' + operator); | |
} else { | |
ast.push(operator); | |
} | |
globalIndex++; | |
} | |
while (globalIndex < tokens.length) { | |
if (tokens[globalIndex] === TOKENS.START) { | |
const expression = _consumeExpression(); | |
if (expression.length > 0) { | |
ast.push(expression); | |
} | |
} else if (tokens[globalIndex] === TOKENS.END) { | |
// Early return if we find an end token | |
return ast; | |
} else { | |
ast.push(tokens[globalIndex]); | |
} | |
globalIndex++; | |
} | |
errors.push('Missing closing parentheses'); | |
} | |
if (errors.length) { | |
throw new Error(errors.join('\n')); | |
} | |
return result; | |
} | |
module.exports = { | |
parse, | |
tokenize | |
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const assert = require('assert'); | |
const { tokenize, parse } = require('../parser.js'); | |
describe('Tokenizing list expressions', function() { | |
it('handles a simple numeric expression', function() { | |
const tokens = tokenize('(+ 1 2)'); | |
assert.deepEqual(tokens, ['start', '+', 1, 2, 'end']); | |
}); | |
it('handles a simple expression with string literals', function() { | |
const tokens = tokenize('(list "boba fett" "jabba the hutt")'); | |
assert.deepEqual(tokens, ['start', 'list', '"boba fett"', '"jabba the hutt"', 'end']); | |
}); | |
it('handles a nested expression', function() { | |
const tokens = tokenize('(- (+ 1 2) 5)'); | |
assert.deepEqual(tokens, ['start', '-', 'start', '+', 1, 2, 'end', 5, 'end']); | |
}); | |
it('ignores whitespaces', function() { | |
const tokens = tokenize('\n\n(\n + \n1\n2)\n\n'); | |
assert.deepEqual(tokens, ['start', '+', 1, 2, 'end']); | |
}); | |
}); | |
describe('Parsing tokens into AST', function() { | |
it('handles a simple expression', function() { | |
const ast = parse('(+ 1 2)'); | |
assert.deepEqual(ast, ['+', 1, 2]); | |
}); | |
it('handles a nested expression', function() { | |
const ast = parse('(- (+ 1 2) 5)'); | |
assert.deepEqual(ast, ['-', ['+', 1, 2], 5]); | |
}); | |
it('throws an error when the operator is a number', function() { | |
assert.throws(function() { | |
parse('(5)'); | |
}, /Invalid operator 5/); | |
}); | |
it('throws an error when the operator is a string', function() { | |
assert.throws(function() { | |
parse('("Not an operator")'); | |
}, /Invalid operator "Not an operator"/); | |
}); | |
it('throws an error when an expression is missing opening parens', function() { | |
assert.throws(function() { | |
parse('5)'); | |
}, /Missing opening parentheses/); | |
}); | |
it('throws an error when an expression is missing closing parens', function() { | |
assert.throws(function() { | |
parse('((list)'); | |
}, /Missing closing parentheses/); | |
}); | |
}); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment