Skip to content

Instantly share code, notes, and snippets.

@wylieconlon
Created September 18, 2018 22:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wylieconlon/6c4c98e021e7206b5ef4fde702dbab34 to your computer and use it in GitHub Desktop.
Save wylieconlon/6c4c98e021e7206b5ef4fde702dbab34 to your computer and use it in GitHub Desktop.
Lisp Parser
const TOKENS = {
START: 'start',
END: 'end',
};
function tokenize(code) {
let current = 0;
let startOfToken;
let tokens = [];
// Match the first character of any token
while (current < code.length) {
startOfToken = current;
switch(code[current]) {
case '(':
tokens.push(TOKENS.START);
current++;
break;
case ')':
tokens.push(TOKENS.END);
current++;
break;
case '"':
case '\'':
_consumeString(code[current]);
break;
case (code[current].match(/[0-9]/) || {}).input:
_consumeNumber();
break;
case (code[current].match(/\S/) || {}).input:
// Non white space character
_consumeOperator();
break;
default:
// All white space is ignored between tokens
current++;
break;
}
}
function _consumeOperator() {
while (/\S/.test(code[current]) && current < code.length) {
current++;
}
tokens.push(code.substring(startOfToken, current));
}
function _consumeString(stringMarker) {
startOfToken++;
current++;
while (code[current] !== stringMarker && current < code.length) {
current++;
}
const stringToken = '"' + code.substring(startOfToken, current) + '"';
tokens.push(stringToken);
current++;
}
function _consumeNumber() {
// This is not valid for exponentiated numbers such as 0.5e10
while (/[0-9\.]/.test(code[current]) && current < code.length) {
current++;
}
tokens.push(parseFloat(code.substring(startOfToken, current)));
}
return tokens;
}
function parse(code) {
const tokens = tokenize(code);
let globalIndex = 0;
let errors = [];
let result = _consumeExpression();
/**
* expression:
* start
* operator
* (repeated) literal or expression
* end
*/
function _consumeExpression() {
let ast = [];
if (tokens[globalIndex] !== TOKENS.START) {
errors.push('Missing opening parentheses');
}
globalIndex++;
if (globalIndex < tokens.length) {
const operator = tokens[globalIndex];
if (typeof operator !== 'string') {
errors.push('Invalid operator ' + operator);
} else if (operator === TOKENS.START || operator === TOKENS.END) {
errors.push('Invalid operator ' + operator);
} else if (operator.indexOf('"') === 0) {
errors.push('Invalid operator ' + operator);
} else {
ast.push(operator);
}
globalIndex++;
}
while (globalIndex < tokens.length) {
if (tokens[globalIndex] === TOKENS.START) {
const expression = _consumeExpression();
if (expression.length > 0) {
ast.push(expression);
}
} else if (tokens[globalIndex] === TOKENS.END) {
// Early return if we find an end token
return ast;
} else {
ast.push(tokens[globalIndex]);
}
globalIndex++;
}
errors.push('Missing closing parentheses');
}
if (errors.length) {
throw new Error(errors.join('\n'));
}
return result;
}
module.exports = {
parse,
tokenize
};
const assert = require('assert');
const { tokenize, parse } = require('../parser.js');
describe('Tokenizing list expressions', function() {
it('handles a simple numeric expression', function() {
const tokens = tokenize('(+ 1 2)');
assert.deepEqual(tokens, ['start', '+', 1, 2, 'end']);
});
it('handles a simple expression with string literals', function() {
const tokens = tokenize('(list "boba fett" "jabba the hutt")');
assert.deepEqual(tokens, ['start', 'list', '"boba fett"', '"jabba the hutt"', 'end']);
});
it('handles a nested expression', function() {
const tokens = tokenize('(- (+ 1 2) 5)');
assert.deepEqual(tokens, ['start', '-', 'start', '+', 1, 2, 'end', 5, 'end']);
});
it('ignores whitespaces', function() {
const tokens = tokenize('\n\n(\n + \n1\n2)\n\n');
assert.deepEqual(tokens, ['start', '+', 1, 2, 'end']);
});
});
describe('Parsing tokens into AST', function() {
it('handles a simple expression', function() {
const ast = parse('(+ 1 2)');
assert.deepEqual(ast, ['+', 1, 2]);
});
it('handles a nested expression', function() {
const ast = parse('(- (+ 1 2) 5)');
assert.deepEqual(ast, ['-', ['+', 1, 2], 5]);
});
it('throws an error when the operator is a number', function() {
assert.throws(function() {
parse('(5)');
}, /Invalid operator 5/);
});
it('throws an error when the operator is a string', function() {
assert.throws(function() {
parse('("Not an operator")');
}, /Invalid operator "Not an operator"/);
});
it('throws an error when an expression is missing opening parens', function() {
assert.throws(function() {
parse('5)');
}, /Missing opening parentheses/);
});
it('throws an error when an expression is missing closing parens', function() {
assert.throws(function() {
parse('((list)');
}, /Missing closing parentheses/);
});
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment