Skip to content

Instantly share code, notes, and snippets.

@aknuds1
Created December 9, 2013 15:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aknuds1/7874252 to your computer and use it in GitHub Desktop.
Save aknuds1/7874252 to your computer and use it in GitHub Desktop.
Generate parser via Jison for an ambiguous grammar.
var Parser = require('jison').Parser;
var _ = require('underscore');
grammar = {
Program: [
['ProgramSection', '$$ = new yy.Program($1);']
],
ProgramSection: [
['Expression SEMICOLON', '$$ = new yy.ExpressionStatement($1);']
],
Expression: [
['DeclExpression', '$$ = $1;'],
['Expression OP DeclExpression', '$$ = new yy.ExpFromBinary($1, $2, $3);']
],
DeclExpression: [
['TypeDecl VarDeclList', '$$ = new yy.DeclExp($1, $2, 0);'],
['PrimaryExpression', '$$ = $1;']
],
VarDeclList: [
['VarDecl', '$$ = new yy.VarDeclList($1);']
],
VarDecl: [
['ID', '$$ = new yy.VarDecl($1);']
],
TypeDecl: [
['ID', '$$ = new yy.TypeDecl(new yy.IdList($1), 0);']
],
PrimaryExpression: [
['ID', '$$ = new yy.ExpFromId($1);']
]
};
var tokens = [];
_(_(grammar).keys()).each(function (name) {
var alternatives = grammar[name];
grammar[name] = _(alternatives).map(function (alt) {
var theseAlternatives = alt[0].split(' ');
// console.log("Alternative: '" + theseAlternatives)
_(theseAlternatives).each(function (token) {
if (!grammar[token] && !_(tokens).some(function (t) {
return t == token;
})) {
//console.log("Terminal token: " + token)
tokens.push(token);
} else {
//console.log("Non-terminal token: " + token)
}
});
if (name === "Program") {
alt[1] = "return " + alt[1];
}
return alt;
});
});
var operators = [];
var parserConfig = {
tokens: tokens.join(' '),
bnf: grammar,
operators: operators.reverse(),
startSymbol: 'Program'
};
console.log('Tokens:', parserConfig.tokens)
console.log('BNF:', parserConfig.bnf)
console.log('Operators:', parserConfig.operators)
exports.generate = new Parser(parserConfig).generate;
exports.count = function(string, substr) {
var num, pos;
num = pos = 0;
if (!substr.length) {
return 1/0;
}
while (pos = 1 + string.indexOf(substr, pos)) {
++num;
}
return num;
};
var last;
exports.last = last = function(array, back) { return array[array.length - (back || 0) - 1]; };
exports.throwSyntaxError = function(message, location) {
var error = new SyntaxError(message);
error.location = location;
error.toString = syntaxErrorToString;
error.stack = error.toString();
console.log("Throwing error", error);
throw error;
};
var helpers = require('./helpers');
var count = helpers.count, last = helpers.last, throwSyntaxError = helpers.throwSyntaxError;
var _ = require('underscore');
exports.Lexer = function () {
var self = this;
self.tokenize = function (code) {
self.ends = []; // The stack for pairing up tokens.
self.tokens = []; // Stream of parsed tokens in the form `['TYPE', value, location data]`.
self.chunkLine = 0;
self.chunkColumn = 0;
self.code = self.clean(code);
var i = 0;
while (true) {
self.chunk = code.slice(i);
if (self.chunk.length <= 0) {
break;
}
// console.log("Current chunk: '" + self.chunk + "'");
consumed =
self.identifierToken() ||
self.commentToken() ||
self.whitespaceToken() ||
self.stringToken() ||
self.numberToken() ||
self.literalToken();
// console.log("Consumed " + consumed + " characters");
// Update position
var results = self.getLineAndColumnFromChunk(consumed);
self.chunkLine = results[0];
self.chunkColumn = results[1];
i += consumed;
}
if (tag = self.ends.pop()) {
self.error("missing " + tag)
}
return self.tokens;
};
self.clean = function (code) {
if (code.charCodeAt(0) === BOM) {
code = code.slice(1)
}
code = code.replace(/\r/g, '').replace(TRAILING_SPACES, '');
if (WHITESPACE.test(code)) {
code = "\n" + code;
--self.chunkLine;
}
return code;
}
self.identifierToken = function () {
var match = IDENTIFIER.exec(self.chunk);
if (!match) {
return 0;
}
var id = match[0];
console.log("Token is an identifier: " + id);
// Preserve length of id for location data
var idLength = id.length;
var poppedToken = undefined;
var tag = 'ID';
var tagToken = self.token(tag, id, 0, idLength);
if (poppedToken) {
tagToken[2].first_line = poppedToken[2].first_line;
tagToken[2].first_column = poppedToken[2].first_column;
}
return id.length;
}
self.numberToken = function () {
var match = NUMBER.exec(self.chunk);
if (!match) {
return 0;
}
var number = match[0];
console.log("Token is a number: " + number);
self.token('NUMBER', number, 0, lexedLength);
return lexedLength;
}
self.stringToken = function () {
var quote = self.chunk.charAt(0);
var string = '';
switch (quote) {
case "'":
string = SIMPLESTR.exec(self.chunk)[0];
break;
case '"':
string = self.balancedString(self.chunk, '"');
break;
}
if (string.length <= 0) {
return 0;
}
console.log("Token is a string: '" + string + "'");
var trimmed = self._removeNewlines(string.slice(1));
self.token('STRING', quote + self._escapeLines(trimmed) + quote, 0, string.length);
return string.length;
};
self.commentToken = function () {
var match = self.chunk.match(COMMENT);
if (!match) {
return 0;
}
comment = match[0];
here = match[1];
console.log("Token is a comment");
return comment.length;
};
self.whitespaceToken = function () {
var match = WHITESPACE.exec(self.chunk);
var nline = self.chunk.charAt(0);
if (!match && nline === '\n') {
return 0;
}
// if (match) {
// console.log("whitespaceToken '" + match[0] + "'");
// }
prev = last(self.tokens);
if (prev) {
if (match) {
prev['spaced'] = true;
} else {
prev['newLine'] = true;
}
}
if (match) {
return match[0].length;
}
return 0;
}
self.literalToken = function () {
var match = OPERATOR.exec(self.chunk);
var value;
if (match) {
value = match[0];
console.log("Token is an operator: '" + value + "'");
} else {
value = self.chunk.charAt(0);
}
var tag = value;
if (value === ';') {
console.log('Token is a semicolon')
tag = 'SEMICOLON'
} else if (_(OP).contains(value)) {
tag = 'OP';
}
self.token(tag, value);
return value.length
};
self.getLineAndColumnFromChunk = function (offset) {
if (offset === 0) {
return [self.chunkLine, self.chunkColumn];
}
if (offset >= self.chunk.length) {
string = self.chunk;
}
else {
string = self.chunk.slice(0, offset - 1);
}
var lineCount = count(string, '\n');
var column = self.chunkColumn;
if (lineCount > 0) {
var lines = string.split('\n');
column = last(lines).length;
} else {
column += string.length;
}
return [self.chunkLine + lineCount, column];
};
self.makeToken = function (tag, value, offsetInChunk, length) {
if (offsetInChunk == null) {
offsetInChunk = 0;
}
if (length == null) {
length = value.length;
}
var locationData = {};
var results = self.getLineAndColumnFromChunk(offsetInChunk);
locationData.first_line = results[0];
locationData.first_column = results[1];
var lastCharacter = Math.max(0, length - 1);
results = self.getLineAndColumnFromChunk(offsetInChunk + lastCharacter);
locationData.last_line = results[0];
locationData.last_column = results[1];
var token = [tag, value, locationData];
return token;
};
self.token = function (tag, value, offsetInChunk, length) {
var token = self.makeToken(tag, value, offsetInChunk, length);
if (token.length <= 0) {
throw new Error("Ugh");
}
self.tokens.push(token);
console.log("Pushed token '" + token[0] + "'");
return token;
};
self.error = function (message, offset) {
offset = offset || 0;
var results = self.getLineAndColumnFromChunk(offset);
firstLine = results[0];
firstColumn = results[1];
self.throwSyntaxError(message, firstLine, firstColumn);
};
self._removeNewlines = function(str) {
return str.replace(/^\s*\n\s*/, '').replace(/([^\\]|\\\\)\s*\n\s*$/, '$1');
};
self._escapeLines = function(str) {
str = str.replace(/\\[^\S\n]*(\n|\\)\s*/g, function(escaped, character) {
if (character === '\n') {
return '';
} else {
return escaped;
}
});
return str.replace(/\s*\n\s*/g, ' ');
};
};
var BOM = 65279;
var IDENTIFIER = /^[A-Za-z_][A-Za-z0-9_]*/;
var NUMBER = /^0[xX][0-9a-fA-F]+{IS}?|^0[cC][0-7]+{IS}?|^[0-9]+{IS}?|^([0-9]+"."[0-9]*)|([0-9]*"."[0-9]+)/i;
var OPERATOR = /=>/;
var WHITESPACE = /^[^\n\S]+/;
var COMMENT = /^###([^#][\s\S]*?)(?:###[^\n\S]*|###$)|^(?:\s*#(?!##[^#]).*)+/;
var TRAILING_SPACES = /\s+$/;
var OP = ['=>'];
/* parser generated by jison 0.4.13 */
/*
Returns a Parser object of the following structure:
Parser: {
yy: {}
}
Parser.prototype: {
yy: {},
trace: function(),
symbols_: {associative list: name ==> number},
terminals_: {associative list: number ==> name},
productions_: [...],
performAction: function anonymous(yytext, yyleng, yylineno, yy, yystate, $$, _$),
table: [...],
defaultActions: {...},
parseError: function(str, hash),
parse: function(input),
lexer: {
EOF: 1,
parseError: function(str, hash),
setInput: function(input),
input: function(),
unput: function(str),
more: function(),
less: function(n),
pastInput: function(),
upcomingInput: function(),
showPosition: function(),
test_match: function(regex_match_array, rule_index),
next: function(),
lex: function(),
begin: function(condition),
popState: function(),
_currentRules: function(),
topState: function(),
pushState: function(condition),
options: {
ranges: boolean (optional: true ==> token location info will include a .range[] member)
flex: boolean (optional: true ==> flex-like lexing behaviour where the rules are tested exhaustively to find the longest match)
backtrack_lexer: boolean (optional: true ==> lexer regexes are tested in order and for each matching regex the action code is invoked; the lexer terminates the scan when a token is returned by the action code)
},
performAction: function(yy, yy_, $avoiding_name_collisions, YY_START),
rules: [...],
conditions: {associative list: name ==> set},
}
}
token location info (@$, _$, etc.): {
first_line: n,
last_line: n,
first_column: n,
last_column: n,
range: [start_number, end_number] (where the numbers are indexes into the input string, regular zero-based)
}
the parseError function receives a 'hash' object with these members for lexer and parser errors: {
text: (matched text)
token: (the produced terminal token, if any)
line: (yylineno)
}
while parser (grammar) errors will also provide these members, i.e. parser errors deliver a superset of attributes: {
loc: (yylloc)
expected: (string describing the set of expected tokens)
recoverable: (boolean: TRUE when the parser has a error recovery rule available for this particular error)
}
*/
var parser = (function () {
var parser = {trace: function trace() {
},
yy: {},
symbols_: {"error": 2, "Program": 3, "ProgramSection": 4, "Expression": 5, "SEMICOLON": 6, "DeclExpression": 7, "OP": 8, "TypeDecl": 9, "VarDeclList": 10, "PrimaryExpression": 11, "VarDecl": 12, "ID": 13, "$accept": 0, "$end": 1},
terminals_: {2: "error", 6: "SEMICOLON", 8: "OP", 13: "ID"},
productions_: [0, [3, 1], [4, 2], [5, 1], [5, 3], [7, 2], [7, 1], [10, 1], [12, 1], [9, 1], [11, 1]],
performAction: function anonymous(yytext, yyleng, yylineno, yy, yystate /* action[1] */, $$ /* vstack */, _$ /* lstack */) {
/* this == yyval */
var $0 = $$.length - 1;
switch (yystate) {
case 1:
return this.$ = new yy.Program($$[$0]);
break;
case 2:
this.$ = new yy.ExpressionStatement($$[$0 - 1]);
break;
case 3:
this.$ = $$[$0];
break;
case 4:
this.$ = new yy.ExpFromBinary($$[$0 - 2], $$[$0 - 1], $$[$0]);
break;
case 5:
this.$ = new yy.DeclExp($$[$0 - 1], $$[$0], 0);
break;
case 6:
this.$ = $$[$0];
break;
case 7:
this.$ = new yy.VarDeclList($$[$0]);
break;
case 8:
this.$ = new yy.VarDecl($$[$0]);
break;
case 9:
this.$ = new yy.TypeDecl(new yy.IdList($$[$0]), 0);
break;
case 10:
this.$ = new yy.ExpFromId($$[$0]);
break;
}
},
table: [
{3: 1, 4: 2, 5: 3, 7: 4, 9: 5, 11: 6, 13: [1, 7]},
{1: [3]},
{1: [2, 1]},
{6: [1, 8], 8: [1, 9]},
{6: [2, 3], 8: [2, 3]},
{10: 10, 12: 11, 13: [1, 12]},
{6: [2, 6], 8: [2, 6]},
{6: [2, 9], 8: [2, 9], 13: [2, 9]},
{1: [2, 2]},
{7: 13, 9: 5, 11: 6, 13: [1, 7]},
{6: [2, 5], 8: [2, 5]},
{6: [2, 7], 8: [2, 7]},
{6: [2, 8], 8: [2, 8]},
{6: [2, 4], 8: [2, 4]}
],
defaultActions: {2: [2, 1], 8: [2, 2]},
parseError: function parseError(str, hash) {
if (hash.recoverable) {
this.trace(str);
} else {
throw new Error(str);
}
},
parse: function parse(input) {
var self = this, stack = [0], vstack = [null], lstack = [], table = this.table, yytext = '', yylineno = 0, yyleng = 0, recovering = 0, TERROR = 2, EOF = 1;
var args = lstack.slice.call(arguments, 1);
this.lexer.setInput(input);
this.lexer.yy = this.yy;
this.yy.lexer = this.lexer;
this.yy.parser = this;
if (typeof this.lexer.yylloc == 'undefined') {
this.lexer.yylloc = {};
}
var yyloc = this.lexer.yylloc;
lstack.push(yyloc);
var ranges = this.lexer.options && this.lexer.options.ranges;
if (typeof this.yy.parseError === 'function') {
this.parseError = this.yy.parseError;
} else {
this.parseError = Object.getPrototypeOf(this).parseError;
}
function popStack(n) {
stack.length = stack.length - 2 * n;
vstack.length = vstack.length - n;
lstack.length = lstack.length - n;
}
function lex() {
var token;
token = self.lexer.lex() || EOF;
if (typeof token !== 'number') {
token = self.symbols_[token] || token;
}
return token;
}
var symbol, preErrorSymbol, state, action, a, r, yyval = {}, p, len, newState, expected;
while (true) {
state = stack[stack.length - 1];
if (this.defaultActions[state]) {
action = this.defaultActions[state];
} else {
if (symbol === null || typeof symbol == 'undefined') {
symbol = lex();
}
action = table[state] && table[state][symbol];
}
if (typeof action === 'undefined' || !action.length || !action[0]) {
var errStr = '';
expected = [];
for (p in table[state]) {
if (this.terminals_[p] && p > TERROR) {
expected.push('\'' + this.terminals_[p] + '\'');
}
}
if (this.lexer.showPosition) {
errStr = 'Parse error on line ' + (yylineno + 1) + ':\n' + this.lexer.showPosition() + '\nExpecting ' + expected.join(', ') + ', got \'' + (this.terminals_[symbol] || symbol) + '\'';
} else {
errStr = 'Parse error on line ' + (yylineno + 1) + ': Unexpected ' + (symbol == EOF ? 'end of input' : '\'' + (this.terminals_[symbol] || symbol) + '\'');
}
this.parseError(errStr, {
text: this.lexer.match,
token: this.terminals_[symbol] || symbol,
line: this.lexer.yylineno,
loc: yyloc,
expected: expected
});
}
if (action[0] instanceof Array && action.length > 1) {
throw new Error('Parse Error: multiple actions possible at state: ' + state + ', token: ' + symbol);
}
switch (action[0]) {
case 1:
stack.push(symbol);
vstack.push(this.lexer.yytext);
lstack.push(this.lexer.yylloc);
stack.push(action[1]);
symbol = null;
if (!preErrorSymbol) {
yyleng = this.lexer.yyleng;
yytext = this.lexer.yytext;
yylineno = this.lexer.yylineno;
yyloc = this.lexer.yylloc;
if (recovering > 0) {
recovering--;
}
} else {
symbol = preErrorSymbol;
preErrorSymbol = null;
}
break;
case 2:
len = this.productions_[action[1]][1];
yyval.$ = vstack[vstack.length - len];
yyval._$ = {
first_line: lstack[lstack.length - (len || 1)].first_line,
last_line: lstack[lstack.length - 1].last_line,
first_column: lstack[lstack.length - (len || 1)].first_column,
last_column: lstack[lstack.length - 1].last_column
};
if (ranges) {
yyval._$.range = [
lstack[lstack.length - (len || 1)].range[0],
lstack[lstack.length - 1].range[1]
];
}
r = this.performAction.apply(yyval, [
yytext,
yyleng,
yylineno,
this.yy,
action[1],
vstack,
lstack
].concat(args));
if (typeof r !== 'undefined') {
return r;
}
if (len) {
stack = stack.slice(0, -1 * len * 2);
vstack = vstack.slice(0, -1 * len);
lstack = lstack.slice(0, -1 * len);
}
stack.push(this.productions_[action[1]][0]);
vstack.push(yyval.$);
lstack.push(yyval._$);
newState = table[stack[stack.length - 2]][stack[stack.length - 1]];
stack.push(newState);
break;
case 3:
return true;
}
}
return true;
}};
function Parser() {
this.yy = {};
}
Parser.prototype = parser;
parser.Parser = Parser;
return new Parser;
})();
if (typeof require !== 'undefined' && typeof exports !== 'undefined') {
exports.parser = parser;
exports.Parser = parser.Parser;
exports.parse = function () {
return parser.parse.apply(parser, arguments);
};
exports.main = function commonjsMain(args) {
if (!args[1]) {
console.log('Usage: ' + args[0] + ' FILE');
process.exit(1);
}
var source = require('fs').readFileSync(require('path').normalize(args[1]), "utf8");
return exports.parser.parse(source);
};
if (typeof module !== 'undefined' && require.main === module) {
exports.main(process.argv.slice(1));
}
}
var helpers = require('./language/helpers');
var _ = require('underscore');
_.str = require('underscore.string');
_.mixin(_.str.exports());
var Lexer = require('./language/lexer').Lexer;
var Parser = require('./src/parser').Parser;
var yy = {};
yy.ExpFromId = function (exp) {
this.exp = exp;
};
yy.IdList = function (idList) {
this.idList = idList;
};
yy.TypeDecl = function (decl) {
this.decl = decl;
};
yy.VarDecl = function (decl) {
this.decl = decl;
};
yy.VarDeclList = function (declList) {
this.declList = declList;
};
yy.DeclExp = function (exp) {
this.exp = exp;
};
yy.ExpressionStatement = function (statement) {
this.statement = statement;
};
yy.Program = function (program) {
this.program = program;
};
var sourceCode = "Type var => out;";
var lexer = new Lexer();
var tokens = lexer.tokenize(sourceCode);
var parser = new Parser();
parser.yy = yy;
parser.lexer = {
lex: function () {
var token = this.tokens[this.pos++];
var tag;
if (token) {
tag = token[0];
this.yytext = token[1];
this.yylloc = token[2];
this.yylineno = this.yylloc.first_line;
} else {
tag = '';
}
return tag;
},
setInput: function (tokens) {
this.tokens = tokens;
this.pos = 0;
},
upcomingInput: -function () {
return "";
}
};
//console.log("Got tokens:", tokens);
var parsed = parser.parse(tokens);
console.log("Got parse tree:", parsed);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment