Created
December 9, 2013 15:48
-
-
Save aknuds1/7874252 to your computer and use it in GitHub Desktop.
Generate parser via Jison for an ambiguous grammar.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var Parser = require('jison').Parser; | |
var _ = require('underscore'); | |
grammar = { | |
Program: [ | |
['ProgramSection', '$$ = new yy.Program($1);'] | |
], | |
ProgramSection: [ | |
['Expression SEMICOLON', '$$ = new yy.ExpressionStatement($1);'] | |
], | |
Expression: [ | |
['DeclExpression', '$$ = $1;'], | |
['Expression OP DeclExpression', '$$ = new yy.ExpFromBinary($1, $2, $3);'] | |
], | |
DeclExpression: [ | |
['TypeDecl VarDeclList', '$$ = new yy.DeclExp($1, $2, 0);'], | |
['PrimaryExpression', '$$ = $1;'] | |
], | |
VarDeclList: [ | |
['VarDecl', '$$ = new yy.VarDeclList($1);'] | |
], | |
VarDecl: [ | |
['ID', '$$ = new yy.VarDecl($1);'] | |
], | |
TypeDecl: [ | |
['ID', '$$ = new yy.TypeDecl(new yy.IdList($1), 0);'] | |
], | |
PrimaryExpression: [ | |
['ID', '$$ = new yy.ExpFromId($1);'] | |
] | |
}; | |
var tokens = []; | |
_(_(grammar).keys()).each(function (name) { | |
var alternatives = grammar[name]; | |
grammar[name] = _(alternatives).map(function (alt) { | |
var theseAlternatives = alt[0].split(' '); | |
// console.log("Alternative: '" + theseAlternatives) | |
_(theseAlternatives).each(function (token) { | |
if (!grammar[token] && !_(tokens).some(function (t) { | |
return t == token; | |
})) { | |
//console.log("Terminal token: " + token) | |
tokens.push(token); | |
} else { | |
//console.log("Non-terminal token: " + token) | |
} | |
}); | |
if (name === "Program") { | |
alt[1] = "return " + alt[1]; | |
} | |
return alt; | |
}); | |
}); | |
var operators = []; | |
var parserConfig = { | |
tokens: tokens.join(' '), | |
bnf: grammar, | |
operators: operators.reverse(), | |
startSymbol: 'Program' | |
}; | |
console.log('Tokens:', parserConfig.tokens) | |
console.log('BNF:', parserConfig.bnf) | |
console.log('Operators:', parserConfig.operators) | |
exports.generate = new Parser(parserConfig).generate; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
exports.count = function(string, substr) { | |
var num, pos; | |
num = pos = 0; | |
if (!substr.length) { | |
return 1/0; | |
} | |
while (pos = 1 + string.indexOf(substr, pos)) { | |
++num; | |
} | |
return num; | |
}; | |
var last; | |
exports.last = last = function(array, back) { return array[array.length - (back || 0) - 1]; }; | |
// Throw a SyntaxError annotated with parser location data.
//
// message  - human-readable description of the syntax problem.
// location - location object (e.g. {first_line, first_column, ...}) attached
//            to the error so callers can report where it occurred.
//
// NOTE(review): `syntaxErrorToString` is not defined anywhere in this file —
// presumably it should format the error with its location; verify it exists
// in the real module, otherwise evaluating it here raises a ReferenceError
// before the SyntaxError is even built.
exports.throwSyntaxError = function(message, location) {
    var error = new SyntaxError(message);
    error.location = location;
    error.toString = syntaxErrorToString;
    // Copy the formatted message into `stack` so environments that print
    // `error.stack` show the friendly form instead of a raw trace.
    error.stack = error.toString();
    console.log("Throwing error", error);
    throw error;
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var helpers = require('./helpers'); | |
var count = helpers.count, last = helpers.last, throwSyntaxError = helpers.throwSyntaxError; | |
var _ = require('underscore'); | |
exports.Lexer = function () { | |
var self = this; | |
self.tokenize = function (code) { | |
self.ends = []; // The stack for pairing up tokens. | |
self.tokens = []; // Stream of parsed tokens in the form `['TYPE', value, location data]`. | |
self.chunkLine = 0; | |
self.chunkColumn = 0; | |
self.code = self.clean(code); | |
var i = 0; | |
while (true) { | |
self.chunk = code.slice(i); | |
if (self.chunk.length <= 0) { | |
break; | |
} | |
// console.log("Current chunk: '" + self.chunk + "'"); | |
consumed = | |
self.identifierToken() || | |
self.commentToken() || | |
self.whitespaceToken() || | |
self.stringToken() || | |
self.numberToken() || | |
self.literalToken(); | |
// console.log("Consumed " + consumed + " characters"); | |
// Update position | |
var results = self.getLineAndColumnFromChunk(consumed); | |
self.chunkLine = results[0]; | |
self.chunkColumn = results[1]; | |
i += consumed; | |
} | |
if (tag = self.ends.pop()) { | |
self.error("missing " + tag) | |
} | |
return self.tokens; | |
}; | |
self.clean = function (code) { | |
if (code.charCodeAt(0) === BOM) { | |
code = code.slice(1) | |
} | |
code = code.replace(/\r/g, '').replace(TRAILING_SPACES, ''); | |
if (WHITESPACE.test(code)) { | |
code = "\n" + code; | |
--self.chunkLine; | |
} | |
return code; | |
} | |
self.identifierToken = function () { | |
var match = IDENTIFIER.exec(self.chunk); | |
if (!match) { | |
return 0; | |
} | |
var id = match[0]; | |
console.log("Token is an identifier: " + id); | |
// Preserve length of id for location data | |
var idLength = id.length; | |
var poppedToken = undefined; | |
var tag = 'ID'; | |
var tagToken = self.token(tag, id, 0, idLength); | |
if (poppedToken) { | |
tagToken[2].first_line = poppedToken[2].first_line; | |
tagToken[2].first_column = poppedToken[2].first_column; | |
} | |
return id.length; | |
} | |
self.numberToken = function () { | |
var match = NUMBER.exec(self.chunk); | |
if (!match) { | |
return 0; | |
} | |
var number = match[0]; | |
console.log("Token is a number: " + number); | |
self.token('NUMBER', number, 0, lexedLength); | |
return lexedLength; | |
} | |
self.stringToken = function () { | |
var quote = self.chunk.charAt(0); | |
var string = ''; | |
switch (quote) { | |
case "'": | |
string = SIMPLESTR.exec(self.chunk)[0]; | |
break; | |
case '"': | |
string = self.balancedString(self.chunk, '"'); | |
break; | |
} | |
if (string.length <= 0) { | |
return 0; | |
} | |
console.log("Token is a string: '" + string + "'"); | |
var trimmed = self._removeNewlines(string.slice(1)); | |
self.token('STRING', quote + self._escapeLines(trimmed) + quote, 0, string.length); | |
return string.length; | |
}; | |
self.commentToken = function () { | |
var match = self.chunk.match(COMMENT); | |
if (!match) { | |
return 0; | |
} | |
comment = match[0]; | |
here = match[1]; | |
console.log("Token is a comment"); | |
return comment.length; | |
}; | |
self.whitespaceToken = function () { | |
var match = WHITESPACE.exec(self.chunk); | |
var nline = self.chunk.charAt(0); | |
if (!match && nline === '\n') { | |
return 0; | |
} | |
// if (match) { | |
// console.log("whitespaceToken '" + match[0] + "'"); | |
// } | |
prev = last(self.tokens); | |
if (prev) { | |
if (match) { | |
prev['spaced'] = true; | |
} else { | |
prev['newLine'] = true; | |
} | |
} | |
if (match) { | |
return match[0].length; | |
} | |
return 0; | |
} | |
self.literalToken = function () { | |
var match = OPERATOR.exec(self.chunk); | |
var value; | |
if (match) { | |
value = match[0]; | |
console.log("Token is an operator: '" + value + "'"); | |
} else { | |
value = self.chunk.charAt(0); | |
} | |
var tag = value; | |
if (value === ';') { | |
console.log('Token is a semicolon') | |
tag = 'SEMICOLON' | |
} else if (_(OP).contains(value)) { | |
tag = 'OP'; | |
} | |
self.token(tag, value); | |
return value.length | |
}; | |
self.getLineAndColumnFromChunk = function (offset) { | |
if (offset === 0) { | |
return [self.chunkLine, self.chunkColumn]; | |
} | |
if (offset >= self.chunk.length) { | |
string = self.chunk; | |
} | |
else { | |
string = self.chunk.slice(0, offset - 1); | |
} | |
var lineCount = count(string, '\n'); | |
var column = self.chunkColumn; | |
if (lineCount > 0) { | |
var lines = string.split('\n'); | |
column = last(lines).length; | |
} else { | |
column += string.length; | |
} | |
return [self.chunkLine + lineCount, column]; | |
}; | |
self.makeToken = function (tag, value, offsetInChunk, length) { | |
if (offsetInChunk == null) { | |
offsetInChunk = 0; | |
} | |
if (length == null) { | |
length = value.length; | |
} | |
var locationData = {}; | |
var results = self.getLineAndColumnFromChunk(offsetInChunk); | |
locationData.first_line = results[0]; | |
locationData.first_column = results[1]; | |
var lastCharacter = Math.max(0, length - 1); | |
results = self.getLineAndColumnFromChunk(offsetInChunk + lastCharacter); | |
locationData.last_line = results[0]; | |
locationData.last_column = results[1]; | |
var token = [tag, value, locationData]; | |
return token; | |
}; | |
self.token = function (tag, value, offsetInChunk, length) { | |
var token = self.makeToken(tag, value, offsetInChunk, length); | |
if (token.length <= 0) { | |
throw new Error("Ugh"); | |
} | |
self.tokens.push(token); | |
console.log("Pushed token '" + token[0] + "'"); | |
return token; | |
}; | |
self.error = function (message, offset) { | |
offset = offset || 0; | |
var results = self.getLineAndColumnFromChunk(offset); | |
firstLine = results[0]; | |
firstColumn = results[1]; | |
self.throwSyntaxError(message, firstLine, firstColumn); | |
}; | |
self._removeNewlines = function(str) { | |
return str.replace(/^\s*\n\s*/, '').replace(/([^\\]|\\\\)\s*\n\s*$/, '$1'); | |
}; | |
self._escapeLines = function(str) { | |
str = str.replace(/\\[^\S\n]*(\n|\\)\s*/g, function(escaped, character) { | |
if (character === '\n') { | |
return ''; | |
} else { | |
return escaped; | |
} | |
}); | |
return str.replace(/\s*\n\s*/g, ' '); | |
}; | |
}; | |
// Unicode byte-order-mark code point, stripped by Lexer.clean.
var BOM = 65279;
var IDENTIFIER = /^[A-Za-z_][A-Za-z0-9_]*/;
// BUG FIX: the original NUMBER regex was pasted from a flex spec — it kept
// `{IS}` macro references and quoted dots ("."), which JavaScript treats as
// literal characters, so plain integers and floats never matched; the last
// float alternative was also unanchored. Alternatives: hex, octal (0c...),
// float (digits required on at least one side of the dot), then integer.
var NUMBER = /^0[xX][0-9a-fA-F]+|^0[cC][0-7]+|^[0-9]+\.[0-9]*|^[0-9]*\.[0-9]+|^[0-9]+/;
// BUG FIX: anchored — exec() without ^ could match '=>' later in the chunk
// and make literalToken report the wrong value/length for the current position.
var OPERATOR = /^=>/;
// Non-newline whitespace at the start of the chunk.
var WHITESPACE = /^[^\n\S]+/;
// Block (###...###) or line (#...) comments.
var COMMENT = /^###([^#][\s\S]*?)(?:###[^\n\S]*|###$)|^(?:\s*#(?!##[^#]).*)+/;
var TRAILING_SPACES = /\s+$/;
// Values that literalToken tags as 'OP' terminals.
var OP = ['=>'];
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* parser generated by jison 0.4.13 */ | |
/* | |
Returns a Parser object of the following structure: | |
Parser: { | |
yy: {} | |
} | |
Parser.prototype: { | |
yy: {}, | |
trace: function(), | |
symbols_: {associative list: name ==> number}, | |
terminals_: {associative list: number ==> name}, | |
productions_: [...], | |
performAction: function anonymous(yytext, yyleng, yylineno, yy, yystate, $$, _$), | |
table: [...], | |
defaultActions: {...}, | |
parseError: function(str, hash), | |
parse: function(input), | |
lexer: { | |
EOF: 1, | |
parseError: function(str, hash), | |
setInput: function(input), | |
input: function(), | |
unput: function(str), | |
more: function(), | |
less: function(n), | |
pastInput: function(), | |
upcomingInput: function(), | |
showPosition: function(), | |
test_match: function(regex_match_array, rule_index), | |
next: function(), | |
lex: function(), | |
begin: function(condition), | |
popState: function(), | |
_currentRules: function(), | |
topState: function(), | |
pushState: function(condition), | |
options: { | |
ranges: boolean (optional: true ==> token location info will include a .range[] member) | |
flex: boolean (optional: true ==> flex-like lexing behaviour where the rules are tested exhaustively to find the longest match) | |
backtrack_lexer: boolean (optional: true ==> lexer regexes are tested in order and for each matching regex the action code is invoked; the lexer terminates the scan when a token is returned by the action code) | |
}, | |
performAction: function(yy, yy_, $avoiding_name_collisions, YY_START), | |
rules: [...], | |
conditions: {associative list: name ==> set}, | |
} | |
} | |
token location info (@$, _$, etc.): { | |
first_line: n, | |
last_line: n, | |
first_column: n, | |
last_column: n, | |
range: [start_number, end_number] (where the numbers are indexes into the input string, regular zero-based) | |
} | |
the parseError function receives a 'hash' object with these members for lexer and parser errors: { | |
text: (matched text) | |
token: (the produced terminal token, if any) | |
line: (yylineno) | |
} | |
while parser (grammar) errors will also provide these members, i.e. parser errors deliver a superset of attributes: { | |
loc: (yylloc) | |
expected: (string describing the set of expected tokens) | |
recoverable: (boolean: TRUE when the parser has a error recovery rule available for this particular error) | |
} | |
*/ | |
// Jison-generated LALR parser (jison 0.4.13) for the grammar defined in the
// generator script. NOTE(review): machine-generated — prefer regenerating
// from the grammar over hand-editing the tables below.
var parser = (function () {
    var parser = {trace: function trace() {
        },
        yy: {},
        // Grammar symbol name -> numeric id used throughout the tables below.
        symbols_: {"error": 2, "Program": 3, "ProgramSection": 4, "Expression": 5, "SEMICOLON": 6, "DeclExpression": 7, "OP": 8, "TypeDecl": 9, "VarDeclList": 10, "PrimaryExpression": 11, "VarDecl": 12, "ID": 13, "$accept": 0, "$end": 1},
        // Reverse map (numeric id -> name) for terminals only; used in errors.
        terminals_: {2: "error", 6: "SEMICOLON", 8: "OP", 13: "ID"},
        // productions_[i] = [lhs symbol id, rhs length] for production i.
        productions_: [0, [3, 1], [4, 2], [5, 1], [5, 3], [7, 2], [7, 1], [10, 1], [12, 1], [9, 1], [11, 1]],
        // Semantic actions: one case per production, mirroring the grammar.
        performAction: function anonymous(yytext, yyleng, yylineno, yy, yystate /* action[1] */, $$ /* vstack */, _$ /* lstack */) {
            /* this == yyval */
            var $0 = $$.length - 1;
            switch (yystate) {
            case 1:
                // Program: ProgramSection — returning ends the parse.
                return this.$ = new yy.Program($$[$0]);
                break;
            case 2:
                // ProgramSection: Expression SEMICOLON
                this.$ = new yy.ExpressionStatement($$[$0 - 1]);
                break;
            case 3:
                // Expression: DeclExpression
                this.$ = $$[$0];
                break;
            case 4:
                // Expression: Expression OP DeclExpression
                this.$ = new yy.ExpFromBinary($$[$0 - 2], $$[$0 - 1], $$[$0]);
                break;
            case 5:
                // DeclExpression: TypeDecl VarDeclList
                this.$ = new yy.DeclExp($$[$0 - 1], $$[$0], 0);
                break;
            case 6:
                // DeclExpression: PrimaryExpression
                this.$ = $$[$0];
                break;
            case 7:
                // VarDeclList: VarDecl
                this.$ = new yy.VarDeclList($$[$0]);
                break;
            case 8:
                // VarDecl: ID
                this.$ = new yy.VarDecl($$[$0]);
                break;
            case 9:
                // TypeDecl: ID
                this.$ = new yy.TypeDecl(new yy.IdList($$[$0]), 0);
                break;
            case 10:
                // PrimaryExpression: ID
                this.$ = new yy.ExpFromId($$[$0]);
                break;
            }
        },
        // LR parse table: one row per state; entries map a symbol id to
        // [1, n] shift-to-state-n, [2, p] reduce-by-production-p, [3] accept,
        // or a bare state number for gotos on non-terminals.
        table: [
            {3: 1, 4: 2, 5: 3, 7: 4, 9: 5, 11: 6, 13: [1, 7]},
            {1: [3]},
            {1: [2, 1]},
            {6: [1, 8], 8: [1, 9]},
            {6: [2, 3], 8: [2, 3]},
            {10: 10, 12: 11, 13: [1, 12]},
            {6: [2, 6], 8: [2, 6]},
            {6: [2, 9], 8: [2, 9], 13: [2, 9]},
            {1: [2, 2]},
            {7: 13, 9: 5, 11: 6, 13: [1, 7]},
            {6: [2, 5], 8: [2, 5]},
            {6: [2, 7], 8: [2, 7]},
            {6: [2, 8], 8: [2, 8]},
            {6: [2, 4], 8: [2, 4]}
        ],
        // States whose action is the same for every lookahead (no lex needed).
        defaultActions: {2: [2, 1], 8: [2, 2]},
        parseError: function parseError(str, hash) {
            if (hash.recoverable) {
                this.trace(str);
            } else {
                throw new Error(str);
            }
        },
        // The generic jison LR driver: maintains a state stack, a value
        // stack (vstack) and a location stack (lstack) in lock-step.
        parse: function parse(input) {
            var self = this, stack = [0], vstack = [null], lstack = [], table = this.table, yytext = '', yylineno = 0, yyleng = 0, recovering = 0, TERROR = 2, EOF = 1;
            var args = lstack.slice.call(arguments, 1);
            this.lexer.setInput(input);
            this.lexer.yy = this.yy;
            this.yy.lexer = this.lexer;
            this.yy.parser = this;
            if (typeof this.lexer.yylloc == 'undefined') {
                this.lexer.yylloc = {};
            }
            var yyloc = this.lexer.yylloc;
            lstack.push(yyloc);
            var ranges = this.lexer.options && this.lexer.options.ranges;
            // A user-supplied yy.parseError overrides the prototype's.
            if (typeof this.yy.parseError === 'function') {
                this.parseError = this.yy.parseError;
            } else {
                this.parseError = Object.getPrototypeOf(this).parseError;
            }
            function popStack(n) {
                // stack holds (symbol, state) pairs, hence 2 * n.
                stack.length = stack.length - 2 * n;
                vstack.length = vstack.length - n;
                lstack.length = lstack.length - n;
            }
            // Pull the next token from the lexer and map it to its symbol id.
            function lex() {
                var token;
                token = self.lexer.lex() || EOF;
                if (typeof token !== 'number') {
                    token = self.symbols_[token] || token;
                }
                return token;
            }
            var symbol, preErrorSymbol, state, action, a, r, yyval = {}, p, len, newState, expected;
            while (true) {
                state = stack[stack.length - 1];
                if (this.defaultActions[state]) {
                    action = this.defaultActions[state];
                } else {
                    if (symbol === null || typeof symbol == 'undefined') {
                        symbol = lex();
                    }
                    action = table[state] && table[state][symbol];
                }
                // No entry in the table: syntax error. Build a readable
                // message listing the terminals this state would accept.
                if (typeof action === 'undefined' || !action.length || !action[0]) {
                    var errStr = '';
                    expected = [];
                    for (p in table[state]) {
                        if (this.terminals_[p] && p > TERROR) {
                            expected.push('\'' + this.terminals_[p] + '\'');
                        }
                    }
                    if (this.lexer.showPosition) {
                        errStr = 'Parse error on line ' + (yylineno + 1) + ':\n' + this.lexer.showPosition() + '\nExpecting ' + expected.join(', ') + ', got \'' + (this.terminals_[symbol] || symbol) + '\'';
                    } else {
                        errStr = 'Parse error on line ' + (yylineno + 1) + ': Unexpected ' + (symbol == EOF ? 'end of input' : '\'' + (this.terminals_[symbol] || symbol) + '\'');
                    }
                    this.parseError(errStr, {
                        text: this.lexer.match,
                        token: this.terminals_[symbol] || symbol,
                        line: this.lexer.yylineno,
                        loc: yyloc,
                        expected: expected
                    });
                }
                // An array of actions would mean a shift/reduce or
                // reduce/reduce conflict survived table generation.
                if (action[0] instanceof Array && action.length > 1) {
                    throw new Error('Parse Error: multiple actions possible at state: ' + state + ', token: ' + symbol);
                }
                switch (action[0]) {
                case 1:
                    // Shift: push the symbol and the new state.
                    stack.push(symbol);
                    vstack.push(this.lexer.yytext);
                    lstack.push(this.lexer.yylloc);
                    stack.push(action[1]);
                    symbol = null;
                    if (!preErrorSymbol) {
                        yyleng = this.lexer.yyleng;
                        yytext = this.lexer.yytext;
                        yylineno = this.lexer.yylineno;
                        yyloc = this.lexer.yylloc;
                        if (recovering > 0) {
                            recovering--;
                        }
                    } else {
                        symbol = preErrorSymbol;
                        preErrorSymbol = null;
                    }
                    break;
                case 2:
                    // Reduce by production action[1]: pop `len` rhs symbols,
                    // run the semantic action, push the lhs and goto state.
                    len = this.productions_[action[1]][1];
                    yyval.$ = vstack[vstack.length - len];
                    // Location of the reduction spans first to last rhs item.
                    yyval._$ = {
                        first_line: lstack[lstack.length - (len || 1)].first_line,
                        last_line: lstack[lstack.length - 1].last_line,
                        first_column: lstack[lstack.length - (len || 1)].first_column,
                        last_column: lstack[lstack.length - 1].last_column
                    };
                    if (ranges) {
                        yyval._$.range = [
                            lstack[lstack.length - (len || 1)].range[0],
                            lstack[lstack.length - 1].range[1]
                        ];
                    }
                    r = this.performAction.apply(yyval, [
                        yytext,
                        yyleng,
                        yylineno,
                        this.yy,
                        action[1],
                        vstack,
                        lstack
                    ].concat(args));
                    // A semantic action that returns a value ends the parse.
                    if (typeof r !== 'undefined') {
                        return r;
                    }
                    if (len) {
                        stack = stack.slice(0, -1 * len * 2);
                        vstack = vstack.slice(0, -1 * len);
                        lstack = lstack.slice(0, -1 * len);
                    }
                    stack.push(this.productions_[action[1]][0]);
                    vstack.push(yyval.$);
                    lstack.push(yyval._$);
                    newState = table[stack[stack.length - 2]][stack[stack.length - 1]];
                    stack.push(newState);
                    break;
                case 3:
                    // Accept.
                    return true;
                }
            }
            return true;
        }};
    function Parser() {
        this.yy = {};
    }
    Parser.prototype = parser;
    parser.Parser = Parser;
    return new Parser;
})();
if (typeof require !== 'undefined' && typeof exports !== 'undefined') { | |
exports.parser = parser; | |
exports.Parser = parser.Parser; | |
exports.parse = function () { | |
return parser.parse.apply(parser, arguments); | |
}; | |
exports.main = function commonjsMain(args) { | |
if (!args[1]) { | |
console.log('Usage: ' + args[0] + ' FILE'); | |
process.exit(1); | |
} | |
var source = require('fs').readFileSync(require('path').normalize(args[1]), "utf8"); | |
return exports.parser.parse(source); | |
}; | |
if (typeof module !== 'undefined' && require.main === module) { | |
exports.main(process.argv.slice(1)); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var helpers = require('./language/helpers'); | |
var _ = require('underscore'); | |
_.str = require('underscore.string'); | |
_.mixin(_.str.exports()); | |
var Lexer = require('./language/lexer').Lexer; | |
var Parser = require('./src/parser').Parser; | |
// Namespace of AST node constructors, handed to the parser via `parser.yy`
// so the grammar's semantic actions can build the tree. Each node simply
// records its constructor argument under a descriptive property name.
var yy = {};
yy.ExpFromId = function (exp) { this.exp = exp; };
yy.IdList = function (idList) { this.idList = idList; };
yy.TypeDecl = function (decl) { this.decl = decl; };
yy.VarDecl = function (decl) { this.decl = decl; };
yy.VarDeclList = function (declList) { this.declList = declList; };
yy.DeclExp = function (exp) { this.exp = exp; };
yy.ExpressionStatement = function (statement) { this.statement = statement; };
yy.Program = function (program) { this.program = program; };
// Drive the pipeline: lex a sample program, then parse the token stream.
var sourceCode = "Type var => out;";
var lexer = new Lexer();
var tokens = lexer.tokenize(sourceCode);
var parser = new Parser();
parser.yy = yy;
// Adapt our pre-tokenized stream to the lexer interface jison expects
// (setInput / lex, plus yytext / yylloc / yylineno side channels).
parser.lexer = {
    // Return the next token's tag; '' (falsy) signals end of input.
    lex: function () {
        var token = this.tokens[this.pos++];
        var tag;
        if (token) {
            tag = token[0];
            this.yytext = token[1];
            this.yylloc = token[2];
            this.yylineno = this.yylloc.first_line;
        } else {
            tag = '';
        }
        return tag;
    },
    setInput: function (tokens) {
        this.tokens = tokens;
        this.pos = 0;
    },
    // BUG FIX: the original read `upcomingInput: -function () {...}` — the
    // stray unary minus coerced the function to NaN, so upcomingInput was
    // not callable at all.
    upcomingInput: function () {
        return "";
    }
};
//console.log("Got tokens:", tokens);
var parsed = parser.parse(tokens);
console.log("Got parse tree:", parsed);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment