Created
May 22, 2019 11:16
-
-
Save iomeone/70b31b839918e36d6c315e4c59218044 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function Rule(name, symbols, postprocess) { | |
this.id = ++Rule.highestId; | |
this.name = name; | |
this.symbols = symbols; // a list of literal | regex class | nonterminal | |
this.postprocess = postprocess; | |
return this; | |
} | |
Rule.highestId = 0; | |
Rule.prototype.toString = function(withCursorAt) { | |
function stringifySymbolSequence (e) { | |
return e.literal ? JSON.stringify(e.literal) : | |
e.type ? '%' + e.type : e.toString(); | |
} | |
var symbolSequence = (typeof withCursorAt === "undefined") | |
? this.symbols.map(stringifySymbolSequence).join(' ') | |
: ( this.symbols.slice(0, withCursorAt).map(stringifySymbolSequence).join(' ') | |
+ " ● " | |
+ this.symbols.slice(withCursorAt).map(stringifySymbolSequence).join(' ') ); | |
return this.name + " → " + symbolSequence; | |
} | |
// a State is a rule at a position from a given starting point in the input stream (reference) | |
function State(rule, dot, reference, wantedBy) { | |
this.rule = rule; | |
this.dot = dot; | |
this.reference = reference; | |
this.data = []; | |
this.wantedBy = wantedBy; | |
this.isComplete = this.dot === rule.symbols.length; | |
} | |
State.prototype.toString = function() { | |
return "{" + this.rule.toString(this.dot) + "}, from: " + (this.reference || 0); | |
}; | |
State.prototype.nextState = function(child) { | |
var state = new State(this.rule, this.dot + 1, this.reference, this.wantedBy); | |
state.left = this; | |
state.right = child; | |
if (state.isComplete) { | |
state.data = state.build(); | |
} | |
return state; | |
}; | |
State.prototype.build = function() { | |
var children = []; | |
var node = this; | |
do { | |
children.push(node.right.data); | |
node = node.left; | |
} while (node.left); | |
children.reverse(); | |
return children; | |
}; | |
State.prototype.finish = function() { | |
if (this.rule.postprocess) { | |
this.data = this.rule.postprocess(this.data, this.reference, Parser.fail); | |
} | |
}; | |
function Column(grammar, index) { | |
this.grammar = grammar; | |
this.index = index; | |
this.states = []; | |
this.wants = {}; // states indexed by the non-terminal they expect | |
this.scannable = []; // list of states that expect a token | |
this.completed = {}; // states that are nullable | |
} | |
Column.prototype.process = function(nextColumn) { | |
var states = this.states; | |
var wants = this.wants; | |
var completed = this.completed; | |
for (var w = 0; w < states.length; w++) { // nb. we push() during iteration | |
var state = states[w]; | |
if (state.isComplete) { | |
state.finish(); | |
if (state.data !== Parser.fail) { | |
// complete | |
var wantedBy = state.wantedBy; | |
for (var i = wantedBy.length; i--; ) { // this line is hot | |
var left = wantedBy[i]; | |
this.complete(left, state); | |
} | |
// special-case nullables | |
if (state.reference === this.index) { | |
// make sure future predictors of this rule get completed. | |
var exp = state.rule.name; | |
(this.completed[exp] = this.completed[exp] || []).push(state); | |
} | |
} | |
} else { | |
// queue scannable states | |
var exp = state.rule.symbols[state.dot]; | |
if (typeof exp !== 'string') { | |
this.scannable.push(state); | |
continue; | |
} | |
// predict | |
if (wants[exp]) { | |
wants[exp].push(state); | |
if (completed.hasOwnProperty(exp)) { | |
var nulls = completed[exp]; | |
for (var i = 0; i < nulls.length; i++) { | |
var right = nulls[i]; | |
this.complete(state, right); | |
} | |
} | |
} else { | |
wants[exp] = [state]; | |
this.predict(exp); | |
} | |
} | |
} | |
} | |
Column.prototype.predict = function(exp) { | |
var rules = this.grammar.byName[exp] || []; | |
for (var i = 0; i < rules.length; i++) { | |
var r = rules[i]; | |
var wantedBy = this.wants[exp]; | |
var s = new State(r, 0, this.index, wantedBy); | |
this.states.push(s); | |
} | |
} | |
Column.prototype.complete = function(left, right) { | |
var copy = left.nextState(right); | |
this.states.push(copy); | |
} | |
function Grammar(rules, start) { | |
this.rules = rules; | |
this.start = start || this.rules[0].name; | |
var byName = this.byName = {}; | |
this.rules.forEach(function(rule) { | |
if (!byName.hasOwnProperty(rule.name)) { | |
byName[rule.name] = []; | |
} | |
byName[rule.name].push(rule); | |
}); | |
} | |
// So we can allow passing (rules, start) directly to Parser for backwards compatibility | |
Grammar.fromCompiled = function(rules, start) { | |
var lexer = rules.Lexer; | |
if (rules.ParserStart) { | |
start = rules.ParserStart; | |
rules = rules.ParserRules; | |
} | |
var rules = rules.map(function (r) { return (new Rule(r.name, r.symbols, r.postprocess)); }); | |
var g = new Grammar(rules, start); | |
g.lexer = lexer; // nb. storing lexer on Grammar is iffy, but unavoidable | |
return g; | |
} | |
function StreamLexer() { | |
this.reset(""); | |
} | |
StreamLexer.prototype.reset = function(data, state) { | |
this.buffer = data; | |
this.index = 0; | |
this.line = state ? state.line : 1; | |
this.lastLineBreak = state ? -state.col : 0; | |
} | |
StreamLexer.prototype.next = function() { | |
if (this.index < this.buffer.length) { | |
var ch = this.buffer[this.index++]; | |
if (ch === '\n') { | |
this.line += 1; | |
this.lastLineBreak = this.index; | |
} | |
return {value: ch}; | |
} | |
} | |
StreamLexer.prototype.save = function() { | |
return { | |
line: this.line, | |
col: this.index - this.lastLineBreak, | |
} | |
} | |
StreamLexer.prototype.formatError = function(token, message) { | |
// nb. this gets called after consuming the offending token, | |
// so the culprit is index-1 | |
var buffer = this.buffer; | |
if (typeof buffer === 'string') { | |
var nextLineBreak = buffer.indexOf('\n', this.index); | |
if (nextLineBreak === -1) nextLineBreak = buffer.length; | |
var line = buffer.substring(this.lastLineBreak, nextLineBreak) | |
var col = this.index - this.lastLineBreak; | |
message += " at line " + this.line + " col " + col + ":\n\n"; | |
message += " " + line + "\n" | |
message += " " + Array(col).join(" ") + "^" | |
return message; | |
} else { | |
return message + " at index " + (this.index - 1); | |
} | |
} | |
function Parser(rules, start, options) { | |
if (rules instanceof Grammar) { | |
var grammar = rules; | |
var options = start; | |
} else { | |
var grammar = Grammar.fromCompiled(rules, start); | |
} | |
this.grammar = grammar; | |
// Read options | |
this.options = { | |
keepHistory: false, | |
lexer: grammar.lexer || new StreamLexer, | |
}; | |
for (var key in (options || {})) { | |
this.options[key] = options[key]; | |
} | |
// Setup lexer | |
this.lexer = this.options.lexer; | |
this.lexerState = undefined; | |
// Setup a table | |
var column = new Column(grammar, 0); | |
var table = this.table = [column]; | |
// I could be expecting anything. | |
column.wants[grammar.start] = []; | |
column.predict(grammar.start); | |
// TODO what if start rule is nullable? | |
column.process(); | |
this.current = 0; // token index | |
} | |
// create a reserved token for indicating a parse fail | |
Parser.fail = {}; | |
Parser.prototype.feed = function(chunk) { | |
var lexer = this.lexer; | |
lexer.reset(chunk, this.lexerState); | |
var token; | |
while (token = lexer.next()) { | |
// We add new states to table[current+1] | |
var column = this.table[this.current]; | |
// GC unused states | |
if (!this.options.keepHistory) { | |
delete this.table[this.current - 1]; | |
} | |
var n = this.current + 1; | |
var nextColumn = new Column(this.grammar, n); | |
this.table.push(nextColumn); | |
// Advance all tokens that expect the symbol | |
var literal = token.text !== undefined ? token.text : token.value; | |
var value = lexer.constructor === StreamLexer ? token.value : token; | |
var scannable = column.scannable; | |
for (var w = scannable.length; w--; ) { | |
var state = scannable[w]; | |
var expect = state.rule.symbols[state.dot]; | |
// Try to consume the token | |
// either regex or literal | |
if (expect.test ? expect.test(value) : | |
expect.type ? expect.type === token.type | |
: expect.literal === literal) { | |
// Add it | |
var next = state.nextState({data: value, token: token, isToken: true, reference: n - 1}); | |
nextColumn.states.push(next); | |
} | |
} | |
// Next, for each of the rules, we either | |
// (a) complete it, and try to see if the reference row expected that | |
// rule | |
// (b) predict the next nonterminal it expects by adding that | |
// nonterminal's start state | |
// To prevent duplication, we also keep track of rules we have already | |
// added | |
nextColumn.process(); | |
// If needed, throw an error: | |
if (nextColumn.states.length === 0) { | |
// No states at all! This is not good. | |
var message = this.lexer.formatError(token, "invalid syntax") + "\n"; | |
message += "Unexpected " + (token.type ? token.type + " token: " : ""); | |
message += JSON.stringify(token.value !== undefined ? token.value : token) + "\n"; | |
var err = new Error(message); | |
err.offset = this.current; | |
err.token = token; | |
throw err; | |
} | |
// maybe save lexer state | |
if (this.options.keepHistory) { | |
column.lexerState = lexer.save() | |
} | |
this.current++; | |
} | |
if (column) { | |
this.lexerState = lexer.save() | |
} | |
// Incrementally keep track of results | |
this.results = this.finish(); | |
// Allow chaining, for whatever it's worth | |
return this; | |
}; | |
Parser.prototype.save = function() { | |
var column = this.table[this.current]; | |
column.lexerState = this.lexerState; | |
return column; | |
}; | |
Parser.prototype.restore = function(column) { | |
var index = column.index; | |
this.current = index; | |
this.table[index] = column; | |
this.table.splice(index + 1); | |
this.lexerState = column.lexerState; | |
// Incrementally keep track of results | |
this.results = this.finish(); | |
}; | |
// nb. deprecated: use save/restore instead! | |
Parser.prototype.rewind = function(index) { | |
if (!this.options.keepHistory) { | |
throw new Error('set option `keepHistory` to enable rewinding') | |
} | |
// nb. recall column (table) indicies fall between token indicies. | |
// col 0 -- token 0 -- col 1 | |
this.restore(this.table[index]); | |
}; | |
Parser.prototype.finish = function() { | |
// Return the possible parsings | |
var considerations = []; | |
var start = this.grammar.start; | |
var column = this.table[this.table.length - 1] | |
column.states.forEach(function (t) { | |
if (t.rule.name === start | |
&& t.dot === t.rule.symbols.length | |
&& t.reference === 0 | |
&& t.data !== Parser.fail) { | |
considerations.push(t); | |
} | |
}); | |
return considerations.map(function(c) {return c.data; }); | |
}; | |
// Generated automatically by nearley, version 2.16.0 | |
// http://github.com/Hardmath123/nearley | |
function id(x) { return x[0]; } | |
var grammar = { | |
Lexer: undefined, | |
ParserRules: [ | |
{"name": "expression$string$1", "symbols": [{"literal":"1"}, {"literal":"+"}, {"literal":"2"}, {"literal":"+"}, {"literal":"3"}], "postprocess": function joiner(d) {return d.join('');}}, | |
{"name": "expression", "symbols": ["expression$string$1"]} | |
] | |
, ParserStart: "expression" | |
} | |
//https://github.com/kach/nearley/blob/master/examples/calculator/calculator.js | |
var ans = new Parser(grammar.ParserRules, grammar.ParserStart).feed("1+2+3"); | |
print (ans.results.length); | |
if (ans.results.length) { | |
print (ans.results[0].toString() ); | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment