Skip to content

Instantly share code, notes, and snippets.

@iomeone
Created May 22, 2019 11:16
Show Gist options
  • Save iomeone/70b31b839918e36d6c315e4c59218044 to your computer and use it in GitHub Desktop.
Save iomeone/70b31b839918e36d6c315e4c59218044 to your computer and use it in GitHub Desktop.
function Rule(name, symbols, postprocess) {
this.id = ++Rule.highestId;
this.name = name;
this.symbols = symbols; // a list of literal | regex class | nonterminal
this.postprocess = postprocess;
return this;
}
Rule.highestId = 0;
Rule.prototype.toString = function(withCursorAt) {
function stringifySymbolSequence (e) {
return e.literal ? JSON.stringify(e.literal) :
e.type ? '%' + e.type : e.toString();
}
var symbolSequence = (typeof withCursorAt === "undefined")
? this.symbols.map(stringifySymbolSequence).join(' ')
: ( this.symbols.slice(0, withCursorAt).map(stringifySymbolSequence).join(' ')
+ " ● "
+ this.symbols.slice(withCursorAt).map(stringifySymbolSequence).join(' ') );
return this.name + " → " + symbolSequence;
}
// a State is a rule at a position from a given starting point in the input stream (reference)
function State(rule, dot, reference, wantedBy) {
this.rule = rule;
this.dot = dot;
this.reference = reference;
this.data = [];
this.wantedBy = wantedBy;
this.isComplete = this.dot === rule.symbols.length;
}
State.prototype.toString = function() {
return "{" + this.rule.toString(this.dot) + "}, from: " + (this.reference || 0);
};
State.prototype.nextState = function(child) {
var state = new State(this.rule, this.dot + 1, this.reference, this.wantedBy);
state.left = this;
state.right = child;
if (state.isComplete) {
state.data = state.build();
}
return state;
};
State.prototype.build = function() {
var children = [];
var node = this;
do {
children.push(node.right.data);
node = node.left;
} while (node.left);
children.reverse();
return children;
};
State.prototype.finish = function() {
if (this.rule.postprocess) {
this.data = this.rule.postprocess(this.data, this.reference, Parser.fail);
}
};
function Column(grammar, index) {
this.grammar = grammar;
this.index = index;
this.states = [];
this.wants = {}; // states indexed by the non-terminal they expect
this.scannable = []; // list of states that expect a token
this.completed = {}; // states that are nullable
}
Column.prototype.process = function(nextColumn) {
var states = this.states;
var wants = this.wants;
var completed = this.completed;
for (var w = 0; w < states.length; w++) { // nb. we push() during iteration
var state = states[w];
if (state.isComplete) {
state.finish();
if (state.data !== Parser.fail) {
// complete
var wantedBy = state.wantedBy;
for (var i = wantedBy.length; i--; ) { // this line is hot
var left = wantedBy[i];
this.complete(left, state);
}
// special-case nullables
if (state.reference === this.index) {
// make sure future predictors of this rule get completed.
var exp = state.rule.name;
(this.completed[exp] = this.completed[exp] || []).push(state);
}
}
} else {
// queue scannable states
var exp = state.rule.symbols[state.dot];
if (typeof exp !== 'string') {
this.scannable.push(state);
continue;
}
// predict
if (wants[exp]) {
wants[exp].push(state);
if (completed.hasOwnProperty(exp)) {
var nulls = completed[exp];
for (var i = 0; i < nulls.length; i++) {
var right = nulls[i];
this.complete(state, right);
}
}
} else {
wants[exp] = [state];
this.predict(exp);
}
}
}
}
Column.prototype.predict = function(exp) {
var rules = this.grammar.byName[exp] || [];
for (var i = 0; i < rules.length; i++) {
var r = rules[i];
var wantedBy = this.wants[exp];
var s = new State(r, 0, this.index, wantedBy);
this.states.push(s);
}
}
Column.prototype.complete = function(left, right) {
var copy = left.nextState(right);
this.states.push(copy);
}
function Grammar(rules, start) {
this.rules = rules;
this.start = start || this.rules[0].name;
var byName = this.byName = {};
this.rules.forEach(function(rule) {
if (!byName.hasOwnProperty(rule.name)) {
byName[rule.name] = [];
}
byName[rule.name].push(rule);
});
}
// So we can allow passing (rules, start) directly to Parser for backwards compatibility
Grammar.fromCompiled = function(rules, start) {
var lexer = rules.Lexer;
if (rules.ParserStart) {
start = rules.ParserStart;
rules = rules.ParserRules;
}
var rules = rules.map(function (r) { return (new Rule(r.name, r.symbols, r.postprocess)); });
var g = new Grammar(rules, start);
g.lexer = lexer; // nb. storing lexer on Grammar is iffy, but unavoidable
return g;
}
function StreamLexer() {
this.reset("");
}
StreamLexer.prototype.reset = function(data, state) {
this.buffer = data;
this.index = 0;
this.line = state ? state.line : 1;
this.lastLineBreak = state ? -state.col : 0;
}
StreamLexer.prototype.next = function() {
if (this.index < this.buffer.length) {
var ch = this.buffer[this.index++];
if (ch === '\n') {
this.line += 1;
this.lastLineBreak = this.index;
}
return {value: ch};
}
}
StreamLexer.prototype.save = function() {
return {
line: this.line,
col: this.index - this.lastLineBreak,
}
}
StreamLexer.prototype.formatError = function(token, message) {
// nb. this gets called after consuming the offending token,
// so the culprit is index-1
var buffer = this.buffer;
if (typeof buffer === 'string') {
var nextLineBreak = buffer.indexOf('\n', this.index);
if (nextLineBreak === -1) nextLineBreak = buffer.length;
var line = buffer.substring(this.lastLineBreak, nextLineBreak)
var col = this.index - this.lastLineBreak;
message += " at line " + this.line + " col " + col + ":\n\n";
message += " " + line + "\n"
message += " " + Array(col).join(" ") + "^"
return message;
} else {
return message + " at index " + (this.index - 1);
}
}
function Parser(rules, start, options) {
if (rules instanceof Grammar) {
var grammar = rules;
var options = start;
} else {
var grammar = Grammar.fromCompiled(rules, start);
}
this.grammar = grammar;
// Read options
this.options = {
keepHistory: false,
lexer: grammar.lexer || new StreamLexer,
};
for (var key in (options || {})) {
this.options[key] = options[key];
}
// Setup lexer
this.lexer = this.options.lexer;
this.lexerState = undefined;
// Setup a table
var column = new Column(grammar, 0);
var table = this.table = [column];
// I could be expecting anything.
column.wants[grammar.start] = [];
column.predict(grammar.start);
// TODO what if start rule is nullable?
column.process();
this.current = 0; // token index
}
// create a reserved token for indicating a parse fail
Parser.fail = {};
Parser.prototype.feed = function(chunk) {
var lexer = this.lexer;
lexer.reset(chunk, this.lexerState);
var token;
while (token = lexer.next()) {
// We add new states to table[current+1]
var column = this.table[this.current];
// GC unused states
if (!this.options.keepHistory) {
delete this.table[this.current - 1];
}
var n = this.current + 1;
var nextColumn = new Column(this.grammar, n);
this.table.push(nextColumn);
// Advance all tokens that expect the symbol
var literal = token.text !== undefined ? token.text : token.value;
var value = lexer.constructor === StreamLexer ? token.value : token;
var scannable = column.scannable;
for (var w = scannable.length; w--; ) {
var state = scannable[w];
var expect = state.rule.symbols[state.dot];
// Try to consume the token
// either regex or literal
if (expect.test ? expect.test(value) :
expect.type ? expect.type === token.type
: expect.literal === literal) {
// Add it
var next = state.nextState({data: value, token: token, isToken: true, reference: n - 1});
nextColumn.states.push(next);
}
}
// Next, for each of the rules, we either
// (a) complete it, and try to see if the reference row expected that
// rule
// (b) predict the next nonterminal it expects by adding that
// nonterminal's start state
// To prevent duplication, we also keep track of rules we have already
// added
nextColumn.process();
// If needed, throw an error:
if (nextColumn.states.length === 0) {
// No states at all! This is not good.
var message = this.lexer.formatError(token, "invalid syntax") + "\n";
message += "Unexpected " + (token.type ? token.type + " token: " : "");
message += JSON.stringify(token.value !== undefined ? token.value : token) + "\n";
var err = new Error(message);
err.offset = this.current;
err.token = token;
throw err;
}
// maybe save lexer state
if (this.options.keepHistory) {
column.lexerState = lexer.save()
}
this.current++;
}
if (column) {
this.lexerState = lexer.save()
}
// Incrementally keep track of results
this.results = this.finish();
// Allow chaining, for whatever it's worth
return this;
};
Parser.prototype.save = function() {
var column = this.table[this.current];
column.lexerState = this.lexerState;
return column;
};
Parser.prototype.restore = function(column) {
var index = column.index;
this.current = index;
this.table[index] = column;
this.table.splice(index + 1);
this.lexerState = column.lexerState;
// Incrementally keep track of results
this.results = this.finish();
};
// nb. deprecated: use save/restore instead!
Parser.prototype.rewind = function(index) {
if (!this.options.keepHistory) {
throw new Error('set option `keepHistory` to enable rewinding')
}
// nb. recall column (table) indicies fall between token indicies.
// col 0 -- token 0 -- col 1
this.restore(this.table[index]);
};
Parser.prototype.finish = function() {
// Return the possible parsings
var considerations = [];
var start = this.grammar.start;
var column = this.table[this.table.length - 1]
column.states.forEach(function (t) {
if (t.rule.name === start
&& t.dot === t.rule.symbols.length
&& t.reference === 0
&& t.data !== Parser.fail) {
considerations.push(t);
}
});
return considerations.map(function(c) {return c.data; });
};
// Generated automatically by nearley, version 2.16.0
// http://github.com/Hardmath123/nearley
function id(x) { return x[0]; }
var grammar = {
Lexer: undefined,
ParserRules: [
{"name": "expression$string$1", "symbols": [{"literal":"1"}, {"literal":"+"}, {"literal":"2"}, {"literal":"+"}, {"literal":"3"}], "postprocess": function joiner(d) {return d.join('');}},
{"name": "expression", "symbols": ["expression$string$1"]}
]
, ParserStart: "expression"
}
//https://github.com/kach/nearley/blob/master/examples/calculator/calculator.js
var ans = new Parser(grammar.ParserRules, grammar.ParserStart).feed("1+2+3");
print (ans.results.length);
if (ans.results.length) {
print (ans.results[0].toString() );
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment