Skip to content

Instantly share code, notes, and snippets.

@gliese1337
Last active November 20, 2015 00:55
Show Gist options
  • Save gliese1337/453bd13693ad99fa84a9 to your computer and use it in GitHub Desktop.
Save gliese1337/453bd13693ad99fa84a9 to your computer and use it in GitHub Desktop.
Incremental, Back-Trackable Earley Recognizer for CFGs
var Earley = (function(){
function Grammar(rules) {
this.symbols = {};
rules.forEach(function(rule) {
// "A -> B C | D" -> ["A ", " B C | D"]
var parts = rule.split('->');
var lhs = parts[0].trim();
var rhss = parts[1].trim();
// "B C | D" -> ["B C", "D"]
var rhssParts = rhss.split('|').map(function(part){
return part.trim().split(' ');
});
if (!this.symbols[lhs]) {
this.symbols[lhs] = [];
}
[].push.apply(this.symbols[lhs], rhssParts);
// now this.symbols contains set of these rules:
// {... "A": [["B", "C"], ["D"]] ...}
}, this);
}
Grammar.prototype.getRules = function(sym) {
return this.symbols[sym] || [];
};
Grammar.prototype.hasSymbol = function(sym) {
return this.symbols.hasOwnProperty(sym);
};
//------------------------------------------------------------------------------------
function State(lhs, rhs, dot, start) {
this.lhs = lhs;
this.rhs = rhs;
this.dot = dot;
this.start = start;
}
State.prototype.complete = function() {
return this.dot >= this.rhs.length;
};
State.prototype.expectedNonTerminal = function(grammar) {
var expected = this.rhs[this.dot];
return grammar.hasSymbol(expected);
};
State.prototype.equals = function(state) {
return this.lhs === state.lhs &&
this.dot === state.dot &&
this.start === state.start &&
JSON.stringify(this.rhs) === JSON.stringify(state.rhs);
};
State.prototype.predictor = function(chart) {
var nonTerm = this.rhs[this.dot];
chart.grammar.getRules(nonTerm).forEach(function(rhs){
var newState = new State(nonTerm, rhs, 0, chart.level);
chart.addToChart(newState);
},this);
};
State.prototype.scanner = function(chart, token) {
var newState, term = this.rhs[this.dot];
if (token === term) {
newState = new State(term, [token], 1, chart.level - 1); // terminal, because dot == length
chart.addToChart(newState);
}
};
State.prototype.completer = function(chart) {
chart.getColumn(this.start).forEach(function(state){
var newState;
if (state.rhs[state.dot] == this.lhs) {
newState = new State(state.lhs, state.rhs, state.dot + 1, state.start);
chart.addToChart(newState);
}
},this);
};
//------------------------------------------------------------------------------------
function Parser(grammar, root, p) {
this.parent = p;
this.level = p?p.level + 1:0;
this.grammar = grammar;
this.root = root;
this.column = [];
this.finished = false;
if(!p){ this.init(); }
}
Parser.prototype.init = function(){
this.grammar.getRules(this.root).forEach(function(rhs){
var initialState = new State(this.root, rhs, 0, 0, 0);
this.addToChart(initialState, 0);
},this);
this.process();
};
Parser.prototype.addToChart = function(newState) {
if(!this.column.some(function(state){ return newState.equals(state); })) {
this.column.push(newState);
}
}
Parser.prototype.getColumn = function(index) {
var c = this;
while(c.level > index){
c = c.parent;
}
return c.column;
}
Parser.prototype.process = function(){
var j, state;
for (j = 0; j < this.column.length; j++) {
state = this.column[j];
if (state.complete()) {
if (state.lhs === this.root) {
this.finished = true;
}
state.completer(this);
} else if (state.expectedNonTerminal(grammar)) {
state.predictor(this);
}
}
this.column = this.column.filter(function(state){
return !state.complete();
});
};
Parser.prototype.next = function(token){
var j, state,
newchart = new Parser(this.grammar, this.root, this);
for (j = 0; j < chart.column.length; j++) {
state = this.column[j];
if (!state.complete() && !state.expectedNonTerminal(grammar)) {
state.scanner(newchart, token);
}
}
newchart.process();
return newchart;
};
var exports = {};
exports.Grammar = Grammar;
exports.Parser = Parser;
return exports;
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment