Last active
August 29, 2015 13:56
-
-
Save nathan/8818606 to your computer and use it in GitHub Desktop.
Earley parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function T(s) {return {terminal:s}} | |
function S(n, c, i, k, d) {return {name:n, content:c, index:i, position:k, node:d}} | |
var productions = { | |
start: [['stmts']], | |
stmts: [ | |
['stmts', T(';'), 'stmts'], | |
['s', 'stmt', 's']], | |
stmt: [ | |
[T('{'), 'stmts', T('}')], | |
[T('p'), T('r'), T('i'), T('n'), T('t'), 's', 'exp'], | |
['exp']], | |
exp: [ | |
[T('i'), T('f'), 's', 'exp', 's', 'stmt', 's', T('e'), T('l'), T('s'), T('e'), 's', 'stmt'], | |
[T('i'), T('f'), 's', 'exp', 's', 'stmt'], | |
[T('1')]], | |
s: [ | |
[T(' '), 's'], | |
[]] | |
} | |
var input = 'if 1 print if 1 1 else 1 else if 1 { 1; print 1 } else 1' | |
var set = [] | |
var id = 0 | |
function add(i, state) { | |
if (!set[i]) set[i] = [] | |
for (var j = 0; j < set[i].length; j++) { | |
var s = set[i][j] | |
if (s.name === state.name && s.content === state.content && s.index === state.index && s.position === state.position) return | |
} | |
set[i].push(state) | |
} | |
productions.start.forEach(function(content) { | |
add(0, S('start', content, 0, 0, [])) | |
}) | |
for (var position = 0; position <= input.length; position++) { | |
if (!set[position]) set[position] = [] | |
for (var statePos = 0; statePos < set[position].length; statePos++) { | |
var state = set[position][statePos] | |
if (state.index < state.content.length) { | |
var symbol = state.content[state.index] | |
if (symbol.terminal) { | |
if (input[position] === symbol.terminal) { | |
add(position + 1, S(state.name, state.content, state.index + 1, state.position, state.node.concat(input[position]))) | |
} | |
} else { | |
productions[symbol].forEach(function(content) { | |
add(position, S(symbol, content, 0, position, [])) | |
}) | |
} | |
} else { | |
for (var statePos2 = 0; statePos2 < set[state.position].length; statePos2++) { | |
var s = set[state.position][statePos2] | |
if (s.content[s.index] === state.name) { | |
add(position, S(s.name, s.content, s.index + 1, s.position, s.node.concat([state.node]))) | |
} | |
} | |
} | |
} | |
} | |
set.forEach(function(states, position) { | |
console.log('') | |
console.log([].concat('S(' + position + '):', (position ? input.slice(0, position) : []), '•', (position < input.length ? input.slice(position) : [])).join(' ')) | |
states.forEach(function(s, i) { | |
var content = s.content.map(function(k) {return k.terminal || k}) | |
console.log([].concat('(' + (i+1) + ')', s.name, '→', content.slice(0, s.index), '•', content.slice(s.index)).join(' ')) | |
}) | |
console.log('') | |
}) | |
var last = set[input.length].filter(function(s) { | |
return s.name === 'start' && s.index === s.content.length | |
})[0] | |
if (last) { | |
console.log(require('util').inspect(last.node, null, {depth:-1})) | |
} else { | |
var errorPos = 0 | |
while (set[errorPos].length) { | |
errorPos += 1 | |
} | |
console.error('') | |
console.error('SyntaxError near:') | |
console.error(' ' + input) | |
console.error(' ' + Array(errorPos).join(' ') + '^') | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment