Created
July 17, 2018 02:21
-
-
Save MarkTiedemann/da095df7d8f9e8638a0ac5e6fbd45c6b to your computer and use it in GitHub Desktop.
Simple Parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Container for the rules of a parsed grammar.
 *
 * @constructor
 * @param {Rule[]} rules - Rules to store on the instance as `rules`.
 */
function Grammar(rules) {
  Object.assign(this, { rules })
}
/**
 * A single grammar rule of the form `lhs = rhs`.
 *
 * @constructor
 * @param {string} lhs - Left-hand side of the rule.
 * @param {string} rhs - Right-hand side of the rule.
 */
function Rule(lhs, rhs) {
  Object.assign(this, { lhs, rhs })
}
/**
 * Parses grammar source text into a `Grammar`.
 *
 * Each rule has the form `lhs = rhs ;`, where `lhs` and `rhs` are runs of one
 * or more ASCII letters and every separator is exactly one space. Rules are
 * separated by `\n` or `\r\n`, and the input must end right after a `;`.
 *
 * The parser is a character-driven state machine. `step` is the current state
 * and doubles as the error code: `step` is reported when an unexpected
 * character is found, `-step` when the input ends too early (so state 0
 * reports `-0`).
 *
 * @param {string} input - Grammar source text.
 * @returns {Grammar} The parsed rules, in source order.
 * @throws {RangeError} Code 100 (undefined), 101 (null), 102 (not a string)
 *   or 103 (empty string).
 * @throws {Error} Code 0..7 for an unexpected character, -0..-5 for an
 *   unexpected end of input.
 */
function parse(input) {
  if (input === undefined) throw rangeError(input, 100)
  if (input === null) throw rangeError(input, 101)
  if (typeof input !== 'string') throw rangeError(input, 102)
  if (input === '') throw rangeError(input, 103)

  const letter = /[a-zA-Z]/
  const rules = []
  let i = 0
  let step = 0
  let line = 1
  let lhs = []
  let rhs = []
  let c

  // Builds an error only when one is actually thrown. By then `i` has already
  // been advanced past `c`, so the reported position is 1-based and counts
  // from the start of the whole input.
  const error = (expected, code) => prepareError(line, i, c)(expected, code)

  // `<=` is deliberate: the extra iteration reads `undefined`, which stands
  // for the end of the input.
  while (i <= input.length) {
    c = input[i++]
    switch (step) {
      case 0: // [a-zA-Z]+ -> ` `
        if (c === undefined) throw error(letter, -step)
        else if (letter.test(c)) lhs.push(c)
        // A space only ends the left-hand side once it has at least one
        // letter; otherwise ` = b ;` would produce a rule with an empty name.
        else if (c === ' ' && lhs.length > 0) step++
        else throw error(letter, step)
        break
      case 1: // ` ` -> =
        if (c === undefined) throw error('=', -step)
        else if (c === '=') step++
        else throw error('=', step)
        break
      case 2: // = -> ` `
        if (c === undefined) throw error(' ', -step)
        else if (c === ' ') step++
        else throw error(' ', step)
        break
      case 3: // ` ` -> [a-zA-Z] (first letter of the right-hand side)
        if (c === undefined) throw error(letter, -step)
        else if (letter.test(c)) {
          rhs.push(c)
          step++
        } else throw error(letter, step)
        break
      case 4: // [a-zA-Z]* -> ` `
        if (c === undefined) throw error(' ', -step)
        else if (letter.test(c)) rhs.push(c)
        else if (c === ' ') step++
        else throw error(' ', step)
        break
      case 5: // ` ` -> ;
        if (c === undefined) throw error(';', -step)
        else if (c === ';') {
          rules.push(new Rule(lhs.join(''), rhs.join('')))
          lhs = []
          rhs = []
          step++
        } else throw error(';', step)
        break
      case 6: // ; -> \r | \n | end of input
        if (c === undefined) return new Grammar(rules)
        else if (c === '\r') step = 7
        else if (c === '\n') {
          step = 0
          line++
        } else throw error('\r | \n | end of input', step)
        break
      case 7: // \r -> \n | end of input
        if (c === undefined) return new Grammar(rules)
        else if (c === '\n') {
          step = 0
          line++
        } else throw error('\n | end of input', step)
        break
    }
  }
}
/**
 * Builds the error reported when the parser input is not a non-empty string.
 *
 * @param {*} actual - The rejected input; it is rendered into the message.
 * @param {number} code - Error code, exposed as a read-only, non-enumerable
 *   `code` property on the returned error.
 * @returns {RangeError} The error, for the caller to throw.
 */
function rangeError(actual, code) {
  // Interpolating `actual` directly into a template literal throws a
  // TypeError for Symbols and for objects that cannot be converted to a
  // primitive (e.g. `Object.create(null)`), which would hide the RangeError.
  let shown
  try {
    shown = String(actual)
  } catch (conversionError) {
    shown = Object.prototype.toString.call(actual)
  }
  let message =
    `Expected input to be a non-empty string, ` +
    `but found \`${shown}\` [code ${code}]`
  let error = new RangeError(message)
  Object.defineProperty(error, 'code', { value: code })
  return error
}
/**
 * Captures where a character was read and returns a factory for the syntax
 * error describing it.
 *
 * @param {number} line - Line number shown in the message.
 * @param {number} pos - Position shown in the message.
 * @param {string|undefined} actual - Character that was read; `undefined` is
 *   shown as `end of input`.
 * @returns {function(*, number): Error} Factory taking the expected token (a
 *   RegExp is shown by its source, `undefined` as `end of input`) and the
 *   error code, which is exposed as a read-only, non-enumerable `code`
 *   property.
 */
function prepareError(line, pos, actual) {
  const END_OF_INPUT = 'end of input'
  const describeExpected = wanted => {
    if (wanted === undefined) return END_OF_INPUT
    return wanted instanceof RegExp ? wanted.source : wanted
  }
  return (expected, code) => {
    const found = actual === undefined ? END_OF_INPUT : actual
    const wanted = describeExpected(expected)
    const failure = new Error(
      `Expected \`${wanted}\` at position ${pos} line ${line}, ` +
        `but found \`${found}\` [code ${formatValue(code)}]`
    )
    Object.defineProperty(failure, 'code', { value: code })
    return failure
  }
}
/**
 * Makes negative zero visible when a value is printed.
 *
 * @param {*} n - Value to format.
 * @returns {*} The string `'-0'` when `n` is negative zero, otherwise `n`
 *   itself, unchanged.
 */
function formatValue(n) {
  // Only +0 and -0 are strictly equal to 0; dividing by them tells them apart.
  const isNegativeZero = n === 0 && 1 / n === -Infinity
  if (isNegativeZero) return '-0'
  return n
}
/**
 * Test helper: parses `input` and reports whether the result serializes to
 * the same JSON as `expected`.
 *
 * @param {string} input - Grammar source passed to `parse`.
 * @param {*} expected - Value whose JSON form the parse result must match.
 */
function shouldParse(input, expected) {
  // Compare JSON text so that structurally equal objects count as equal.
  assertEqual(JSON.stringify(parse(input)), JSON.stringify(expected))
}
/**
 * Test helper: checks that parsing `input` throws an error whose `code`
 * equals `expected`.
 *
 * @param {*} input - Value passed to `parse`.
 * @param {number} expected - Error code the thrown error must carry.
 */
function shouldThrow(input, expected) {
  try {
    parse(input)
    // Not throwing is a failure, so it gets the failure marker `[ ]` used by
    // `assertEqual`. `formatValue` keeps an expected `-0` readable.
    console.log(`[ ] !${formatValue(expected)}`)
  } catch (actual) {
    assertEqual(actual.code, expected)
  }
}
/**
 * Test helper: prints `[x]` when the two values are the same under
 * `Object.is` (so `0` and `-0` differ and `NaN` equals `NaN`), otherwise
 * prints `[ ]` followed by both values.
 *
 * @param {*} actual - Value produced by the code under test.
 * @param {*} expected - Value it should be.
 */
function assertEqual(actual, expected) {
  const report = Object.is(actual, expected)
    ? '[x]'
    : `[ ] ${formatValue(actual)} != ${formatValue(expected)}`
  console.log(report)
}
// Inputs that must be rejected, each paired with the error code it must
// produce. Codes 100-103 cover invalid input values; the remaining codes are
// the parser state, negated when the input ends too early.
const rejectedInputs = [
  [undefined, 100],
  [null, 101],
  [1, 102],
  ['', 103],
  ['a', -0],
  ['~', 0],
  ['a ', -1],
  ['a ~', 1],
  ['a =', -2],
  ['a =~', 2],
  ['a = ', -3],
  ['a = ~', 3],
  ['a = b', -4],
  ['a = b~', 4],
  ['a = b ', -5],
  ['a = b ~', 5],
  ['a = b ;~', 6],
  ['a = b ;\r~', 7]
]
for (const [input, code] of rejectedInputs) shouldThrow(input, code)

// Inputs that must parse, each paired with the grammar it must produce:
// a single rule, then two rules split by `\n` and by `\r\n`.
const acceptedInputs = [
  ['a = b ;', new Grammar([new Rule('a', 'b')])],
  ['a = b ;\nc = d ;', new Grammar([new Rule('a', 'b'), new Rule('c', 'd')])],
  ['a = b ;\r\nc = d ;', new Grammar([new Rule('a', 'b'), new Rule('c', 'd')])]
]
for (const [input, grammar] of acceptedInputs) shouldParse(input, grammar)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment