Last active
April 2, 2018 13:10
-
-
Save lqt0223/4d60a33450c4fba0f5ff1da163aac9b9 to your computer and use it in GitHub Desktop.
32 recursive descent parsing - arithmetic expression as an example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
parsing arithmetic expressions containing 1 digit integers as operands | |
and '+','-','*','/' as operators | |
context free grammar for arithmetic expressions with left-recursion eliminated: | |
- expr -> term + expr | term - expr | term | |
- term -> factor * term | factor / term | factor | |
- factor -> num | ( expr ) | |
*/ | |
// a cursor pointing to the position of the next input token | |
var i = 0 | |
// tell if the next input token matches some pattern | |
// if matched, move the cursor forward and return the next input token | |
// else return false | |
function match(token) { | |
// if the argument is a plain token literal, then do string comparison | |
if (typeof token != 'function') { | |
if (tokens[i] == token) { | |
i++ | |
return token | |
} else { | |
return false | |
} | |
// else, the argument is a predicate function, call it to see if the token matches | |
} else { | |
var tokenPred = token | |
var t = tokens[i] | |
if (tokenPred(t)) { | |
i++ | |
return t | |
} else { | |
return false | |
} | |
} | |
} | |
// expr -> expr1 | expr2 | expr3 | |
function parseExpr(tokenArr) { | |
var result | |
var _i = i | |
result = parseExpr1(tokenArr) | |
if (result) { | |
return result | |
} else { | |
// if a chosen derivation does not work, unchoose by restoring the saved cursor | |
// and choose next | |
i = _i | |
} | |
result = parseExpr2(tokenArr) | |
if (result) { | |
return result | |
} else { | |
i = _i | |
} | |
result = parseExpr3(tokenArr) | |
if (result) { | |
return result | |
} else { | |
i = _i | |
} | |
return false | |
} | |
// expr1 -> term + expr | |
function parseExpr1(tokenArr) { | |
var result = [] | |
var t1 = parseTerm(tokenArr) | |
if (!t1) return false | |
var t2 = match('+') | |
if (!t2) return false | |
var t3 = parseExpr(tokenArr) | |
if (!t3) return false | |
return [t1, t2, t3] | |
} | |
// expr2 -> term - expr | |
function parseExpr2(tokenArr) { | |
var result = [] | |
var t1 = parseTerm(tokenArr) | |
if (!t1) return false | |
var t2 = match('-') | |
if (!t2) return false | |
var t3 = parseExpr(tokenArr) | |
if (!t3) return false | |
return [t1, t2, t3] | |
} | |
// expr3 -> term | |
function parseExpr3(tokenArr) { | |
return parseTerm(tokenArr) | |
} | |
// term -> term1 | term2 | term3 | |
function parseTerm(tokenArr) { | |
var result | |
var _i = i | |
result = parseTerm1(tokenArr) | |
if (result) { | |
return result | |
} else { | |
i = _i | |
} | |
result = parseTerm2(tokenArr) | |
if (result) { | |
return result | |
} else { | |
i = _i | |
} | |
result = parseTerm3(tokenArr) | |
if (result) { | |
return result | |
} else { | |
i = _i | |
} | |
return false | |
} | |
// term1 -> factor * term | |
function parseTerm1(tokenArr) { | |
var result = [] | |
var t1 = parseFactor(tokenArr) | |
if (!t1) return false | |
var t2 = match('*') | |
if (!t2) return false | |
var t3 = parseTerm(tokenArr) | |
if (!t3) return false | |
return [t1, t2, t3] | |
} | |
// term2 -> factor / term | |
function parseTerm2(tokenArr) { | |
var result = [] | |
var t1 = parseFactor(tokenArr) | |
if (!t1) return false | |
var t2 = match('/') | |
if (!t2) return false | |
var t3 = parseTerm(tokenArr) | |
if (!t3) return false | |
return [t1, t2, t3] | |
} | |
// term3 -> factor | |
function parseTerm3(tokenArr) { | |
return parseFactor(tokenArr) | |
} | |
// factor -> factor1 | factor2 | |
function parseFactor(tokenArr) { | |
var result | |
var _i = i | |
result = parseFactor1(tokenArr) | |
if (result) { | |
return result | |
} else { | |
i = _i | |
} | |
result = parseFactor2(tokenArr) | |
if (result) { | |
return result | |
} else { | |
i = _i | |
} | |
} | |
// factor1 -> num | |
function parseFactor1(tokenArr) { | |
var t1 = match((t) => parseInt(t)) | |
return t1 | |
} | |
// factor2 -> ( expr ) | |
function parseFactor2(tokenArr) { | |
var t1 = match('(') | |
var t2 = parseExpr(tokenArr) | |
var t3 = match(')') | |
return t2 | |
} | |
var input = '2*(3+4)/5-6' | |
var tokens = input.split('') | |
var parsed = parseExpr(tokens) | |
console.log(JSON.stringify(parsed, null, 2)) | |
/* | |
[ | |
[ | |
"2", | |
"*", | |
[ | |
[ | |
"3", | |
"+", | |
"4" | |
], | |
"/", | |
"5" | |
] | |
], | |
"-", | |
"6" | |
] | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment