Last active
December 27, 2015 01:59
-
-
Save shamansir/7249136 to your computer and use it in GitHub Desktop.
An example of a generated parser for PEGjs-fn (http://github.com/shamansir/pegjs-fn)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Classic example grammar, which recognizes simple arithmetic expressions like
 * "2*(3+4)". The parser generated from this grammar then computes their value.
 */

// Entry point: a whole expression is an additive expression.
start
  = additive

// Right-recursive sum; falls back to a single multiplicative term.
additive
  = left:multiplicative "+" right:additive { return left + right; }
  / multiplicative

// Right-recursive product; falls back to a single primary.
multiplicative
  = left:primary "*" right:multiplicative { return left * right; }
  / primary

// A number or a parenthesised sub-expression.
primary
  = integer
  / "(" additive:additive ")" { return additive; }

// One or more decimal digits, converted to a number.
integer "integer"
  = digits:$[0-9]+ { return parseInt(digits, 10); }
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* ----------- BLOCKS ----------- */ | |
"additive": [ | |
function(č) { | |
// additive[0] | |
return (function(left,right) { return left + right; })(č.left,č.right); | |
} | |
], | |
"multiplicative": [ | |
function(č) { | |
// multiplicative[0] | |
return (function(left,right) { return left * right; })(č.left,č.right); | |
} | |
], | |
"primary": [ | |
function(č) { | |
// primary[0] | |
return (function(additive) { return additive; })(č.additive); | |
} | |
], | |
"integer": [ | |
function(č) { | |
// integer[0] | |
return (function(digits) { return parseInt(digits, 10); })(č.digits); | |
} | |
] | |
/* ----------- RULES DEFINITIONS ----------- */

// Every rule composes deferred operators into a single thunk and then runs it.

// start = additive
rules.start = function() {
  var parse = ref(rules.additive);
  return parse();
};

// additive = left:multiplicative "+" right:additive / multiplicative
rules.additive = function() {
  var blocks = ƒ.additive;
  var sum = action(
    seqnc(
      label("left", ref(rules.multiplicative)),
      match("+"),
      label("right", ref(rules.additive))
    ),
    blocks[0] /*{ return left + right; }*/
  );
  var parse = choice(sum, ref(rules.multiplicative));
  return parse();
};

// multiplicative = left:primary "*" right:multiplicative / primary
rules.multiplicative = function() {
  var blocks = ƒ.multiplicative;
  var product = action(
    seqnc(
      label("left", ref(rules.primary)),
      match("*"),
      label("right", ref(rules.multiplicative))
    ),
    blocks[0] /*{ return left * right; }*/
  );
  var parse = choice(product, ref(rules.primary));
  return parse();
};

// primary = integer / "(" additive:additive ")"
rules.primary = function() {
  var blocks = ƒ.primary;
  var parenthesised = action(
    seqnc(
      match("("),
      label("additive", ref(rules.additive)),
      match(")")
    ),
    blocks[0] /*{ return additive; }*/
  );
  var parse = choice(ref(rules.integer), parenthesised);
  return parse();
};

// integer "integer" = digits:$[0-9]+
rules.integer = function() {
  var blocks = ƒ.integer;
  var digits = label("digits", text(some(re(/^[0-9]/, "[0-9]"))));
  var parse = as("integer",
    action(digits, blocks[0] /*{ return parseInt(digits, 10); }*/));
  return parse();
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module.exports = (function(){ | |
/* Generated by PEG.js-fn @VERSION (http://pegjs.majda.cz/). */ | |
/* Functional modification by shaman.sir@gmail.com (http://shamansir.github.com/). */ | |
/* ########### ENVIRONMENT ########### */ | |
// State shared between the parser and the user-code sandbox.
// All four are (re)assigned at the start of parse().
var input;   // the text being parsed
var options; // options object passed to parse()
var pos;     // 0, parser position
var p_pos;   // 0, previous parser position
// All user blocks (initializer and/or actions) live inside this sandbox
// function. Variables declared by an initializer are therefore visible to the
// actions only, which requires the initializer to have no top-level `return`.
// The parser's own variables stay out of reach of user code, except for the
// environment variables declared above.
var __p_blocks = (function() { return function() {

  // helpers kept for backwards compatibility with original peg-js;
  // they are in scope for the user code below
  function cell()   { return __p_coord(p_pos); }
  function column() { return cell()[0]; }
  function line()   { return cell()[1]; }
  function offset() { return p_pos; }
  function text()   { return input.substring(p_pos, pos); }

  /* ########### USER CODE ########### */

  /* ----------- BLOCKS ----------- */

  // Blocks are grouped by rule name and index. Each wrapper receives the
  // current context as č and hands the labelled values to the user code as
  // positional arguments; the argument names are computed when the parser is
  // generated. ƒ and č are deliberately odd names so that grammar authors are
  // unlikely to pick them for their own variables (only č can actually clash
  // here); this relies on the environment accepting Unicode identifiers.

  var blocks = {};

  blocks["additive"] = [
    // additive[0]
    function(č) {
      var run = function(left, right) {
        return left + right;
      };
      return run(č.left, č.right);
    }
  ];

  blocks["multiplicative"] = [
    // multiplicative[0]
    function(č) {
      var run = function(left, right) {
        return left * right;
      };
      return run(č.left, č.right);
    }
  ];

  blocks["primary"] = [
    // primary[0]
    function(č) {
      var run = function(additive) {
        return additive;
      };
      return run(č.additive);
    }
  ];

  blocks["integer"] = [
    // integer[0]
    function(č) {
      var run = function(digits) {
        return parseInt(digits, 10);
      };
      return run(č.digits);
    }
  ];

  return blocks;

}; })();
// ƒ and č are deliberately unusual identifiers so that grammar authors are
// unlikely to use them for their own variables (only č can actually clash in
// this architecture). This relies on the environment accepting Unicode
// identifiers.
var ƒ = null; // table of action blocks, keyed by rule name; assigned from __p_blocks() inside parse()
/* ########### PARSER ########### */ | |
var __parser = function() { | |
/* =========== PARSER-DEPENDENT CODE =========== */ | |
/* ----------- RULES DEFINITIONS ----------- */ | |
// Rule table. Every rule composes deferred operators into a single thunk and
// runs it; the operators are only looked up when a rule is actually called.
var rules = {};
(function() {

  // start = additive
  rules.start = function() {
    var parse = ref(rules.additive);
    return parse();
  };

  // additive = left:multiplicative "+" right:additive / multiplicative
  rules.additive = function() {
    var blocks = ƒ.additive;
    var sum = action(
      seqnc(
        label("left", ref(rules.multiplicative)),
        match("+"),
        label("right", ref(rules.additive))
      ),
      blocks[0] /*{ return left + right; }*/
    );
    var parse = choice(sum, ref(rules.multiplicative));
    return parse();
  };

  // multiplicative = left:primary "*" right:multiplicative / primary
  rules.multiplicative = function() {
    var blocks = ƒ.multiplicative;
    var product = action(
      seqnc(
        label("left", ref(rules.primary)),
        match("*"),
        label("right", ref(rules.multiplicative))
      ),
      blocks[0] /*{ return left * right; }*/
    );
    var parse = choice(product, ref(rules.primary));
    return parse();
  };

  // primary = integer / "(" additive:additive ")"
  rules.primary = function() {
    var blocks = ƒ.primary;
    var parenthesised = action(
      seqnc(
        match("("),
        label("additive", ref(rules.additive)),
        match(")")
      ),
      blocks[0] /*{ return additive; }*/
    );
    var parse = choice(ref(rules.integer), parenthesised);
    return parse();
  };

  // integer "integer" = digits:$[0-9]+
  rules.integer = function() {
    var blocks = ƒ.integer;
    var digits = label("digits", text(some(re(/^[0-9]/, "[0-9]"))));
    var parse = as("integer",
      action(digits, blocks[0] /*{ return parseInt(digits, 10); }*/));
    return parse();
  };

})();
/* ----------- OPERATORS ----------- */ | |
// Current character: the character at `pos`, or the EOI marker once the
// position has reached the end of the input.
function cc() {
  if (pos < ilen) {
    return input.charAt(pos);
  }
  return EOI;
}
var ref = def(inctx); // ref(rule) returns a thunk that, when invoked, runs the rule through inctx(), i.e. in its own context level
// Runs the operator `f` in its own context level and then calls the user
// block `code` with the current context.
//
//   f    - thunk of the operator whose labels feed the action
//   code - user block wrapper; receives the current context object
//
// Returns whatever `code` returns. If `code` returns null, the position is
// rolled back to where the action started and a match failure is raised.
//
// `p_pos` is shared with the user-code helpers offset()/text()/line()/column().
// Nested actions executed by `f` overwrite it, so the start position is kept in
// a local variable and written back to `p_pos` just before `code` runs. This
// way the helpers describe this action's own match, and the rollback on null
// goes to this action's start rather than to a nested action's start.
function action(f, code) {
  return inctx(function() {
    var start = pos;
    p_pos = start; // same assignment the helpers saw before `f` ran
    f();
    p_pos = start; // undo changes made by nested actions
    var res = code(cctx);
    if (res === null) {
      pos = start;
      failed(SOMETHING, NOTHING);
    }
    return res;
  });
}
action = def(action);
// Sequence: runs every given thunk in order and returns the array of their
// results. When any of them raises a match failure, the position is restored
// to where the sequence started and the failure is re-thrown.
function seqnc(/*f...*/) {
  var start = pos; // position to roll back to
  var steps = arguments;
  var count = steps.length;
  var results = [];
  var rollback = function(e) {
    pos = start;
    throw e;
  };
  var i = 0;
  while (i < count) {
    results.push(safe(steps[i], rollback));
    i++;
  }
  return results;
}
seqnc = def(seqnc);
// Named rule: while `f` runs, failures are reported under the human-readable
// `name` (failed() and merr() prefer `alias` when it is set).
//
//   name - display name of the rule
//   f    - thunk of the aliased operator
//
// Returns the result of `f`; a failure thrown by `f` propagates unchanged.
//
// The alias is restored in a `finally` clause. Previously it was only cleared
// after `f` returned normally, so a failing named rule left the alias set and
// every later failure was reported under that name. Restoring the previous
// value (instead of always '') also keeps an enclosing alias intact.
function as(name, f) {
  var outer = alias;
  alias = name;
  try {
    return f();
  } finally {
    alias = outer;
  }
}
as = def(as);
// Ordered choice: tries the given thunks one after another and returns the
// result of the first one that does not raise a match failure. If all of them
// fail, the failure of the last alternative is thrown.
function choice(/*f...*/) {
  var alternatives = arguments;
  var count = alternatives.length;
  var lastError = null;
  var didMiss = false;
  var remember = function(e) {
    lastError = e;
    didMiss = true;
  };
  var i = 0;
  while (i < count) {
    didMiss = false;
    var value = safe(alternatives[i], remember);
    if (!didMiss) {
      return value;
    }
    i++;
  }
  throw lastError;
}
choice = def(choice);
// Literal match: consumes `str` at the current position and returns it.
// Raises a match failure (failed() always throws) when the input ends too
// early or the text at the current position differs.
function match(str) {
  var end = pos + str.length;
  if (end > ilen) {
    failed(quote(str), EOI); // exits
  }
  if (input.substring(pos, end) !== str) {
    failed(quote(str), cc()); // exits
  }
  pos = end;
  return str;
}
match = def(match);
// Labelled expression: runs `f`, stores its result in the current context
// under the key `lbl` and returns it.
function label(lbl, f) {
  var target = cctx; // context captured before `f` runs, as in a plain assignment
  var value = f();
  target[lbl] = value;
  return value;
}
label = def(label);
// Text operator ($): runs `f` for its effect on the position only and returns
// the piece of input it consumed.
function text(f) {
  var start = pos; // where the matched text begins
  f();
  var length = pos - start;
  return input.substr(start, length);
}
text = def(text);
// One-or-more (+): `f` must match once (a failure here propagates), then it is
// repeated through any(). Returns the array of all results.
function some(f) {
  var first = f();
  var rest = any(f)();
  rest.unshift(first);
  return rest;
}
some = def(some);
// Zero-or-more (*): repeats `f` until it raises a match failure and returns
// the array of results collected before that failure.
function any(f) {
  var matches = [];
  var stopped = false;
  var stop = function() { stopped = true; };
  for (;;) {
    var value = safe(f, stop);
    if (stopped) {
      break; // the failing attempt contributes nothing
    }
    matches.push(value);
  }
  return matches;
}
any = def(any);
// Regular-expression match: applies `rx` to the rest of the input and, when it
// matches right at the current position, consumes and returns the matched
// text. Otherwise raises a match failure described by `desc` (or by the
// pattern source when no description is given).
function re(rx, desc) {
  var expected = desc || rx.source;
  var found = rx.exec(input.substr(pos));
  if (!found || found.index !== 0) {
    failed(expected, cc()); // exits
  }
  var matched = found[0];
  pos += matched.length;
  return matched;
}
re = def(re);
/* =========== PARSER-INDEPENDENT CODE =========== */ | |
/* ----------- VARIABLES ----------- */ | |
// Parser state; the value before the comma is what parse() resets each one to.
var cache;    // {}, rule results cache, by name/pos
var ctx;      // { ... }, total context
var cctx;     // { ... }, current context pointer
var ctxl;     // -1, context level
var current;  // '-', current rule name
var alias;    // '', current rule alias, if defined

var failures; // {}, failures data
var rmfpos;   // 0, rightmost failure position
var nr;       // 0, no-report, fire errors w/o reporting

var ilen;     // input length (the input itself is declared in the environment section)
/* ----------- CONTEXT ----------- */ | |
// Creates a new context level whose prototype is `parent`, so values stored
// on outer levels are readable through it. Increments the level counter.
//   __p - parent level, __l - level number, __c - cached child level
function ctx_lvl(parent) {
  var Level = function() {};
  Level.prototype = parent;
  var level = new Level();
  level.__p = parent;
  level.__l = ++ctxl;
  level.__c = null;
  return level;
}
// Dive in: makes the child level of the current context the current one,
// creating it on first use and reusing the cached one afterwards.
//
// ctx_lvl() increments `ctxl` when it creates a level, and dout() decrements
// it on every call. Re-entering a cached child therefore has to increment the
// counter here, otherwise `ctxl` drifts below the real depth and levels
// created later get a wrong `__l`.
function din() { // dive in
  if (cctx.__c) {
    ++ctxl;
  } else {
    cctx.__c = ctx_lvl(cctx);
  }
  cctx = cctx.__c;
}
// Dive out: makes the parent of the current context level the current one and
// decrements the level counter. Throws when there is no parent level.
function dout() { // dive out
  var parent = cctx.__p;
  if (!parent) {
    throw new Error('reached top context level');
  }
  cctx = parent;
  ctxl -= 1;
}
// Runs `f` one context level deeper and returns its result. A match failure
// raised by `f` is held back until the level has been left again and is then
// re-thrown.
function inctx(f) { // execute in own context and return
  var caught;
  din();
  var value = safe(f, function(err) { caught = err; });
  dout();
  if (caught) {
    throw caught;
  }
  return value;
}
/* ----------- DEFERRED ----------- */ | |
// Turns `f` into a deferred version of itself: calling the returned function
// only records the arguments and yields a thunk; `f` runs with those
// arguments (and a null `this`) each time the thunk is invoked.
function def(f) {
  return function() {
    var args = arguments;
    var thunk = function() {
      return f.apply(null, args);
    };
    return thunk;
  };
}
/* ----------- RULES WRAPPER ----------- */ | |
var ckey; // cache key

// Replaces every rule with a wrapper that records the rule's name in
// `current` before running the original rule function.
var tracked = function(name, body) {
  return function() {
    current = name;
    return body();
  };
};
for (var rule in rules) {
  rules[rule] = tracked(rule, rules[rule]);
}
/* ----------- RESULT OBJECT + PARSE FUNCTION ----------- */ | |
var result = {

  /*
   * Parses the input with the generated parser. If parsing succeeds, returns
   * the value explicitly or implicitly specified by the grammar from which
   * the parser was generated (see |PEG.buildParser|). If parsing fails,
   * throws a |MatchFailed| error describing the rightmost failure.
   *
   *   _input - string to parse
   *   _opts  - optional settings; `startRule` selects the rule to start from
   */
  parse: function(_input, _opts) {
    if (!_opts) {
      _opts = {};
    }

    // reset the whole parser state for this run
    pos = 0;
    p_pos = 0;
    input = _input;
    options = _opts;
    ilen = input.length;
    failures = {};
    rmfpos = 0;
    nr = 0;
    __p_reset_coord();
    cache = {};
    ctxl = -1;
    ctx = ctx_lvl();
    cctx = ctx;
    current = '-';

    // only the rules listed here may be used as an entry point
    var startRule = _opts.startRule || "start";
    var allowedStartRules = ["start"];
    if (allowedStartRules.indexOf(startRule) < 0) {
      throw new Error("Can't start parsing from rule " + quote(startRule) + ".");
    }

    // call the user initializer and fetch the action blocks,
    // which live in the same scope
    ƒ = __p_blocks();

    // find the start rule
    if (!startRule) {
      throw new Error("Start rule is not defined in options, no 'start' rule found and first rule in grammar was empty");
    }
    if (rules[startRule] === undefined) {
      throw new SyntaxError("Rule not found: " + quote(startRule) + ".");
    }

    // and execute it
    var res;
    try {
      res = rules[startRule]();
      var incomplete = (pos < ilen) || (res === null);
      if (incomplete) {
        failed(EOI, cc());
      }
    } catch (e) {
      if (!(e instanceof MatchFailed)) {
        throw e;
      }
      // report the rightmost failure instead of the one that surfaced
      throw adapt(failures[rmfpos]);
    }
    return res;
  },

  /* Returns the parser source code. */
  toSource: function() {
    return this._source;
  },

  /* makes error types accessible outside */
  MatchFailed: MatchFailed,
  SyntaxError: SyntaxError

};
/* ----------- UTILS ----------- */ | |
// A Marker is a named placeholder used in failure reports where there is no
// literal text to show; it converts to its human-readable description.
function Marker(human_str) {
  this.str = human_str;
}
Marker.prototype.toString = function() {
  return this.str;
};

var EOI = new Marker('end of input');
var ANY = new Marker('any character');
var SOMETHING = new Marker('progress');
var NOTHING = new Marker('nothing');
// Returns the JavaScript escape sequence for the first character of `ch`:
// `\xHH` for codes up to 0xFF and `\uHHHH` above that, in upper-case hex.
//
// The hex digits are left-padded with zeros to the full width. The padding
// used to be appended, which produced a different character: "\x01" came out
// as \x10 and "\u0100" as \u1000.
function hexOf(ch) {
  var code = ch.charCodeAt(0);
  var wide = code > 0xFF;
  var width = wide ? 4 : 2;
  var hex = code.toString(16).toUpperCase();
  while (hex.length < width) {
    hex = '0' + hex;
  }
  return '\\' + (wide ? 'u' : 'x') + hex;
}
// Returns `s` as a double-quoted JavaScript string literal.
function quote(s) {
  /*
   * ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a
   * string literal except for the closing quote character, backslash,
   * carriage return, line separator, paragraph separator, and line feed.
   * Any character may appear in the form of an escape sequence.
   *
   * For portability, we also escape all control and non-ASCII characters.
   * Note that "\0" and "\v" escape sequences are not used because JSHint
   * does not like the first and IE the second.
   */
  // applied in this order; the backslash has to be handled first
  var escapes = [
    [/\\/g,   '\\\\'], // backslash
    [/"/g,    '\\"'],  // closing quote character
    [/\x08/g, '\\b'],  // backspace
    [/\t/g,   '\\t'],  // horizontal tab
    [/\n/g,   '\\n'],  // line feed
    [/\f/g,   '\\f'],  // form feed
    [/\r/g,   '\\r'],  // carriage return
    [/[\x00-\x07\x0B\x0E-\x1F\x80-\uFFFF]/g, hexOf]
  ];
  var escaped = s;
  for (var i = 0; i < escapes.length; i++) {
    escaped = escaped.replace(escapes[i][0], escapes[i][1]);
  }
  return '"' + escaped + '"';
}
/* ----------- FAILURES ----------- */ | |
// Error describing a failed match.
//   what     - name (or alias) of the rule being matched
//   found    - what was found at the failure position
//   expected - list of things that would have been accepted (defaults to [])
// The offset is taken from the current parser position; the line/column
// fields start out as -1 placeholders.
function MatchFailed(what, found, expected) {
  this.what = what;
  this.expected = expected || [];
  this.found = found;
  this.offset = pos;
  this.xpos = [-1, -1];
  this.line = -1;
  this.column = -1;
}
MatchFailed.prototype = new Error();
MatchFailed.prototype.toString = function() {
  return 'MatchFailed: ' + emsg(this);
};
// Builds a MatchFailed attributed to the active alias or, when no alias is
// set, to the current rule name.
var merr = function(fnd, exp) {
  var where = alias || current;
  return new MatchFailed(where, fnd, exp);
};
// Records a match failure at the current position and throws it; this
// function never returns normally.
//   expected - what would have been accepted (replaced by the alias, if set)
//   found    - what was found instead
// In no-report mode a fresh error is thrown without any bookkeeping.
// Otherwise there is one error object per position: its `found` is updated
// and `expected` is added to its list unless already present.
function failed(expected, found) {
  var wanted = alias || expected;

  // if no report required, just throw
  if (nr) {
    throw merr(found, [wanted]);
  }

  if (pos > rmfpos) {
    rmfpos = pos;
  }

  var err = failures[pos];
  if (!err) {
    err = merr(found);
    failures[pos] = err;
  }
  err.found = found;

  var known = err.expected;
  if (known.indexOf(wanted) < 0) {
    known.push(wanted);
  }
  throw err;
}
// Calls `f` and returns its result. A MatchFailed raised by `f` is passed to
// `callback` (when one is given) instead of propagating, and undefined is
// returned unless the callback throws. Any other error is re-thrown.
function safe(f, callback) {
  try {
    return f();
  } catch (e) {
    if (!(e instanceof MatchFailed)) {
      throw e;
    }
    if (callback) {
      callback(e);
    }
  }
}
// Builds the human-readable message for a match failure:
// "Expected <a, b or c> but <found> found."
//   e - error-like object with `found` and `expected`
//
// adapt() replaces an EOI `found` with null and an EOI-only `expected` list
// with []. Both shapes are rendered as the end-of-input description here, so
// that converting an adapted error to a string no longer throws a TypeError
// from quote(null) or prints "Expected undefined".
function emsg(e) {
  var found_str;
  if (e.found == null) {
    found_str = EOI.str;
  } else if (e.found instanceof Marker) {
    found_str = e.found.str;
  } else {
    found_str = quote(e.found);
  }

  var xs = e.expected;
  var exp_str;
  if (xs instanceof Marker) {
    exp_str = xs.str;
  } else if (xs.length === 0) {
    exp_str = EOI.str;
  } else if (xs.length === 1) {
    exp_str = String(xs[0]); // a Marker converts to its description
  } else {
    exp_str = xs.slice(0, -1).join(', ') + ' or ' + xs.slice(-1);
  }

  return /*'Stopped at '+quote(e.what)+': */'Expected ' + exp_str +
         ' but ' + found_str + ' found.';
}
// Prepares a recorded failure for the caller of parse(): fills in its
// coordinates and message, turns an EOI `found` into null, sorts the expected
// list (emptying it when EOI was the only entry) and replaces markers in the
// sorted list by their descriptions. Returns the same error object.
function adapt(e) {
  var coord = __p_coord(e.offset);
  e.xpos = coord;
  e.line = coord[1];
  e.column = coord[0];
  e.message = emsg(e);

  if (e.found === EOI) {
    e.found = null;
  }

  var sorted = e.expected.sort();
  if (sorted.length === 1 && sorted[0] === EOI) {
    e.expected = [];
  }
  for (var i = 0; i < sorted.length; i++) {
    if (sorted[i] instanceof Marker) {
      sorted[i] = sorted[i].str;
    }
  }
  return e;
}
// Parser-local error type carrying just a message; may be thrown from the
// parser. Inside the parser this name shadows the built-in SyntaxError.
function SyntaxError(msg) {
  this.message = msg;
}
SyntaxError.prototype = new Error();
SyntaxError.prototype.toString = function() {
  return 'SyntaxError: ' + this.message;
};
/* ---------- RETURN RESULT OBJECT ----------- */ | |
return result; | |
}; | |
/* ----------- COORDINATES CALCULATION ----------- */ | |
// a function to find line-column position from a char-based position | |
// Cache of the last computed 2d position: [ last_pos, column, line, seen_cr ]
var __p_coord_cache;

// Resets the cache to the start of the input (position 0, column 1, line 1).
function __p_reset_coord() { __p_coord_cache = [ 0, 1, 1, 0 ]; }
__p_reset_coord();

// Converts a character position in `input` into [ column, line ], both
// 1-based. "\n", "\r", "\r\n", "\u2028" and "\u2029" each end a line.
//
// The scan continues from the cached position when `pos` lies ahead of it.
// When `pos` lies behind it, the cache is reset and the scan restarts from the
// beginning of the input. The local reference to the cache is refreshed after
// that reset; it used to keep pointing at the stale array, so the scan started
// at the old position, never ran, and [ 1, 1 ] was returned for every
// backward lookup.
function __p_coord(pos) {
  /*
   * The first idea was to use |String.split| to break the input up to the
   * error position along newlines and derive the line and column from
   * there. However IE's |split| implementation is so broken that it was
   * enough to prevent it.
   */
  var c = __p_coord_cache;
  if (pos === c[0]) {
    return [ c[1], c[2] ];
  }
  if (pos < c[0]) {
    __p_reset_coord();
    c = __p_coord_cache;
  }

  var cl = c[1],
      ln = c[2],
      cr = c[3]; // bool, was the previous character a CR-like line break?
  for (var i = c[0], ch; i < pos; i++) {
    ch = input.charAt(i);
    if (ch === "\n") {
      if (!cr) { ln++; } // the LF of a CR LF pair was already counted
      cl = 1; cr = 0;
    } else if (ch === "\r" || ch === "\u2028" || ch === "\u2029") {
      ln++; cl = 1; cr = 1;
    } else {
      cl++; cr = 0;
    }
  }
  __p_coord_cache = [ pos, cl, ln, cr ];
  return [ cl, ln ];
}
/* ----------- RETURN PARSER ----------- */ | |
return __parser(); | |
})();; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment