Skip to content

Instantly share code, notes, and snippets.

@shamansir
Last active December 27, 2015 01:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save shamansir/7249136 to your computer and use it in GitHub Desktop.
Save shamansir/7249136 to your computer and use it in GitHub Desktop.
An example of generated parser for PEGjs-fn (http://github.com/shamansir/pegjs-fn)
/*
* Classic example grammar, which recognizes simple arithmetic expressions like
* "2*(3+4)". The parser generated from this grammar then computes their value.
*/
start
= additive
additive
= left:multiplicative "+" right:additive { return left + right; }
/ multiplicative
multiplicative
= left:primary "*" right:multiplicative { return left * right; }
/ primary
primary
= integer
/ "(" additive:additive ")" { return additive; }
integer "integer"
= digits:$[0-9]+ { return parseInt(digits, 10); }
/* ----------- BLOCKS ----------- */
"additive": [
function(č) {
// additive[0]
return (function(left,right) { return left + right; })(č.left,č.right);
}
],
"multiplicative": [
function(č) {
// multiplicative[0]
return (function(left,right) { return left * right; })(č.left,č.right);
}
],
"primary": [
function(č) {
// primary[0]
return (function(additive) { return additive; })(č.additive);
}
],
"integer": [
function(č) {
// integer[0]
return (function(digits) { return parseInt(digits, 10); })(č.digits);
}
]
/* ----------- RULES DEFINITIONS ----------- */
// Generated rule functions, one per grammar rule. Each one builds a tree of
// deferred operators (choice/action/seqnc/label/match/...) and immediately
// executes it with the trailing `()`. `_code` holds the user action blocks
// of the rule, taken from ƒ by rule name.

// start = additive
rules.start = function() {
return (
ref(rules.additive)
());
}
// additive = left:multiplicative "+" right:additive { action } / multiplicative
rules.additive = function() {
var _code = ƒ.additive;
return (
choice(
action(
seqnc(
label("left",
ref(rules.multiplicative)
),
match("+"),
label("right",
ref(rules.additive)
)
),
_code[0])
/*{ return left + right; }*/,
ref(rules.multiplicative)
)
());
}
// multiplicative = left:primary "*" right:multiplicative { action } / primary
rules.multiplicative = function() {
var _code = ƒ.multiplicative;
return (
choice(
action(
seqnc(
label("left",
ref(rules.primary)
),
match("*"),
label("right",
ref(rules.multiplicative)
)
),
_code[0])
/*{ return left * right; }*/,
ref(rules.primary)
)
());
}
// primary = integer / "(" additive:additive ")" { action }
rules.primary = function() {
var _code = ƒ.primary;
return (
choice(
ref(rules.integer),
action(
seqnc(
match("("),
label("additive",
ref(rules.additive)
),
match(")")
),
_code[0])
/*{ return additive; }*/
)
());
}
// integer "integer" = digits:$[0-9]+ { action }
// `as` reports the rule under its human-readable name, `text` captures the
// matched substring, `some` requires one or more matches of the char class.
rules.integer = function() {
var _code = ƒ.integer;
return (
as("integer",
action(
label("digits",
text(
some(
re(/^[0-9]/, "[0-9]")
)
)
),
_code[0])
/*{ return parseInt(digits, 10); }*/
)
());
}
module.exports = (function(){
/* Generated by PEG.js-fn @VERSION (http://pegjs.majda.cz/). */
/* Functional modification by shaman.sir@gmail.com (http://shamansir.github.com/). */
/* ########### ENVIRONMENT ########### */
var input,
options;
var pos, // 0, parser position
p_pos; // 0, previous parser position
// This code encloses all of the user blocks (initializer and/or actions)
// in their own sandbox, so if there is an initializer, its inner variables
// will [only] be accessible to actions; this, however, requires an initializer
// not to have any first-level return statements. Also, this approach keeps parser
// inner variables safe from user access, except the ones defined above.
// Factory of user action blocks. The outer IIFE yields a function; calling
// that function (done in parse()) evaluates the user code and returns an
// object mapping rule name -> array of action wrappers.
var __p_blocks = (function() { return function() {
// backwards compatibility with original peg-js
// These helpers read the shared p_pos/pos/input variables, so they report
// on whatever span p_pos..pos currently denotes.
function offset() { return p_pos; };
function text() { return input.substring(p_pos, pos); };
// __p_coord returns [ column, line ], hence the [1]/[0] indices below
function line() { return __p_coord(p_pos)[1]; };
function column() { return __p_coord(p_pos)[0]; };
function cell() { return __p_coord(p_pos); };
/* ########### USER CODE ########### */
/* ----------- BLOCKS ----------- */
// Blocks are grouped by rule name and id; they all get access to current context
// through č variable which they expand into their arguments. Arguments
// names are precalculated during parser generation process.
// ƒ and č variables are named so creepy just to ensure that parser writer will not use them
// for naming variables in his code (only č may clash in this architecture, in fact),
// we hope any modern environment supports Unicode now
return {
"additive": [
function(č) {
// additive[0]
// unpacks the labelled values `left` and `right` from the context
return (function(left,right) {
return left + right;
})(č.left,č.right);
}
],
"multiplicative": [
function(č) {
// multiplicative[0]
// unpacks the labelled values `left` and `right` from the context
return (function(left,right) {
return left * right;
})(č.left,č.right);
}
],
"primary": [
function(č) {
// primary[0]
// returns the value labelled `additive` (the parenthesised expression)
return (function(additive) {
return additive;
})(č.additive);
}
],
"integer": [
function(č) {
// integer[0]
// converts the value labelled `digits` to a base-10 number
return (function(digits) {
return parseInt(digits, 10);
})(č.digits);
}
]
};
} })();
// The ƒ and č variables have deliberately unusual names so that a grammar author
// is unlikely to pick the same names in their own code (in this architecture
// only č can actually clash). This relies on the environment supporting Unicode identifiers.
var ƒ = null; // holds a pointer to current rule blocks, will be initialized in parse() function
/* ########### PARSER ########### */
var __parser = function() {
/* =========== PARSER-DEPENDENT CODE =========== */
/* ----------- RULES DEFINITIONS ----------- */
// Rule table: one function per grammar rule, keyed by rule name. Each function
// builds a tree of deferred operators and executes it with the trailing `()`.
// The operators referenced here are function declarations/vars of the enclosing
// parser function; `_code` holds the rule's user action blocks taken from ƒ.
var rules = {}; (function() {
// start = additive
rules.start = function() {
return (
ref(rules.additive)
());
}
// additive = left:multiplicative "+" right:additive { action } / multiplicative
rules.additive = function() {
var _code = ƒ.additive;
return (
choice(
action(
seqnc(
label("left",
ref(rules.multiplicative)
),
match("+"),
label("right",
ref(rules.additive)
)
),
_code[0])
/*{ return left + right; }*/,
ref(rules.multiplicative)
)
());
}
// multiplicative = left:primary "*" right:multiplicative { action } / primary
rules.multiplicative = function() {
var _code = ƒ.multiplicative;
return (
choice(
action(
seqnc(
label("left",
ref(rules.primary)
),
match("*"),
label("right",
ref(rules.multiplicative)
)
),
_code[0])
/*{ return left * right; }*/,
ref(rules.primary)
)
());
}
// primary = integer / "(" additive:additive ")" { action }
rules.primary = function() {
var _code = ƒ.primary;
return (
choice(
ref(rules.integer),
action(
seqnc(
match("("),
label("additive",
ref(rules.additive)
),
match(")")
),
_code[0])
/*{ return additive; }*/
)
());
}
// integer "integer" = digits:$[0-9]+ { action }
// `as` sets the alias used in failure reports, `text` captures the matched
// substring, `some` requires one or more matches of the character class.
rules.integer = function() {
var _code = ƒ.integer;
return (
as("integer",
action(
label("digits",
text(
some(
re(/^[0-9]/, "[0-9]")
)
)
),
_code[0])
/*{ return parseInt(digits, 10); }*/
)
());
}
})();
/* ----------- OPERATORS ----------- */
// Returns the character at the current parser position, or the EOI marker
// when the position is not inside the input.
function cc() {
  if (pos < ilen) {
    return input.charAt(pos);
  }
  return EOI;
}
var ref = def(inctx); // will call rule inside context
// Runs expression `f` inside its own context level and, once it has matched,
// calls the user block `code` with the current context; the block's return
// value is the result of the action. A block returning `null` counts as a
// failed match: the position is rewound and a failure is raised.
//
// f    - deferred expression to execute (its own result is discarded)
// code - user block, receives the current context object
function action(f, code) {
  return inctx(function() {
    var start = pos; // where this action's expression begins
    p_pos = start;
    f();
    // Actions nested inside f() overwrite the shared p_pos with their own
    // start position; restore ours so that offset()/text()/line()/column()
    // called from the user block describe this action's match.
    p_pos = start;
    var res = code(cctx);
    if (res === null) {
      pos = start; // rewind to the beginning of this action, not a nested one
      failed(SOMETHING, NOTHING);
    }
    return res;
  });
}
action = def(action);
// Sequence: executes every passed expression in order and returns the array
// of their results. When any of them fails to match, the parser position is
// rewound to where the sequence started and the failure is re-thrown.
function seqnc(/*f...*/) {
  var start = pos; // position to rewind to on a miss
  function rewind(e) {
    pos = start;
    throw e;
  }
  return Array.prototype.map.call(arguments, function(part) {
    return safe(part, rewind);
  });
}
seqnc = def(seqnc);
// Executes expression `f` under the alias `name`, so that failures raised
// while it runs are reported with this human-readable name (see failed()).
//
// The alias is restored in a `finally` clause: a failed match exits f() with
// an exception, and without the restore the alias would leak into every
// failure reported afterwards (e.g. the "(" alternative of `primary` would be
// reported as "integer"). The previous alias is restored rather than '' so an
// enclosing alias stays in effect.
//
// name - alias to report
// f    - deferred expression to execute
// Returns whatever f() returns; re-throws whatever f() throws.
function as(name, f) {
  var outer = alias;
  alias = name;
  try {
    return f();
  } finally {
    alias = outer;
  }
}
as = def(as);
// Ordered choice: tries the passed expressions one by one and returns the
// result of the first that matches. When all of them miss, the failure of
// the last alternative is thrown.
function choice(/*f...*/) {
  var alternatives = arguments;
  var lastError = null;
  var didMiss;
  function remember(e) {
    lastError = e;
    didMiss = true;
  }
  for (var i = 0; i < alternatives.length; i++) {
    didMiss = false;
    var value = safe(alternatives[i], remember);
    if (!didMiss) {
      return value;
    }
  }
  throw lastError;
}
choice = def(choice);
// Matches the literal string `str` at the current position. On success the
// position advances past it and `str` is returned; otherwise a failure is
// raised, reporting EOI when the literal would run past the end of input and
// the current character when the input simply differs.
function match(str) {
  var len = str.length;
  var wanted = quote(str);
  if (pos + len > ilen) {
    failed(wanted, EOI); // exits
  }
  if (input.substr(pos, len) !== str) {
    failed(wanted, cc()); // exits
  }
  pos += len;
  return str;
}
match = def(match);
// Executes expression `f` and stores its result in the current context under
// the name `lbl`, making it available to the user action blocks. Returns the
// stored value.
function label(lbl, f) {
  var target = cctx; // the context is picked before f() runs
  var value = f();
  target[lbl] = value;
  return value;
}
label = def(label);
// Executes expression `f` and returns the piece of input it consumed, i.e.
// the text between the position before and the position after the call.
// The result of f() itself is discarded.
function text(f) {
  var start = pos; // local copy, independent of the shared p_pos
  f();
  var consumed = pos - start;
  return input.substr(start, consumed);
}
text = def(text);
// One-or-more: executes `f` once (a miss here propagates to the caller) and
// then collects any further matches through any(); returns all results as a
// single array.
function some(f) {
  var first = f();
  var rest = any(f)();
  return [first].concat(rest);
}
some = def(some);
// Zero-or-more: keeps executing `f` until it misses and returns the array of
// results collected before the miss (possibly empty). The miss itself is
// swallowed.
function any(f) {
  var results = [];
  var stop = false;
  function onMiss() {
    stop = true;
  }
  for (;;) {
    var item = safe(f, onMiss);
    if (stop) {
      break;
    }
    results.push(item);
  }
  return results;
}
any = def(any);
// Matches regular expression `rx` against the input that starts at the
// current position. The match has to begin exactly at that position; on
// success the position advances and the matched text is returned, otherwise
// a failure described by `desc` (or by the regex source) is raised.
function re(rx, desc) {
  var wanted = desc || rx.source;
  var found = rx.exec(input.substr(pos));
  if (!found || found.index !== 0) {
    failed(wanted, cc()); // exits
  }
  var matched = found[0];
  pos += matched.length;
  return matched;
}
re = def(re);
/* =========== PARSER-INDEPENDENT CODE =========== */
/* ----------- VARIABLES ----------- */
var cache, // {}, rule results cache, by name/pos
ctx, // { ... }, total context
cctx, // { ... }, current context pointer
ctxl, // -1, context level
current, // '-', current rule name
alias; // '', current rule alias, if defined
var failures, // {}, failures data
rmfpos, // 0, rightmost failure position
nr; // 0, no-report, fire errors w/o reporting
var /*input, */ilen; // input, input length
/* ----------- CONTEXT ----------- */
// Creates a new context level on top of `parent`. The new level uses the
// parent as its prototype, so values stored on outer levels stay readable.
//   __p - parent level, __l - level number (increments ctxl), __c - child slot
function ctx_lvl(parent) {
  function CtxLevel() {}
  CtxLevel.prototype = parent;
  var level = new CtxLevel();
  level.__p = parent;
  level.__l = ++ctxl;
  level.__c = null;
  return level;
}
// Dive in: makes the child of the current context level the current one,
// creating the child on first use and reusing it afterwards.
//
// ctx_lvl() increments ctxl only when it creates a level, while dout()
// decrements it on every exit, so ctxl is also incremented here when a cached
// child is reused. That keeps ctxl equal to the depth of the current level.
function din() { // dive in
  if (cctx.__c) {
    ++ctxl;
  } else {
    cctx.__c = ctx_lvl(cctx);
  }
  cctx = cctx.__c;
}
// Dive out: makes the parent of the current context level the current one
// and decrements the level counter. Throws when there is no parent.
function dout() { // dive out
  var parent = cctx.__p;
  if (!parent) {
    throw new Error('reached top context level');
  }
  cctx = parent;
  --ctxl;
}
// Executes `f` inside its own context level and returns its result. The
// level is left again before a match failure raised by `f` is re-thrown.
function inctx(f) { // execute in own context and return
  var caught;
  din();
  var value = safe(f, function(err) { caught = err; });
  dout();
  if (caught) {
    throw caught;
  }
  return value;
}
/* ----------- DEFERRED ----------- */
// Turns `f` into a deferred function: calling the result only remembers the
// arguments and gives back a thunk; `f` runs (with those remembered
// arguments and a null `this`) each time the thunk is called.
function def(f) {
  return function() {
    var captured = arguments;
    return function() {
      return f.apply(null, captured);
    };
  };
}
/* ----------- RULES WRAPPER ----------- */
var ckey; // cache key
// Replace every rule with a wrapper that records the rule's name in
// `current` before running the original rule function.
for (var rule in rules) {
  rules[rule] = (function(name, impl) {
    return function() {
      current = name;
      return impl();
    };
  })(rule, rules[rule]);
}
/* ----------- RESULT OBJECT + PARSE FUNCTION ----------- */
// The object returned to the user of the generated parser.
var result = {
/*
* Parses the input with a generated parser. If the parsing is successful,
* returns a value explicitly or implicitly specified by the grammar from
* which the parser was generated (see |PEG.buildParser|). If the parsing is
* unsuccessful, throws |PEG.parser.MatchFailed| describing the error.
*
* _input - the string to parse
* _opts  - optional options object; `startRule` selects the rule to start from
*/
parse: function(_input, _opts) {
var _opts = _opts || {};
// initialize variables
// (all parser state is shared between calls, so it is reset here)
pos = 0, p_pos = 0, input = _input, options = _opts;
ilen = input.length, failures = {}, rmfpos = 0, nr = 0;
__p_reset_coord();
cache = {};
ctxl = -1; ctx = ctx_lvl(), cctx = ctx;
current = '-';
// only the rules listed in the array below may be used as start rules
var startRule = _opts.startRule || "start";
if (["start"].indexOf(startRule) < 0) {
throw new Error("Can't start parsing from rule " + quote(startRule) + ".");
}
// call user initializer and also
// get blocks lying in the same context
ƒ = __p_blocks();
// find start rule
// (startRule always has a value here because of the "start" default above)
if (startRule) {
if (rules[startRule] === undefined) {
throw new SyntaxError("Rule not found: " + quote(startRule) + ".");
};
} else {
throw new Error("Start rule is not defined in options, no 'start' rule found and first rule in grammar was empty");
}
// and execute it
var res;
try {
res = rules[startRule]();
// leftover input or a null result is reported as "expected end of input"
if ((pos < ilen) ||
(res === null)) failed(EOI, cc());
} catch(e) {
if (e instanceof MatchFailed) {
// throw rightmost error instead
throw adapt(failures[rmfpos]);
}
// anything that is not a match failure is passed through untouched
throw e;
}
return res;
},
/* Returns the parser source code. */
// NOTE(review): nothing in this file assigns `_source` — confirm where it is set
toSource: function() { return this._source; },
/* makes error type accessible outside */
MatchFailed: MatchFailed,
SyntaxError: SyntaxError
};
/* ----------- UTILS ----------- */
// A Marker is a named sentinel value used in failure reports where no literal
// text applies; it converts to its human-readable description.
function Marker(human_str) {
  this.str = human_str;
}
Marker.prototype.toString = function() {
  return this.str;
};
var EOI = new Marker('end of input');
var ANY = new Marker('any character');
var SOMETHING = new Marker('progress');
var NOTHING = new Marker('nothing');
// Returns the JavaScript escape sequence for the first character of `ch`:
// `\xHH` for char codes up to 0xFF and `\uHHHH` above that.
//
// The hex digits are zero-padded on the LEFT; padding on the right (as was
// done before) changes the value, e.g. "\x01" was rendered as "\x10".
function hexOf(ch) {
  var code = ch.charCodeAt(0),
      wide = (code > 0xFF),
      width = wide ? 4 : 2,
      hex = code.toString(16).toUpperCase();
  while (hex.length < width) hex = '0' + hex;
  return '\\' + (wide ? 'u' : 'x') + hex;
}
// Returns `s` as the text of a double-quoted JavaScript string literal.
function quote(s) {
  /*
   * ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a
   * string literal except for the closing quote character, backslash,
   * carriage return, line separator, paragraph separator, and line feed.
   * Any character may appear in the form of an escape sequence.
   *
   * For portability, we also escape all control and non-ASCII characters.
   * Note that "\0" and "\v" escape sequences are not used because JSHint
   * does not like the first and IE the second.
   */
  var escapes = [
    [/\\/g, '\\\\'],  // backslash (has to go first)
    [/"/g, '\\"'],    // closing quote character
    [/\x08/g, '\\b'], // backspace
    [/\t/g, '\\t'],   // horizontal tab
    [/\n/g, '\\n'],   // line feed
    [/\f/g, '\\f'],   // form feed
    [/\r/g, '\\r'],   // carriage return
    [/[\x00-\x07\x0B\x0E-\x1F\x80-\uFFFF]/g, hexOf]
  ];
  var escaped = s;
  for (var i = 0; i < escapes.length; i++) {
    escaped = escaped.replace(escapes[i][0], escapes[i][1]);
  }
  return '"' + escaped + '"';
}
/* ----------- FAILURES ----------- */
// Error describing a failed match.
//   what     - name (or alias) of the rule that was being matched
//   found    - what was found in the input instead
//   expected - list of things that would have been accepted (default: [])
// The offset is taken from the current parser position; xpos/line/column
// stay at -1 until adapt() fills them in.
function MatchFailed(what, found, expected) {
  this.what = what;
  this.expected = expected ? expected : [];
  this.found = found;
  this.offset = pos;
  this.xpos = [-1, -1];
  this.line = -1;
  this.column = -1;
}
MatchFailed.prototype = new Error();
MatchFailed.prototype.toString = function() {
  return 'MatchFailed: ' + emsg(this);
};
// Creates a MatchFailed attributed to the current alias or, when no alias is
// set, to the current rule.
var merr = function(fnd, exp) {
  var owner = alias ? alias : current;
  return new MatchFailed(owner, fnd, exp);
};
// Raises a match failure at the current position; never returns.
//   expected - what the parser was looking for (replaced by the alias if set)
//   found    - what was found instead
// In no-report mode (nr) a fresh error is thrown. Otherwise one error object
// is kept per position: it accumulates the distinct expectations, its
// `found` is overwritten, and the rightmost failure position is tracked.
function failed(expected, found) {
  var wanted = alias || expected;
  // if no report required, just throw
  if (nr) {
    throw merr(found, [wanted]);
  }
  if (pos > rmfpos) {
    rmfpos = pos;
  }
  var record = failures[pos];
  if (!record) {
    record = failures[pos] = merr(found);
  }
  record.found = found;
  var known = record.expected;
  if (known.indexOf(wanted) < 0) {
    known.push(wanted);
  }
  throw record;
}
// Executes `f` and returns its result. A MatchFailed thrown by `f` is handed
// to `callback` (when given) and swallowed, making the call return
// undefined; any other exception is passed through.
function safe(f, callback) {
  try {
    return f();
  } catch (e) {
    if (!(e instanceof MatchFailed)) {
      throw e;
    }
    if (callback) {
      callback(e);
    }
  }
}
// Builds the human-readable message for match failure `e`:
// "Expected <a, b or c> but <found> found."
//
// adapt() replaces an EOI `found` with null and an [EOI] `expected` with [],
// so both forms are rendered as "end of input" here. Previously a null
// `found` made quote() throw a TypeError and an empty list printed
// "undefined", which broke MatchFailed#toString on adapted errors.
function emsg(e) {
  var found = e.found,
      xs = e.expected;
  var found_str, exp_str;
  if (found === null || found === undefined) {
    found_str = EOI.str;
  } else if (found instanceof Marker) {
    found_str = found.str;
  } else {
    found_str = quote(found);
  }
  if (xs instanceof Marker) {
    exp_str = xs.str;
  } else if (!xs || xs.length === 0) {
    exp_str = EOI.str;
  } else if (xs.length === 1) {
    exp_str = (xs[0] instanceof Marker) ? xs[0].str : xs[0];
  } else {
    exp_str = xs.slice(0,-1).join(', ')+' '+
              'or '+xs.slice(-1);
  }
  return /*'Stopped at '+quote(e.what)+': */'Expected '+exp_str+
         ' but '+found_str+' found.';
}
// Prepares failure `e` for the user: fills in the coordinates and message,
// turns an EOI `found` into null, sorts the expectations, empties the list
// when end of input was the only expectation, and replaces the remaining
// markers with their descriptions. Modifies and returns `e`.
function adapt(e) {
  var coords = __p_coord(e.offset); // [ column, line ]
  e.xpos = coords;
  e.line = coords[1];
  e.column = coords[0];
  e.message = emsg(e); // built before `found`/`expected` are rewritten below
  if ((e.found instanceof Marker) && (e.found === EOI)) {
    e.found = null;
  }
  var sorted = e.expected.sort();
  if ((sorted.length === 1) && (sorted[0] === EOI)) {
    e.expected = [];
  }
  for (var i = 0; i < sorted.length; i++) {
    if (sorted[i] instanceof Marker) {
      sorted[i] = sorted[i].str;
    }
  }
  return e;
}
// Parser-specific error type (it shadows the built-in SyntaxError inside the
// parser); may be thrown from the parser.
function SyntaxError(msg) {
  this.message = msg;
}
SyntaxError.prototype = new Error();
SyntaxError.prototype.toString = function() {
  return 'SyntaxError: ' + this.message;
};
/* ---------- RETURN RESULT OBJECT ----------- */
return result;
};
/* ----------- COORDINATES CALCULATION ----------- */
// a function to find line-column position from a char-based position
// Cache of the last computed 2d position: [ last_pos, column, line, seen_cr ]
var __p_coord_cache;
// Resets the cache to the beginning of the input (position 0, column 1, line 1).
function __p_reset_coord() { __p_coord_cache = [ 0, 1, 1, 0 ]; }
__p_reset_coord();
// Converts the char-based position `pos` into [ column, line ], both 1-based.
// Scanning continues from the cached position when `pos` lies at or after it
// and restarts from the beginning of the input when `pos` lies before it.
// "\n", "\r", "\r\n", "\u2028" and "\u2029" each count as one line break.
function __p_coord(pos) {
  /*
   * The first idea was to use |String.split| to break the input up to the
   * error position along newlines and derive the line and column from
   * there. However IE's |split| implementation is so broken that it was
   * enough to prevent it.
   */
  var c = __p_coord_cache;
  if (pos === c[0]) return [ c[1], c[2] ];
  if (pos < c[0]) {
    // Going backwards: restart from the beginning. The fresh cache has to be
    // re-read here, otherwise the scan would start from the stale position,
    // visit nothing and report [ 1, 1 ] for every backwards lookup.
    __p_reset_coord();
    c = __p_coord_cache;
  }
  var cl = c[1], ln = c[2],
      cr = c[3]; // bool, was CR found or not?
  for (var i = c[0], ch; i < pos; i++) {
    ch = input.charAt(i);
    if (ch === "\n") {
      if (!cr) { ln++; } // the LF of a CR LF pair was already counted
      cl = 1; cr = 0;
    } else if (ch === "\r" || ch === "\u2028" || ch === "\u2029") {
      ln++; cl = 1; cr = 1;
    } else {
      cl++; cr = 0;
    }
  }
  __p_coord_cache = [ pos, cl, ln, cr ];
  return [ cl, ln ];
}
/* ----------- RETURN PARSER ----------- */
return __parser();
})();;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment