@fauxneticien
Last active March 30, 2022 16:56
String parsing using Nearley within R
library(tidyverse)
library(zoo)
library(V8)

# Toy lexicon using Toolbox/MDF-style backslash codes:
# \lx headword, \ps part of speech, \de definition, \xv example, \xe example translation
lexicon <-
'\\lx rouge
\\ps adjective
\\de red
\\xv La chaise est rouge
\\xe The chair is red
\\lx bonjour
\\de hello
\\ps exclamation
\\lx parler
\\ps verb
\\de speak
\\xv Parlez-vous français?
'

# One row per line, with the backslash code and its value split out, and each
# row tagged with the line number of the \lx entry it belongs to
lexicon_df <-
  read_lines(lexicon) %>%
  tibble(line = 1:length(.), data = .) %>%
  extract(col = data,
          regex = "\\\\([a-z]+)\\s(.*)",
          into = c("code", "value"),
          remove = F) %>%
  mutate(lx_id = ifelse(code == "lx", line, NA) %>% na.locf(na.rm = F))

# Check that all codes within each entry have been entered in the expected order
lexicon_grammar <- '
lexeme -> "lx" _ "ps" _ "de" _ examples:?
examples -> ("xv" _ "xe" _):+
_ -> " " | null
'

# Source the compile_grammar() helper from a GitHub gist
# (the same definition is reproduced later in this document)
source("https://git.io/vAFux")

parser <- compile_grammar(lexicon_grammar)

lexicon_df %>%
  filter(!is.na(code)) %>%
  group_by(lx_id) %>%
  summarise(code_sequence = paste0(code, collapse = " ")) %>%
  rowwise() %>%
  mutate(
    parsed_sequence = parser(code_sequence, stop_on_error = F),
    valid_sequence = is.list(parsed_sequence)
  )
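Calling the parser directly makes it easier to see what the helper returns for well-formed versus malformed code sequences. A quick sketch (the exact shape of a successful parse tree depends on what nearley and V8 hand back; the behaviour of the failure case follows from the compile_grammar() definition below):

# Well-formed entry: codes in the expected order, each "xv" paired with an "xe"
str(parser("lx ps de xv xe", stop_on_error = FALSE))

# Malformed entry (cf. the "bonjour" entry above, where \de precedes \ps):
# parser.feed() throws in JS, and the wrapper returns list(error = <message>)
str(parser("lx de ps", stop_on_error = FALSE))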
compile_grammar <- function(nearley_str) {

  ctx <- v8(global = "window")

  # for reproducible example, source JS from gist https://git.io/vAFRK
  # otherwise, source bundle.js
  ctx$source("https://git.io/vAFRV")

  function(test_string, stop_on_error = TRUE) {

    # JS parser object maintains state, so instantiate a new parser,
    # c.f. https://github.com/kach/nearley/issues/156
    ctx$assign("nearley_str", nearley_str)
    ctx$assign("grammar", JS('compileGrammar(nearley_str)'))
    ctx$assign("parser", JS('new nearley.Parser(nearley.Grammar.fromCompiled(grammar), { keepHistory: true })'))
    ctx$assign("test_string", test_string)

    parse_result <- tryCatch({
      ctx$eval(JS('parser.feed(test_string)'))
    }, error = function(e) {
      if (stop_on_error) {
        stop(e)
      } else {
        return(list(error = e$message))
      }
    })

    if ("error" %in% names(parse_result)) {
      # This will be an error message returned from tryCatch code
      parse_result
    } else {
      parse_tree <- ctx$get("parser.results")

      if (length(parse_tree) == 0) {
        warn_text <- paste0("Error: Parse incomplete, expecting more text at end of string: '", test_string, "'")
        warning(warn_text)
        list(warning = warn_text)
      } else {
        parse_tree
      }
    }
  }
}
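As a standalone sanity check, compile_grammar() can be pointed at a trivial grammar before trying the lexicon one. A minimal sketch (the greeting grammar and test strings here are made up for illustration; each call to compile_grammar() creates a fresh V8 context and sources the bundled nearley JS from the gist URL, so it needs network access):

library(V8)  # compile_grammar() builds its own V8 context via v8()

greeting <- compile_grammar('
greeting -> "hi" | "hello"
')

greeting("hello")                           # parses: returns nearley's parse tree (a list)
greeting("goodbye", stop_on_error = FALSE)  # fails:  returns list(error = <message>)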
# Variant of the lexicon grammar: codes separated by literal newlines instead of spaces
lexeme -> "lx" _NL "ps" _NL "de" _NL examples:?
examples -> ("xv" _NL "xe" _NL):+
_NL -> "\n"
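To use this newline-separated variant from R, the backslash in the grammar string needs doubling (so that nearley, not R, interprets the \n escape; nearley's own string rule does not accept a raw newline inside double quotes), and the code sequences would be collapsed with "\n" rather than " ", with a trailing newline after the last code. A sketch reusing compile_grammar(); nl_grammar and nl_parser are names for illustration only:

nl_grammar <- '
lexeme -> "lx" _NL "ps" _NL "de" _NL examples:?
examples -> ("xv" _NL "xe" _NL):+
_NL -> "\\n"
'
nl_parser <- compile_grammar(nl_grammar)

# e.g. summarise(code_sequence = paste0(code, collapse = "\n")) in the pipeline above,
# plus a trailing "\n", since the grammar expects a newline after every code:
nl_parser("lx\nps\nde\n", stop_on_error = FALSE)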
# Basic grammar as used in https://omrelli.ug/nearley-playground/
MAIN -> SENTENCE "."
SENTENCE -> SUB _ VERB _ MOD
SUB -> "My dog" | "Charles" | "A typical Reddit user"
VERB -> "sleeps" | "thinks" | "cries" | "tweets" | "believes in ponies"
MOD -> "with" _ OBJ | "while thinking about" _ OBJ | "better than" _ OBJ _ "can" | "agressively" | "but" _ SENTENCE
OBJ -> "a hammer" | "nobody" | "snakes"
_ -> " "
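The same helper also handles this playground grammar. A sketch that pastes the grammar above into an R string (basic_grammar and basic_parser are names for illustration), with one sentence the grammar generates and one it does not:

basic_grammar <- '
MAIN -> SENTENCE "."
SENTENCE -> SUB _ VERB _ MOD
SUB -> "My dog" | "Charles" | "A typical Reddit user"
VERB -> "sleeps" | "thinks" | "cries" | "tweets" | "believes in ponies"
MOD -> "with" _ OBJ | "while thinking about" _ OBJ | "better than" _ OBJ _ "can" | "agressively" | "but" _ SENTENCE
OBJ -> "a hammer" | "nobody" | "snakes"
_ -> " "
'
basic_parser <- compile_grammar(basic_grammar)

basic_parser("Charles sleeps with a hammer.")           # parses
basic_parser("Charles sleeps.", stop_on_error = FALSE)  # no MOD before the full stop: returns list(error = ...)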
// Generated using command: browserify in.js -o bundle.js
(function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(require,module,exports){
(function (global){
global.nearley =require('nearley');
global.compile = require("nearley/lib/compile");
global.generate = require("nearley/lib/generate");
global.nearleyGrammar = require("nearley/lib/nearley-language-bootstrapped");
global.compileGrammar = function (sourceCode) {
// Parse the grammar source into an AST
const grammarParser = new nearley.Parser(nearleyGrammar);
grammarParser.feed(sourceCode);
const grammarAst = grammarParser.results[0]; // TODO check for errors
// Compile the AST into a set of rules
const grammarInfoObject = compile(grammarAst, {});
// Generate JavaScript code from the rules
const grammarJs = generate(grammarInfoObject, "grammar");
// Pretend this is a CommonJS environment to catch exports from the grammar.
const module = { exports: {} };
eval(grammarJs);
return module.exports;
}
}).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {})
},{"nearley":9,"nearley/lib/compile":5,"nearley/lib/generate":6,"nearley/lib/nearley-language-bootstrapped":8}],2:[function(require,module,exports){
},{}],3:[function(require,module,exports){
(function (process){
// Copyright Joyent, Inc. and other Node contributors.
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to permit
// persons to whom the Software is furnished to do so, subject to the
// following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
// USE OR OTHER DEALINGS IN THE SOFTWARE.
// resolves . and .. elements in a path array with directory names there
// must be no slashes, empty elements, or device names (c:\) in the array
// (so also no leading and trailing slashes - it does not distinguish
// relative and absolute paths)
function normalizeArray(parts, allowAboveRoot) {
// if the path tries to go above the root, `up` ends up > 0
var up = 0;
for (var i = parts.length - 1; i >= 0; i--) {
var last = parts[i];
if (last === '.') {
parts.splice(i, 1);
} else if (last === '..') {
parts.splice(i, 1);
up++;
} else if (up) {
parts.splice(i, 1);
up--;
}
}
// if the path is allowed to go above the root, restore leading ..s
if (allowAboveRoot) {
for (; up--; up) {
parts.unshift('..');
}
}
return parts;
}
// Split a filename into [root, dir, basename, ext], unix version
// 'root' is just a slash, or nothing.
var splitPathRe =
/^(\/?|)([\s\S]*?)((?:\.{1,2}|[^\/]+?|)(\.[^.\/]*|))(?:[\/]*)$/;
var splitPath = function(filename) {
return splitPathRe.exec(filename).slice(1);
};
// path.resolve([from ...], to)
// posix version
exports.resolve = function() {
var resolvedPath = '',
resolvedAbsolute = false;
for (var i = arguments.length - 1; i >= -1 && !resolvedAbsolute; i--) {
var path = (i >= 0) ? arguments[i] : process.cwd();
// Skip empty and invalid entries
if (typeof path !== 'string') {
throw new TypeError('Arguments to path.resolve must be strings');
} else if (!path) {
continue;
}
resolvedPath = path + '/' + resolvedPath;
resolvedAbsolute = path.charAt(0) === '/';
}
// At this point the path should be resolved to a full absolute path, but
// handle relative paths to be safe (might happen when process.cwd() fails)
// Normalize the path
resolvedPath = normalizeArray(filter(resolvedPath.split('/'), function(p) {
return !!p;
}), !resolvedAbsolute).join('/');
return ((resolvedAbsolute ? '/' : '') + resolvedPath) || '.';
};
// path.normalize(path)
// posix version
exports.normalize = function(path) {
var isAbsolute = exports.isAbsolute(path),
trailingSlash = substr(path, -1) === '/';
// Normalize the path
path = normalizeArray(filter(path.split('/'), function(p) {
return !!p;
}), !isAbsolute).join('/');
if (!path && !isAbsolute) {
path = '.';
}
if (path && trailingSlash) {
path += '/';
}
return (isAbsolute ? '/' : '') + path;
};
// posix version
exports.isAbsolute = function(path) {
return path.charAt(0) === '/';
};
// posix version
exports.join = function() {
var paths = Array.prototype.slice.call(arguments, 0);
return exports.normalize(filter(paths, function(p, index) {
if (typeof p !== 'string') {
throw new TypeError('Arguments to path.join must be strings');
}
return p;
}).join('/'));
};
// path.relative(from, to)
// posix version
exports.relative = function(from, to) {
from = exports.resolve(from).substr(1);
to = exports.resolve(to).substr(1);
function trim(arr) {
var start = 0;
for (; start < arr.length; start++) {
if (arr[start] !== '') break;
}
var end = arr.length - 1;
for (; end >= 0; end--) {
if (arr[end] !== '') break;
}
if (start > end) return [];
return arr.slice(start, end - start + 1);
}
var fromParts = trim(from.split('/'));
var toParts = trim(to.split('/'));
var length = Math.min(fromParts.length, toParts.length);
var samePartsLength = length;
for (var i = 0; i < length; i++) {
if (fromParts[i] !== toParts[i]) {
samePartsLength = i;
break;
}
}
var outputParts = [];
for (var i = samePartsLength; i < fromParts.length; i++) {
outputParts.push('..');
}
outputParts = outputParts.concat(toParts.slice(samePartsLength));
return outputParts.join('/');
};
exports.sep = '/';
exports.delimiter = ':';
exports.dirname = function(path) {
var result = splitPath(path),
root = result[0],
dir = result[1];
if (!root && !dir) {
// No dirname whatsoever
return '.';
}
if (dir) {
// It has a dirname, strip trailing slash
dir = dir.substr(0, dir.length - 1);
}
return root + dir;
};
exports.basename = function(path, ext) {
var f = splitPath(path)[2];
// TODO: make this comparison case-insensitive on windows?
if (ext && f.substr(-1 * ext.length) === ext) {
f = f.substr(0, f.length - ext.length);
}
return f;
};
exports.extname = function(path) {
return splitPath(path)[3];
};
function filter (xs, f) {
if (xs.filter) return xs.filter(f);
var res = [];
for (var i = 0; i < xs.length; i++) {
if (f(xs[i], i, xs)) res.push(xs[i]);
}
return res;
}
// String.prototype.substr - negative index don't work in IE8
var substr = 'ab'.substr(-1) === 'b'
? function (str, start, len) { return str.substr(start, len) }
: function (str, start, len) {
if (start < 0) start = str.length + start;
return str.substr(start, len);
}
;
}).call(this,require('_process'))
},{"_process":4}],4:[function(require,module,exports){
// shim for using process in browser
var process = module.exports = {};
// cached from whatever global is present so that test runners that stub it
// don't break things. But we need to wrap it in a try catch in case it is
// wrapped in strict mode code which doesn't define any globals. It's inside a
// function because try/catches deoptimize in certain engines.
var cachedSetTimeout;
var cachedClearTimeout;
function defaultSetTimout() {
throw new Error('setTimeout has not been defined');
}
function defaultClearTimeout () {
throw new Error('clearTimeout has not been defined');
}
(function () {
try {
if (typeof setTimeout === 'function') {
cachedSetTimeout = setTimeout;
} else {
cachedSetTimeout = defaultSetTimout;
}
} catch (e) {
cachedSetTimeout = defaultSetTimout;
}
try {
if (typeof clearTimeout === 'function') {
cachedClearTimeout = clearTimeout;
} else {
cachedClearTimeout = defaultClearTimeout;
}
} catch (e) {
cachedClearTimeout = defaultClearTimeout;
}
} ())
function runTimeout(fun) {
if (cachedSetTimeout === setTimeout) {
//normal enviroments in sane situations
return setTimeout(fun, 0);
}
// if setTimeout wasn't available but was latter defined
if ((cachedSetTimeout === defaultSetTimout || !cachedSetTimeout) && setTimeout) {
cachedSetTimeout = setTimeout;
return setTimeout(fun, 0);
}
try {
// when when somebody has screwed with setTimeout but no I.E. maddness
return cachedSetTimeout(fun, 0);
} catch(e){
try {
// When we are in I.E. but the script has been evaled so I.E. doesn't trust the global object when called normally
return cachedSetTimeout.call(null, fun, 0);
} catch(e){
// same as above but when it's a version of I.E. that must have the global object for 'this', hopfully our context correct otherwise it will throw a global error
return cachedSetTimeout.call(this, fun, 0);
}
}
}
function runClearTimeout(marker) {
if (cachedClearTimeout === clearTimeout) {
//normal enviroments in sane situations
return clearTimeout(marker);
}
// if clearTimeout wasn't available but was latter defined
if ((cachedClearTimeout === defaultClearTimeout || !cachedClearTimeout) && clearTimeout) {
cachedClearTimeout = clearTimeout;
return clearTimeout(marker);
}
try {
// when when somebody has screwed with setTimeout but no I.E. maddness
return cachedClearTimeout(marker);
} catch (e){
try {
// When we are in I.E. but the script has been evaled so I.E. doesn't trust the global object when called normally
return cachedClearTimeout.call(null, marker);
} catch (e){
// same as above but when it's a version of I.E. that must have the global object for 'this', hopfully our context correct otherwise it will throw a global error.
// Some versions of I.E. have different rules for clearTimeout vs setTimeout
return cachedClearTimeout.call(this, marker);
}
}
}
var queue = [];
var draining = false;
var currentQueue;
var queueIndex = -1;
function cleanUpNextTick() {
if (!draining || !currentQueue) {
return;
}
draining = false;
if (currentQueue.length) {
queue = currentQueue.concat(queue);
} else {
queueIndex = -1;
}
if (queue.length) {
drainQueue();
}
}
function drainQueue() {
if (draining) {
return;
}
var timeout = runTimeout(cleanUpNextTick);
draining = true;
var len = queue.length;
while(len) {
currentQueue = queue;
queue = [];
while (++queueIndex < len) {
if (currentQueue) {
currentQueue[queueIndex].run();
}
}
queueIndex = -1;
len = queue.length;
}
currentQueue = null;
draining = false;
runClearTimeout(timeout);
}
process.nextTick = function (fun) {
var args = new Array(arguments.length - 1);
if (arguments.length > 1) {
for (var i = 1; i < arguments.length; i++) {
args[i - 1] = arguments[i];
}
}
queue.push(new Item(fun, args));
if (queue.length === 1 && !draining) {
runTimeout(drainQueue);
}
};
// v8 likes predictible objects
function Item(fun, array) {
this.fun = fun;
this.array = array;
}
Item.prototype.run = function () {
this.fun.apply(null, this.array);
};
process.title = 'browser';
process.browser = true;
process.env = {};
process.argv = [];
process.version = ''; // empty string to avoid regexp issues
process.versions = {};
function noop() {}
process.on = noop;
process.addListener = noop;
process.once = noop;
process.off = noop;
process.removeListener = noop;
process.removeAllListeners = noop;
process.emit = noop;
process.prependListener = noop;
process.prependOnceListener = noop;
process.listeners = function (name) { return [] }
process.binding = function (name) {
throw new Error('process.binding is not supported');
};
process.cwd = function () { return '/' };
process.chdir = function (dir) {
throw new Error('process.chdir is not supported');
};
process.umask = function() { return 0; };
},{}],5:[function(require,module,exports){
(function (process,__dirname){
(function(root, factory) {
if (typeof module === 'object' && module.exports) {
module.exports = factory(require('./nearley'));
} else {
root.Compile = factory(root.nearley);
}
}(this, function(nearley) {
function Compile(structure, opts) {
var unique = uniquer();
if (!opts.alreadycompiled) {
opts.alreadycompiled = [];
}
var result = {
rules: [],
body: [], // @directives list
customTokens: [], // %tokens
config: {}, // @config value
macros: {},
start: ''
};
for (var i = 0; i < structure.length; i++) {
var productionRule = structure[i];
if (productionRule.body) {
// This isn't a rule, it's an @directive.
if (!opts.nojs) {
result.body.push(productionRule.body);
}
} else if (productionRule.include) {
// Include file
var path;
if (!productionRule.builtin) {
path = require('path').resolve(
opts.file ? require('path').dirname(opts.file) : process.cwd(),
productionRule.include
);
} else {
path = require('path').resolve(
__dirname,
'../builtin/',
productionRule.include
);
}
if (opts.alreadycompiled.indexOf(path) === -1) {
opts.alreadycompiled.push(path);
f = require('fs').readFileSync(path).toString();
var parserGrammar = new require('./nearley-language-bootstrapped.js');
var parser = new nearley.Parser(parserGrammar.ParserRules, parserGrammar.ParserStart);
parser.feed(f);
var c = Compile(parser.results[0], {path: path, __proto__:opts});
require('./lint.js')(c, {out: process.stderr});
result.rules = result.rules.concat(c.rules);
result.body = result.body.concat(c.body);
result.customTokens = result.customTokens.concat(c.customTokens);
Object.keys(c.config).forEach(function(k) {
result.config[k] = c.config[k];
});
Object.keys(c.macros).forEach(function(k) {
result.macros[k] = c.macros[k];
});
}
} else if (productionRule.macro) {
result.macros[productionRule.macro] = {
'args': productionRule.args,
'exprs': productionRule.exprs
};
} else if (productionRule.config) {
// This isn't a rule, it's an @config.
result.config[productionRule.config] = productionRule.value
} else {
produceRules(productionRule.name, productionRule.rules, {});
if (!result.start) {
result.start = productionRule.name;
}
}
}
return result;
function produceRules(name, rules, env) {
for (var i = 0; i < rules.length; i++) {
var rule = buildRule(name, rules[i], env);
if (opts.nojs) {
rule.postprocess = null;
}
result.rules.push(rule);
}
}
function buildRule(ruleName, rule, env) {
var tokens = [];
for (var i = 0; i < rule.tokens.length; i++) {
var token = buildToken(ruleName, rule.tokens[i], env);
if (token !== null) {
tokens.push(token);
}
}
return new nearley.Rule(
ruleName,
tokens,
rule.postprocess
);
}
function buildToken(ruleName, token, env) {
if (typeof token === 'string') {
if (token === 'null') {
return null;
}
return token;
}
if (token instanceof RegExp) {
return token;
}
if (token.literal) {
if (!token.literal.length) {
return null;
}
if (token.literal.length === 1 || result.config.lexer) {
return token;
}
return buildStringToken(ruleName, token, env);
}
if (token.token) {
if (result.config.lexer) {
var name = token.token;
if (result.customTokens.indexOf(name) === -1) {
result.customTokens.push(name);
}
var expr = result.config.lexer + ".has(" + JSON.stringify(name) + ") ? {type: " + JSON.stringify(name) + "} : " + name;
return {token: "(" + expr + ")"};
}
return token;
}
if (token.subexpression) {
return buildSubExpressionToken(ruleName, token, env);
}
if (token.ebnf) {
return buildEBNFToken(ruleName, token, env);
}
if (token.macrocall) {
return buildMacroCallToken(ruleName, token, env);
}
if (token.mixin) {
if (env[token.mixin]) {
return buildToken(ruleName, env[token.mixin], env);
} else {
throw new Error("Unbound variable: " + token.mixin);
}
}
throw new Error("unrecognized token: " + JSON.stringify(token));
}
function buildStringToken(ruleName, token, env) {
var newname = unique(ruleName + "$string");
produceRules(newname, [
{
tokens: token.literal.split("").map(function charLiteral(d) {
return {
literal: d
};
}),
postprocess: {builtin: "joiner"}
}
], env);
return newname;
}
function buildSubExpressionToken(ruleName, token, env) {
var data = token.subexpression;
var name = unique(ruleName + "$subexpression");
//structure.push({"name": name, "rules": data});
produceRules(name, data, env);
return name;
}
function buildEBNFToken(ruleName, token, env) {
switch (token.modifier) {
case ":+":
return buildEBNFPlus(ruleName, token, env);
case ":*":
return buildEBNFStar(ruleName, token, env);
case ":?":
return buildEBNFOpt(ruleName, token, env);
}
}
function buildEBNFPlus(ruleName, token, env) {
var name = unique(ruleName + "$ebnf");
/*
structure.push({
name: name,
rules: [{
tokens: [token.ebnf],
}, {
tokens: [token.ebnf, name],
postprocess: {builtin: "arrconcat"}
}]
});
*/
produceRules(name,
[{
tokens: [token.ebnf],
}, {
tokens: [name, token.ebnf],
postprocess: {builtin: "arrpush"}
}],
env
);
return name;
}
function buildEBNFStar(ruleName, token, env) {
var name = unique(ruleName + "$ebnf");
/*
structure.push({
name: name,
rules: [{
tokens: [],
}, {
tokens: [token.ebnf, name],
postprocess: {builtin: "arrconcat"}
}]
});
*/
produceRules(name,
[{
tokens: [],
}, {
tokens: [name, token.ebnf],
postprocess: {builtin: "arrpush"}
}],
env
);
return name;
}
function buildEBNFOpt(ruleName, token, env) {
var name = unique(ruleName + "$ebnf");
/*
structure.push({
name: name,
rules: [{
tokens: [token.ebnf],
postprocess: {builtin: "id"}
}, {
tokens: [],
postprocess: {builtin: "nuller"}
}]
});
*/
produceRules(name,
[{
tokens: [token.ebnf],
postprocess: {builtin: "id"}
}, {
tokens: [],
postprocess: {builtin: "nuller"}
}],
env
);
return name;
}
function buildMacroCallToken(ruleName, token, env) {
var name = unique(ruleName + "$macrocall");
var macro = result.macros[token.macrocall];
if (!macro) {
throw new Error("Unkown macro: "+token.macrocall);
}
if (macro.args.length !== token.args.length) {
throw new Error("Argument count mismatch.");
}
var newenv = {__proto__: env};
for (var i=0; i<macro.args.length; i++) {
var argrulename = unique(ruleName + "$macrocall");
newenv[macro.args[i]] = argrulename;
produceRules(argrulename, [token.args[i]], env);
//structure.push({"name": argrulename, "rules":[token.args[i]]});
//buildRule(name, token.args[i], env);
}
produceRules(name, macro.exprs, newenv);
return name;
}
}
function uniquer() {
var uns = {};
return unique;
function unique(name) {
var un = uns[name] = (uns[name] || 0) + 1;
return name + '$' + un;
}
}
return Compile;
}));
}).call(this,require('_process'),"/node_modules/nearley/lib")
},{"./lint.js":7,"./nearley":9,"_process":4,"fs":2,"path":3}],6:[function(require,module,exports){
(function(root, factory) {
if (typeof module === 'object' && module.exports) {
module.exports = factory(require('./nearley'));
} else {
root.generate = factory(root.nearley);
}
}(this, function(nearley) {
function serializeRules(rules, builtinPostprocessors) {
return "[\n " + rules.map(function(rule) {
return serializeRule(rule, builtinPostprocessors);
}).join(",\n ") + "\n]";
}
function dedentFunc(func) {
var lines = func.toString().split(/\n/);
if (lines.length === 1) {
return [lines[0].replace(/^\s+|\s+$/g, '')];
}
var indent = null;
var tail = lines.slice(1);
for (var i = 0; i < tail.length; i++) {
var match = /^\s*/.exec(tail[i]);
if (match && match[0].length !== tail[i].length) {
if (indent === null ||
match[0].length < indent.length) {
indent = match[0];
}
}
}
if (indent === null) {
return lines;
}
return lines.map(function dedent(line) {
if (line.slice(0, indent.length) === indent) {
return line.slice(indent.length);
}
return line;
});
}
function tabulateString(string, indent, options) {
var lines;
if(Array.isArray(string)) {
lines = string;
} else {
lines = string.toString().split('\n');
}
options = options || {};
tabulated = lines.map(function addIndent(line, i) {
var shouldIndent = true;
if(i == 0 && !options.indentFirst) {
shouldIndent = false;
}
if(shouldIndent) {
return indent + line;
} else {
return line;
}
}).join('\n');
return tabulated;
}
function serializeSymbol(s) {
if (s instanceof RegExp) {
return s.toString();
} else if (s.token) {
return s.token;
} else {
return JSON.stringify(s);
}
}
function serializeRule(rule, builtinPostprocessors) {
var ret = '{';
ret += '"name": ' + JSON.stringify(rule.name);
ret += ', "symbols": [' + rule.symbols.map(serializeSymbol).join(', ') + ']';
if (rule.postprocess) {
if(rule.postprocess.builtin) {
rule.postprocess = builtinPostprocessors[rule.postprocess.builtin];
}
ret += ', "postprocess": ' + tabulateString(dedentFunc(rule.postprocess), ' ', {indentFirst: false});
}
ret += '}';
return ret;
}
var generate = function (parser, exportName) {
if(!parser.config.preprocessor) {
parser.config.preprocessor = "_default";
}
if(!generate[parser.config.preprocessor]) {
throw new Error("No such preprocessor: " + parser.config.preprocessor)
}
return generate[parser.config.preprocessor](parser, exportName);
};
generate.js = generate._default = generate.javascript = function (parser, exportName) {
var output = "// Generated automatically by nearley\n";
output += "// http://github.com/Hardmath123/nearley\n";
output += "(function () {\n";
output += "function id(x) {return x[0]; }\n";
output += parser.body.join('\n');
output += "var grammar = {\n";
output += " Lexer: " + parser.config.lexer + ",\n";
output += " ParserRules: " +
serializeRules(parser.rules, generate.javascript.builtinPostprocessors)
+ "\n";
output += " , ParserStart: " + JSON.stringify(parser.start) + "\n";
output += "}\n";
output += "if (typeof module !== 'undefined'"
+ "&& typeof module.exports !== 'undefined') {\n";
output += " module.exports = grammar;\n";
output += "} else {\n";
output += " window." + exportName + " = grammar;\n";
output += "}\n";
output += "})();\n";
return output;
};
generate.javascript.builtinPostprocessors = {
"joiner": "function joiner(d) {return d.join('');}",
"arrconcat": "function arrconcat(d) {return [d[0]].concat(d[1]);}",
"arrpush": "function arrpush(d) {return d[0].concat([d[1]]);}",
"nuller": "function(d) {return null;}",
"id": "id"
}
generate.cs = generate.coffee = generate.coffeescript = function (parser, exportName) {
var output = "# Generated automatically by nearley\n";
output += "# http://github.com/Hardmath123/nearley\n";
output += "do ->\n";
output += " id = (d)->d[0]\n";
output += tabulateString(dedentFunc(parser.body.join('\n')), ' ') + '\n';
output += " grammar = {\n";
output += " Lexer: " + parser.config.lexer + ",\n";
output += " ParserRules: " +
tabulateString(
serializeRules(parser.rules, generate.coffeescript.builtinPostprocessors),
' ',
{indentFirst: false})
+ ",\n";
output += " ParserStart: " + JSON.stringify(parser.start) + "\n";
output += " }\n";
output += " if typeof module != 'undefined' "
+ "&& typeof module.exports != 'undefined'\n";
output += " module.exports = grammar;\n";
output += " else\n";
output += " window." + exportName + " = grammar;\n";
return output;
};
generate.coffeescript.builtinPostprocessors = {
"joiner": "(d) -> d.join('')",
"arrconcat": "(d) -> [d[0]].concat(d[1])",
"arrpush": "(d) -> d[0].concat([d[1]])",
"nuller": "() -> null",
"id": "id"
};
generate.ts = generate.typescript = function (parser, exportName) {
var output = "// Generated automatically by nearley\n";
output += "// http://github.com/Hardmath123/nearley\n";
output += "function id(d:any[]):any {return d[0];}\n";
output += parser.customTokens.map(function (token) { return "declare var " + token + ":any;\n" }).join("")
output += parser.body.join('\n');
output += "export interface Token {value:any; [key: string]:any};\n";
output += "export interface Lexer {reset:(chunk:string, info:any) => void; next:() => Token | undefined; save:() => any; formatError:(token:Token) => string; has:(tokenType:string) => boolean};\n";
output += "export interface NearleyRule {name:string; symbols:NearleySymbol[]; postprocess?:(d:any[],loc?:number,reject?:{})=>any};\n";
output += "export type NearleySymbol = string | {literal:any} | {test:(token:any) => boolean};\n";
output += "export var Lexer:Lexer|undefined = " + parser.config.lexer + ";\n";
output += "export var ParserRules:NearleyRule[] = " + serializeRules(parser.rules, generate.typescript.builtinPostprocessors) + ";\n";
output += "export var ParserStart:string = " + JSON.stringify(parser.start) + ";\n";
return output;
};
generate.typescript.builtinPostprocessors = {
"joiner": "(d) => d.join('')",
"arrconcat": "(d) => [d[0]].concat(d[1])",
"arrpush": "(d) => d[0].concat([d[1]])",
"nuller": "() => null",
"id": "id"
};
return generate;
}));
},{"./nearley":9}],7:[function(require,module,exports){
(function (process){
// Node-only
var warn = function (opts, str) {
opts.out.write("WARN"+"\t" + str + "\n");
}
function lintNames(grm, opts) {
var all = [];
grm.rules.forEach(function(rule) {
all.push(rule.name);
});
grm.rules.forEach(function(rule) {
rule.symbols.forEach(function(symbol) {
if (!symbol.literal && !symbol.token && symbol.constructor !== RegExp) {
if (all.indexOf(symbol) === -1) {
warn(opts,"Undefined symbol `" + symbol + "` used.");
}
}
});
});
}
function lint(grm, opts) {
if (!opts.out) opts.out = process.stderr;
lintNames(grm, opts);
}
module.exports = lint;
}).call(this,require('_process'))
},{"_process":4}],8:[function(require,module,exports){
// Generated automatically by nearley
// http://github.com/Hardmath123/nearley
(function () {
function id(x) {return x[0]; }
function insensitive(sl) {
var s = sl.literal;
result = [];
for (var i=0; i<s.length; i++) {
var c = s.charAt(i);
if (c.toUpperCase() !== c || c.toLowerCase() !== c) {
result.push(new RegExp("[" + c.toLowerCase() + c.toUpperCase() + "]"));
} else {
result.push({literal: c});
}
}
return {subexpression: [{tokens: result, postprocess: function(d) {return d.join(""); }}]};
}
var grammar = {
Lexer: undefined,
ParserRules: [
{"name": "dqstring$ebnf$1", "symbols": []},
{"name": "dqstring$ebnf$1", "symbols": ["dqstring$ebnf$1", "dstrchar"], "postprocess": function arrpush(d) {return d[0].concat([d[1]]);}},
{"name": "dqstring", "symbols": [{"literal":"\""}, "dqstring$ebnf$1", {"literal":"\""}], "postprocess": function(d) {return d[1].join(""); }},
{"name": "sqstring$ebnf$1", "symbols": []},
{"name": "sqstring$ebnf$1", "symbols": ["sqstring$ebnf$1", "sstrchar"], "postprocess": function arrpush(d) {return d[0].concat([d[1]]);}},
{"name": "sqstring", "symbols": [{"literal":"'"}, "sqstring$ebnf$1", {"literal":"'"}], "postprocess": function(d) {return d[1].join(""); }},
{"name": "btstring$ebnf$1", "symbols": []},
{"name": "btstring$ebnf$1", "symbols": ["btstring$ebnf$1", /[^`]/], "postprocess": function arrpush(d) {return d[0].concat([d[1]]);}},
{"name": "btstring", "symbols": [{"literal":"`"}, "btstring$ebnf$1", {"literal":"`"}], "postprocess": function(d) {return d[1].join(""); }},
{"name": "dstrchar", "symbols": [/[^\\"\n]/], "postprocess": id},
{"name": "dstrchar", "symbols": [{"literal":"\\"}, "strescape"], "postprocess":
function(d) {
return JSON.parse("\""+d.join("")+"\"");
}
},
{"name": "sstrchar", "symbols": [/[^\\'\n]/], "postprocess": id},
{"name": "sstrchar", "symbols": [{"literal":"\\"}, "strescape"], "postprocess": function(d) { return JSON.parse("\""+d.join("")+"\""); }},
{"name": "sstrchar$string$1", "symbols": [{"literal":"\\"}, {"literal":"'"}], "postprocess": function joiner(d) {return d.join('');}},
{"name": "sstrchar", "symbols": ["sstrchar$string$1"], "postprocess": function(d) {return "'"; }},
{"name": "strescape", "symbols": [/["\\\/bfnrt]/], "postprocess": id},
{"name": "strescape", "symbols": [{"literal":"u"}, /[a-fA-F0-9]/, /[a-fA-F0-9]/, /[a-fA-F0-9]/, /[a-fA-F0-9]/], "postprocess":
function(d) {
return d.join("");
}
},
{"name": "final", "symbols": ["whit?", "prog", "whit?"], "postprocess": function(d) { return d[1]; }},
{"name": "prog", "symbols": ["prod"], "postprocess": function(d) { return [d[0]]; }},
{"name": "prog", "symbols": ["prod", "whit", "prog"], "postprocess": function(d) { return [d[0]].concat(d[2]); }},
{"name": "prod$ebnf$1$subexpression$1", "symbols": [{"literal":"-"}]},
{"name": "prod$ebnf$1$subexpression$1", "symbols": [{"literal":"="}]},
{"name": "prod$ebnf$1", "symbols": ["prod$ebnf$1$subexpression$1"]},
{"name": "prod$ebnf$1$subexpression$2", "symbols": [{"literal":"-"}]},
{"name": "prod$ebnf$1$subexpression$2", "symbols": [{"literal":"="}]},
{"name": "prod$ebnf$1", "symbols": ["prod$ebnf$1", "prod$ebnf$1$subexpression$2"], "postprocess": function arrpush(d) {return d[0].concat([d[1]]);}},
{"name": "prod", "symbols": ["word", "whit?", "prod$ebnf$1", {"literal":">"}, "whit?", "expression+"], "postprocess": function(d) { return {name: d[0], rules: d[5]}; }},
{"name": "prod$ebnf$2$subexpression$1", "symbols": [{"literal":"-"}]},
{"name": "prod$ebnf$2$subexpression$1", "symbols": [{"literal":"="}]},
{"name": "prod$ebnf$2", "symbols": ["prod$ebnf$2$subexpression$1"]},
{"name": "prod$ebnf$2$subexpression$2", "symbols": [{"literal":"-"}]},
{"name": "prod$ebnf$2$subexpression$2", "symbols": [{"literal":"="}]},
{"name": "prod$ebnf$2", "symbols": ["prod$ebnf$2", "prod$ebnf$2$subexpression$2"], "postprocess": function arrpush(d) {return d[0].concat([d[1]]);}},
{"name": "prod", "symbols": ["word", {"literal":"["}, "wordlist", {"literal":"]"}, "whit?", "prod$ebnf$2", {"literal":">"}, "whit?", "expression+"], "postprocess": function(d) {return {macro: d[0], args: d[2], exprs: d[8]}}},
{"name": "prod", "symbols": [{"literal":"@"}, "whit?", "js"], "postprocess": function(d) { return {body: d[2]}; }},
{"name": "prod", "symbols": [{"literal":"@"}, "word", "whit", "word"], "postprocess": function(d) { return {config: d[1], value: d[3]}; }},
{"name": "prod$string$1", "symbols": [{"literal":"@"}, {"literal":"i"}, {"literal":"n"}, {"literal":"c"}, {"literal":"l"}, {"literal":"u"}, {"literal":"d"}, {"literal":"e"}], "postprocess": function joiner(d) {return d.join('');}},
{"name": "prod", "symbols": ["prod$string$1", "whit?", "string"], "postprocess": function(d) {return {include: d[2].literal, builtin: false}}},
{"name": "prod$string$2", "symbols": [{"literal":"@"}, {"literal":"b"}, {"literal":"u"}, {"literal":"i"}, {"literal":"l"}, {"literal":"t"}, {"literal":"i"}, {"literal":"n"}], "postprocess": function joiner(d) {return d.join('');}},
{"name": "prod", "symbols": ["prod$string$2", "whit?", "string"], "postprocess": function(d) {return {include: d[2].literal, builtin: true }}},
{"name": "expression+", "symbols": ["completeexpression"]},
{"name": "expression+", "symbols": ["expression+", "whit?", {"literal":"|"}, "whit?", "completeexpression"], "postprocess": function(d) { return d[0].concat([d[4]]); }},
{"name": "expressionlist", "symbols": ["completeexpression"]},
{"name": "expressionlist", "symbols": ["expressionlist", "whit?", {"literal":","}, "whit?", "completeexpression"], "postprocess": function(d) { return d[0].concat([d[4]]); }},
{"name": "wordlist", "symbols": ["word"]},
{"name": "wordlist", "symbols": ["wordlist", "whit?", {"literal":","}, "whit?", "word"], "postprocess": function(d) { return d[0].concat([d[4]]); }},
{"name": "completeexpression", "symbols": ["expr"], "postprocess": function(d) { return {tokens: d[0]}; }},
{"name": "completeexpression", "symbols": ["expr", "whit?", "js"], "postprocess": function(d) { return {tokens: d[0], postprocess: d[2]}; }},
{"name": "expr_member", "symbols": ["word"], "postprocess": id},
{"name": "expr_member", "symbols": [{"literal":"$"}, "word"], "postprocess": function(d) {return {mixin: d[1]}}},
{"name": "expr_member", "symbols": ["word", {"literal":"["}, "expressionlist", {"literal":"]"}], "postprocess": function(d) {return {macrocall: d[0], args: d[2]}}},
{"name": "expr_member$ebnf$1", "symbols": [{"literal":"i"}], "postprocess": id},
{"name": "expr_member$ebnf$1", "symbols": [], "postprocess": function(d) {return null;}},
{"name": "expr_member", "symbols": ["string", "expr_member$ebnf$1"], "postprocess": function(d) { if (d[1]) {return insensitive(d[0]); } else {return d[0]; } }},
{"name": "expr_member", "symbols": [{"literal":"%"}, "word"], "postprocess": function(d) {return {token: d[1]}}},
{"name": "expr_member", "symbols": ["charclass"], "postprocess": id},
{"name": "expr_member", "symbols": [{"literal":"("}, "whit?", "expression+", "whit?", {"literal":")"}], "postprocess": function(d) {return {'subexpression': d[2]} ;}},
{"name": "expr_member", "symbols": ["expr_member", "whit?", "ebnf_modifier"], "postprocess": function(d) {return {'ebnf': d[0], 'modifier': d[2]}; }},
{"name": "ebnf_modifier$string$1", "symbols": [{"literal":":"}, {"literal":"+"}], "postprocess": function joiner(d) {return d.join('');}},
{"name": "ebnf_modifier", "symbols": ["ebnf_modifier$string$1"], "postprocess": id},
{"name": "ebnf_modifier$string$2", "symbols": [{"literal":":"}, {"literal":"*"}], "postprocess": function joiner(d) {return d.join('');}},
{"name": "ebnf_modifier", "symbols": ["ebnf_modifier$string$2"], "postprocess": id},
{"name": "ebnf_modifier$string$3", "symbols": [{"literal":":"}, {"literal":"?"}], "postprocess": function joiner(d) {return d.join('');}},
{"name": "ebnf_modifier", "symbols": ["ebnf_modifier$string$3"], "postprocess": id},
{"name": "expr", "symbols": ["expr_member"]},
{"name": "expr", "symbols": ["expr", "whit", "expr_member"], "postprocess": function(d){ return d[0].concat([d[2]]); }},
{"name": "word", "symbols": [/[\w\?\+]/], "postprocess": function(d){ return d[0]; }},
{"name": "word", "symbols": ["word", /[\w\?\+]/], "postprocess": function(d){ return d[0]+d[1]; }},
{"name": "string", "symbols": ["dqstring"], "postprocess": function(d) {return { literal: d[0] }; }},
{"name": "charclass", "symbols": [{"literal":"."}], "postprocess": function(d) { return new RegExp("."); }},
{"name": "charclass", "symbols": [{"literal":"["}, "charclassmembers", {"literal":"]"}], "postprocess": function(d) { return new RegExp("[" + d[1].join('') + "]"); }},
{"name": "charclassmembers", "symbols": []},
{"name": "charclassmembers", "symbols": ["charclassmembers", "charclassmember"], "postprocess": function(d) { return d[0].concat([d[1]]); }},
{"name": "charclassmember", "symbols": [/[^\\\]]/], "postprocess": function(d) { return d[0]; }},
{"name": "charclassmember", "symbols": [{"literal":"\\"}, /./], "postprocess": function(d) { return d[0] + d[1]; }},
{"name": "js", "symbols": [{"literal":"{"}, {"literal":"%"}, "jscode", {"literal":"%"}, {"literal":"}"}], "postprocess": function(d) { return d[2]; }},
{"name": "jscode", "symbols": [], "postprocess": function() {return "";}},
{"name": "jscode", "symbols": ["jscode", /[^%]/], "postprocess": function(d) {return d[0] + d[1];}},
{"name": "jscode", "symbols": ["jscode", {"literal":"%"}, /[^}]/], "postprocess": function(d) {return d[0] + d[1] + d[2]; }},
{"name": "whit", "symbols": ["whitraw"]},
{"name": "whit", "symbols": ["whitraw?", "comment", "whit?"]},
{"name": "whit?", "symbols": []},
{"name": "whit?", "symbols": ["whit"]},
{"name": "whitraw", "symbols": [/[\s]/]},
{"name": "whitraw", "symbols": ["whitraw", /[\s]/]},
{"name": "whitraw?", "symbols": []},
{"name": "whitraw?", "symbols": ["whitraw"]},
{"name": "comment", "symbols": [{"literal":"#"}, "commentchars", {"literal":"\n"}]},
{"name": "commentchars", "symbols": []},
{"name": "commentchars", "symbols": ["commentchars", /[^\n]/]}
]
, ParserStart: "final"
}
if (typeof module !== 'undefined'&& typeof module.exports !== 'undefined') {
module.exports = grammar;
} else {
window.grammar = grammar;
}
})();
},{}],9:[function(require,module,exports){
(function(root, factory) {
if (typeof module === 'object' && module.exports) {
module.exports = factory();
} else {
root.nearley = factory();
}
}(this, function() {
function Rule(name, symbols, postprocess) {
this.id = ++Rule.highestId;
this.name = name;
this.symbols = symbols; // a list of literal | regex class | nonterminal
this.postprocess = postprocess;
return this;
}
Rule.highestId = 0;
Rule.prototype.toString = function(withCursorAt) {
function stringifySymbolSequence (e) {
return e.literal ? JSON.stringify(e.literal) :
e.type ? '%' + e.type : e.toString();
}
var symbolSequence = (typeof withCursorAt === "undefined")
? this.symbols.map(stringifySymbolSequence).join(' ')
: ( this.symbols.slice(0, withCursorAt).map(stringifySymbolSequence).join(' ')
+ " ● "
+ this.symbols.slice(withCursorAt).map(stringifySymbolSequence).join(' ') );
return this.name + " → " + symbolSequence;
}
// a State is a rule at a position from a given starting point in the input stream (reference)
function State(rule, dot, reference, wantedBy) {
this.rule = rule;
this.dot = dot;
this.reference = reference;
this.data = [];
this.wantedBy = wantedBy;
this.isComplete = this.dot === rule.symbols.length;
}
State.prototype.toString = function() {
return "{" + this.rule.toString(this.dot) + "}, from: " + (this.reference || 0);
};
State.prototype.nextState = function(child) {
var state = new State(this.rule, this.dot + 1, this.reference, this.wantedBy);
state.left = this;
state.right = child;
if (state.isComplete) {
state.data = state.build();
}
return state;
};
State.prototype.build = function() {
var children = [];
var node = this;
do {
children.push(node.right.data);
node = node.left;
} while (node.left);
children.reverse();
return children;
};
State.prototype.finish = function() {
if (this.rule.postprocess) {
this.data = this.rule.postprocess(this.data, this.reference, Parser.fail);
}
};
function Column(grammar, index) {
this.grammar = grammar;
this.index = index;
this.states = [];
this.wants = {}; // states indexed by the non-terminal they expect
this.scannable = []; // list of states that expect a token
this.completed = {}; // states that are nullable
}
Column.prototype.process = function(nextColumn) {
var states = this.states;
var wants = this.wants;
var completed = this.completed;
for (var w = 0; w < states.length; w++) { // nb. we push() during iteration
var state = states[w];
if (state.isComplete) {
state.finish();
if (state.data !== Parser.fail) {
// complete
var wantedBy = state.wantedBy;
for (var i = wantedBy.length; i--; ) { // this line is hot
var left = wantedBy[i];
this.complete(left, state);
}
// special-case nullables
if (state.reference === this.index) {
// make sure future predictors of this rule get completed.
var exp = state.rule.name;
(this.completed[exp] = this.completed[exp] || []).push(state);
}
}
} else {
// queue scannable states
var exp = state.rule.symbols[state.dot];
if (typeof exp !== 'string') {
this.scannable.push(state);
continue;
}
// predict
if (wants[exp]) {
wants[exp].push(state);
if (completed.hasOwnProperty(exp)) {
var nulls = completed[exp];
for (var i = 0; i < nulls.length; i++) {
var right = nulls[i];
this.complete(state, right);
}
}
} else {
wants[exp] = [state];
this.predict(exp);
}
}
}
}
Column.prototype.predict = function(exp) {
var rules = this.grammar.byName[exp] || [];
for (var i = 0; i < rules.length; i++) {
var r = rules[i];
var wantedBy = this.wants[exp];
var s = new State(r, 0, this.index, wantedBy);
this.states.push(s);
}
}
Column.prototype.complete = function(left, right) {
var inp = right.rule.name;
if (left.rule.symbols[left.dot] === inp) {
var copy = left.nextState(right);
this.states.push(copy);
}
}
function Grammar(rules, start) {
this.rules = rules;
this.start = start || this.rules[0].name;
var byName = this.byName = {};
this.rules.forEach(function(rule) {
if (!byName.hasOwnProperty(rule.name)) {
byName[rule.name] = [];
}
byName[rule.name].push(rule);
});
}
// So we can allow passing (rules, start) directly to Parser for backwards compatibility
Grammar.fromCompiled = function(rules, start) {
var lexer = rules.Lexer;
if (rules.ParserStart) {
start = rules.ParserStart;
rules = rules.ParserRules;
}
var rules = rules.map(function (r) { return (new Rule(r.name, r.symbols, r.postprocess)); });
var g = new Grammar(rules, start);
g.lexer = lexer; // nb. storing lexer on Grammar is iffy, but unavoidable
return g;
}
function StreamLexer() {
this.reset("");
}
StreamLexer.prototype.reset = function(data, state) {
this.buffer = data;
this.index = 0;
this.line = state ? state.line : 1;
this.lastLineBreak = state ? -state.col : 0;
}
StreamLexer.prototype.next = function() {
if (this.index < this.buffer.length) {
var ch = this.buffer[this.index++];
if (ch === '\n') {
this.line += 1;
this.lastLineBreak = this.index;
}
return {value: ch};
}
}
StreamLexer.prototype.save = function() {
return {
line: this.line,
col: this.index - this.lastLineBreak,
}
}
StreamLexer.prototype.formatError = function(token, message) {
// nb. this gets called after consuming the offending token,
// so the culprit is index-1
var buffer = this.buffer;
if (typeof buffer === 'string') {
var nextLineBreak = buffer.indexOf('\n', this.index);
if (nextLineBreak === -1) nextLineBreak = buffer.length;
var line = buffer.substring(this.lastLineBreak, nextLineBreak)
var col = this.index - this.lastLineBreak;
message += " at line " + this.line + " col " + col + ":\n\n";
message += " " + line + "\n"
message += " " + Array(col).join(" ") + "^"
return message;
} else {
return message + " at index " + (this.index - 1);
}
}
function Parser(rules, start, options) {
if (rules instanceof Grammar) {
var grammar = rules;
var options = start;
} else {
var grammar = Grammar.fromCompiled(rules, start);
}
this.grammar = grammar;
// Read options
this.options = {
keepHistory: false,
lexer: grammar.lexer || new StreamLexer,
};
for (var key in (options || {})) {
this.options[key] = options[key];
}
// Setup lexer
this.lexer = this.options.lexer;
this.lexerState = undefined;
// Setup a table
var column = new Column(grammar, 0);
var table = this.table = [column];
// I could be expecting anything.
column.wants[grammar.start] = [];
column.predict(grammar.start);
// TODO what if start rule is nullable?
column.process();
this.current = 0; // token index
}
// create a reserved token for indicating a parse fail
Parser.fail = {};
Parser.prototype.feed = function(chunk) {
var lexer = this.lexer;
lexer.reset(chunk, this.lexerState);
var token;
while (token = lexer.next()) {
// We add new states to table[current+1]
var column = this.table[this.current];
// GC unused states
if (!this.options.keepHistory) {
delete this.table[this.current - 1];
}
var n = this.current + 1;
var nextColumn = new Column(this.grammar, n);
this.table.push(nextColumn);
// Advance all tokens that expect the symbol
var literal = token.value;
var value = lexer.constructor === StreamLexer ? token.value : token;
var scannable = column.scannable;
for (var w = scannable.length; w--; ) {
var state = scannable[w];
var expect = state.rule.symbols[state.dot];
// Try to consume the token
// either regex or literal
if (expect.test ? expect.test(value) :
expect.type ? expect.type === token.type
: expect.literal === literal) {
// Add it
var next = state.nextState({data: value, token: token, isToken: true, reference: n - 1});
nextColumn.states.push(next);
}
}
// Next, for each of the rules, we either
// (a) complete it, and try to see if the reference row expected that
// rule
// (b) predict the next nonterminal it expects by adding that
// nonterminal's start state
// To prevent duplication, we also keep track of rules we have already
// added
nextColumn.process();
// If needed, throw an error:
if (nextColumn.states.length === 0) {
// No states at all! This is not good.
var message = this.lexer.formatError(token, "invalid syntax") + "\n";
message += "Unexpected " + (token.type ? token.type + " token: " : "");
message += JSON.stringify(token.value !== undefined ? token.value : token) + "\n";
var err = new Error(message);
err.offset = this.current;
err.token = token;
throw err;
}
// maybe save lexer state
if (this.options.keepHistory) {
column.lexerState = lexer.save()
}
this.current++;
}
if (column) {
this.lexerState = lexer.save()
}
// Incrementally keep track of results
this.results = this.finish();
// Allow chaining, for whatever it's worth
return this;
};
Parser.prototype.save = function() {
var column = this.table[this.current];
column.lexerState = this.lexerState;
return column;
};
Parser.prototype.restore = function(column) {
var index = column.index;
this.current = index;
this.table[index] = column;
this.table.splice(index + 1);
this.lexerState = column.lexerState;
// Incrementally keep track of results
this.results = this.finish();
};
// nb. deprecated: use save/restore instead!
Parser.prototype.rewind = function(index) {
if (!this.options.keepHistory) {
throw new Error('set option `keepHistory` to enable rewinding')
}
// nb. recall column (table) indicies fall between token indicies.
// col 0 -- token 0 -- col 1
this.restore(this.table[index]);
};
Parser.prototype.finish = function() {
// Return the possible parsings
var considerations = [];
var start = this.grammar.start;
var column = this.table[this.table.length - 1]
column.states.forEach(function (t) {
if (t.rule.name === start
&& t.dot === t.rule.symbols.length
&& t.reference === 0
&& t.data !== Parser.fail) {
considerations.push(t);
}
});
return considerations.map(function(c) {return c.data; });
};
return {
Parser: Parser,
Grammar: Grammar,
Rule: Rule,
};
}));
},{}]},{},[1]);
// Taken from https://nearley.js.org/docs/using-in-frontend
// to be browserified for use with V8 within R, see:
// https://cran.r-project.org/web/packages/V8/vignettes/npm.html
global.nearley = require('nearley');
global.compile = require("nearley/lib/compile");
global.generate = require("nearley/lib/generate");
global.nearleyGrammar = require("nearley/lib/nearley-language-bootstrapped");

global.compileGrammar = function (sourceCode) {
    // Parse the grammar source into an AST
    const grammarParser = new nearley.Parser(nearleyGrammar);
    grammarParser.feed(sourceCode);
    const grammarAst = grammarParser.results[0]; // TODO check for errors

    // Compile the AST into a set of rules
    const grammarInfoObject = compile(grammarAst, {});

    // Generate JavaScript code from the rules
    const grammarJs = generate(grammarInfoObject, "grammar");

    // Pretend this is a CommonJS environment to catch exports from the grammar.
    const module = { exports: {} };
    eval(grammarJs);
    return module.exports;
}
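If the bundle is rebuilt locally from this in.js (browserify in.js -o bundle.js, per the note at the top of bundle.js), it can be loaded into V8 from disk instead of from the git.io gist URL. A minimal sketch, assuming bundle.js sits in the working directory:

library(V8)

ctx <- v8(global = "window")       # nearley's generated JS expects a `window` global
ctx$source("bundle.js")            # local build, in place of ctx$source("https://git.io/vAFRV")
ctx$eval("typeof compileGrammar")  # should return "function" once the bundle has loaded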