Skip to content

Instantly share code, notes, and snippets.

@erezsh
Last active July 1, 2021 15:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save erezsh/f0745a48f8a7f8a6b6375e798ec7dbfa to your computer and use it in GitHub Desktop.
Save erezsh/f0745a48f8a7f8a6b6375e798ec7dbfa to your computer and use it in GitHub Desktop.
WIP of Lark js (far from working yet)
function range(start, end) {
if (end === undefined) {
end = start;
start = 0;
}
res = []
for (let i=start; i<end; i++) res.push(i);
return res
};
String.prototype.format = function() {
var counter = 0;
var args = arguments;
return this.replace(/%[sr]/g, function() {
return args[counter++];
});
};
String.prototype.join = function(a) { return a.join(this) }
Set.prototype.union = function(setB) {
let _union = new Set(this)
for (let elem of setB) {
_union.add(elem)
}
return _union
}
Set.prototype.intersection = function(setB) {
let _intersection = new Set()
for (let elem of setB) {
if (this.has(elem)) {
_intersection.add(elem)
}
}
return _intersection
}
Object.prototype.items = function() {
return Object.entries(this)
}
function dict(d) {
if (d instanceof Map) {
return new Map(d)
}
return new Map(Object.entries(d))
}
Map.prototype.pop = function(key) {
let value = this.get(key)
this.delete(key)
return value
}
class _Decoratable {}
class Serialize {}
class Str {}
class ABC {}
NotImplemented = {};
const re = {
escapeRegExp(string) {
// See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#escaping
return string.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); // $& means the whole matched string
},
compile(regex, flags) {
// May throw re.error
return new RegExp(regex, flags);
},
error: SyntaxError,
};
function _get_match(re_, regexp, s, flags) {
m = re_.compile(regexp, flags).match(s);
if (m != null) return m[0];
}
class Scanner {
constructor({
terminals,
g_regex_flags,
re_,
use_bytes,
match_whole = false,
}) {
this.terminals = terminals;
this.g_regex_flags = g_regex_flags;
this.re_ = re_;
this.use_bytes = use_bytes;
this.match_whole = match_whole;
this.allowed_types = new Set(this.terminals.map((t) => t.name));
this._regexp = this._build_mres(terminals);
}
_build_mres(terminals) {
let postfix = this.match_whole ? "$" : "";
let mres = [];
let pattern = "|".join(
terminals.map((t) => `(?<${t.name}>${t.pattern.to_regexp() + postfix})`)
);
console.log("SCANNER: ", pattern);
return new RegExp(pattern, this.g_regex_flags + "y");
}
match(text, pos) {
this._regexp.lastIndex = pos;
let m = this._regexp.exec(text);
if (m) {
// Find group. Ugly hack, but javascript is forcing my hand.
let group = null;
for (let [k, v] of Object.entries(m.groups)) {
if (v) {
group = k;
break;
}
}
return [m[0], group];
}
}
}
class LarkError extends Error {
// pass
}
class ConfigurationError extends LarkError {
// pass
}
function assert_config({
value,
options,
msg = "Got %r, expected one of %s",
} = {}) {
if (!(value in options)) {
throw new ConfigurationError(msg.format(value, options));
}
}
class GrammarError extends LarkError {
// pass
}
class ParseError extends LarkError {
// pass
}
class LexError extends LarkError {
// pass
}
class UnexpectedInput extends LarkError {
/*
UnexpectedInput Error.
Used as a base class for the following exceptions:
- ``UnexpectedToken``: The parser received an unexpected token
- ``UnexpectedCharacters``: The lexer encountered an unexpected string
After catching one of these exceptions, you may call the following helper methods to create a nicer error message.
*/
get pos_in_stream() {
return null;
}
get _terminals_by_name() {
return null;
}
get_context({ text, span = 40 } = {}) {
let after,
before; /*
Returns a pretty string pinpointing the error in the text,
with span amount of context characters around it.
Note:
The parser doesn't hold a copy of the text it has to parse,
so you have to provide it again
*/
console.assert(this.pos_in_stream != null, this);
let pos = this.pos_in_stream;
let start = max(pos - span, 0);
let end = pos + span;
if (!(text instanceof bytes)) {
before = text.slice(start, pos).rsplit("\n", 1)[-1];
after = text.slice(pos, end).split("\n", 1)[0];
return before + after + "\n" + " " * before.expandtabs().length + "^\n";
} else {
before = text.slice(start, pos).rsplit("\n", 1)[-1];
after = text.slice(pos, end).split("\n", 1)[0];
return (
before +
after +
"\n" +
" " * before.expandtabs().length +
"^\n"
).decode("ascii", "backslashreplace");
}
}
match_examples({
parse_fn,
examples,
token_type_match_fallback = false,
use_accepts = false,
} = {}) {
/*
Allows you to detect what's wrong in the input text by matching
against example errors.
Given a parser instance and a dictionary mapping some label with
some malformed syntax examples, it'll return the label for the
example that bests matches the current error. The function will
iterate the dictionary until it finds a matching error, and
return the corresponding value.
For an example usage, see `examples/error_reporting_lalr.py`
Parameters:
parse_fn: parse function (usually ``lark_instance.parse``)
examples: dictionary of ``{'example_string': value}``.
use_accepts: Recommended to call this with ``use_accepts=True``.
The default is ``False`` for backwards compatibility.
*/
console.assert(this.state != null, "Not supported for this exception");
if (examples instanceof dict) {
examples = examples.items();
}
let candidate = [null, false];
for (let [i, [label, example]] of enumerate(examples)) {
console.assert(!(typeof example === "string"), "Expecting a list");
for (let [j, malformed] of enumerate(example)) {
try {
parse_fn(malformed);
} catch (e) {
if (e instanceof UnexpectedInput) {
ut = e;
if (ut.state === this.state) {
if (
use_accepts &&
hasattr(this, "accepts") &&
ut.accepts != this.accepts
) {
logger.debug(
"Different accepts with same state[%d]: %s != %s at example [%s][%s]".format(
this.state,
this.accepts,
ut.accepts,
i,
j
)
);
continue;
}
if (
this.token &&
logger &&
this &&
ut &&
ut.token &&
this.token.type &&
logger.debug &&
this.token &&
ut.token &&
ut.token.type
) {
if (ut.token === this.token) {
// Try exact match first
logger.debug("Exact Match at example [%s][%s]".format(i, j));
return label;
}
if (token_type_match_fallback) {
// Fallback to token types match
if (ut.token.type === this.token.type && !candidate[-1]) {
logger.debug(
"Token Type Fallback at example [%s][%s]".format(i, j)
);
candidate = [label, true];
}
}
} else {
// pass
}
if (candidate[0] === null) {
logger.debug(
"Same State match at example [%s][%s]".format(i, j)
);
candidate = [label, false];
}
}
} else {
throw e;
}
}
}
}
return candidate[0];
}
_format_expected(expected) {
let d;
if (this._terminals_by_name) {
d = this._terminals_by_name;
expected = expected.map((t_name) =>
t_name in d ? d[t_name].user_repr() : t_name
);
}
return "Expected one of: \n\t* %s\n".format("\n\t* ".join(expected));
}
}
class UnexpectedEOF extends ParseError {
constructor({ expected, state = null, terminals_by_name = null } = {}) {
super();
this.expected = expected;
this.state = state;
this.token = new Token("<EOF>", ""); // , line=-1, column=-1, pos_in_stream=-1)
this.pos_in_stream = -1;
this.line = -1;
this.column = -1;
this._terminals_by_name = terminals_by_name;
}
__str__() {
let message = "Unexpected end-of-input. ";
message += this._format_expected(this.expected);
return message;
}
}
class UnexpectedCharacters extends LexError {
constructor({
seq,
lex_pos,
line,
column,
allowed = null,
considered_tokens = null,
state = null,
token_history = null,
terminals_by_name = null,
considered_rules = null,
} = {}) {
super(); // TODO considered_tokens and allowed can be figured out using state
this.line = line;
this.column = column;
this.pos_in_stream = lex_pos;
this.state = state;
this._terminals_by_name = terminals_by_name;
this.allowed = allowed;
this.considered_tokens = considered_tokens;
this.considered_rules = considered_rules;
this.token_history = token_history;
if (seq instanceof bytes) {
this.char = seq
.slice(lex_pos, lex_pos + 1)
.decode("ascii", "backslashreplace");
} else {
this.char = seq[lex_pos];
}
this._context = this.get_context({ seq });
}
__str__() {
let message = "No terminal matches '%s' in the current parser context, at line %d col %d".format(
this.char,
this.line,
this.column
);
message += "\n\n" + this._context;
if (this.allowed) {
message += this._format_expected(this.allowed);
}
if (this.token_history) {
message += "\nPrevious tokens: %s\n".format(
", ".join(this.token_history.map((t) => repr(t)))
);
}
return message;
}
}
class UnexpectedToken extends ParseError {
/*
An exception that is raised by the parser, when the token it received
doesn't match any valid step forward.
The parser provides an interactive instance through `interactive_parser`,
which is initialized to the point of failture, and can be used for debugging and error handling.
see: ``InteractiveParser``.
*/
constructor({
token,
expected,
considered_rules = null,
state = null,
interactive_parser = null,
terminals_by_name = null,
token_history = null,
} = {}) {
super(); // TODO considered_rules and expected can be figured out using state
this.line = token["line"] || "?";
this.column = token["column"] || "?";
this.pos_in_stream = token["start_pos"] || null;
this.state = state;
this.token = token;
this.expected = expected; // XXX deprecate? `accepts` is better
this._accepts = NO_VALUE;
this.considered_rules = considered_rules;
this.interactive_parser = interactive_parser;
this._terminals_by_name = terminals_by_name;
this.token_history = token_history;
}
get accepts() {
if (this._accepts === NO_VALUE) {
this._accepts =
this.interactive_parser && this.interactive_parser.accepts();
}
return this._accepts;
}
__str__() {
let message = "Unexpected token %r at line %s, column %s.\n%s".format(
this.token,
this.line,
this.column,
this._format_expected(this.accepts || this.expected)
);
if (this.token_history) {
message += "Previous tokens: %r\n".format(this.token_history);
}
return message;
}
}
class VisitError extends LarkError {
/*
VisitError is raised when visitors are interrupted by an exception
It provides the following attributes for inspection:
- obj: the tree node or token it was processing when the exception was raised
- orig_exc: the exception that cause it to fail
*/
constructor(rule, obj, orig_exc) {
let message = 'Error trying to process rule "%s":\n\n%s'.format(
rule,
orig_exc
);
super(message);
this.obj = obj;
this.orig_exc = orig_exc;
}
}
class Meta {
constructor() {
this.empty = true;
}
}
class Discard extends Error {
/*
When raising the Discard exception in a transformer callback,
that node is discarded and won't appear in the parent.
*/
// pass
}
class Transformer extends _Decoratable {
/*
Transformers visit each node of the tree, and run the appropriate method on it according to the node's data.
Methods are provided by the user via inheritance, and called according to ``tree.data``.
The returned value from each method replaces the node in the tree structure.
Transformers work bottom-up (or depth-first), starting with the leaves and ending at the root of the tree.
Transformers can be used to implement map & reduce patterns. Because nodes are reduced from leaf to root,
at any point the callbacks may assume the children have already been transformed (if applicable).
``Transformer`` can do anything ``Visitor`` can do, but because it reconstructs the tree,
it is slightly less efficient.
All these classes implement the transformer interface:
- ``Transformer`` - Recursively transforms the tree. This is the one you probably want.
- ``Transformer_InPlace`` - Non-recursive. Changes the tree in-place instead of returning new instances
- ``Transformer_InPlaceRecursive`` - Recursive. Changes the tree in-place instead of returning new instances
Parameters:
visit_tokens (bool, optional): Should the transformer visit tokens in addition to rules.
Setting this to ``False`` is slightly faster. Defaults to ``True``.
(For processing ignored tokens, use the ``lexer_callbacks`` options)
NOTE: A transformer without methods essentially performs a non-memoized partial deepcopy.
*/
get __visit_tokens__() {
return true;
} // For backwards compatibility
constructor({ visit_tokens = true } = {}) {
super();
this.__visit_tokens__ = visit_tokens;
}
_call_userfunc({ tree, new_children = null } = {}) {
let f, wrapper;
let // Assumes tree is already transformed
children = new_children != null ? new_children : tree.children;
if (tree && tree.data && this && this[tree.data]) {
f = this[tree.data];
try {
wrapper = f["visit_wrapper"] || null;
if (wrapper != null) {
return f.visit_wrapper(f, tree.data, children, tree.meta);
} else {
return f(children);
}
} catch (e) {
if (e instanceof [GrammarError, Discard]) {
throw None;
} else if (e instanceof Error) {
throw new VisitError(tree.data, tree, e);
} else {
throw e;
}
}
} else {
return this.__default__(tree.data, children, tree.meta);
}
}
_call_userfunc_token(token) {
let f;
if (token && token.type && this && this[token.type]) {
f = this[token.type];
try {
return f(token);
} catch (e) {
if (e instanceof [GrammarError, Discard]) {
throw None;
} else if (e instanceof Error) {
throw new VisitError(token.type, token, e);
} else {
throw e;
}
}
} else {
return this.__default_token__(token);
}
}
*_transform_children(children) {
for (let c of children) {
try {
if (c instanceof Tree) {
yield this._transform_tree(c);
} else if (this.__visit_tokens__ && c instanceof Token) {
yield this._call_userfunc_token(c);
} else {
yield c;
}
} catch (e) {
if (e instanceof Discard) {
// pass
} else {
throw e;
}
}
}
}
_transform_tree(tree) {
let children = new Array.from(this._transform_children(tree.children));
return this._call_userfunc({ tree, children });
}
transform(tree) {
/*
Transform the given tree, and return the final result
*/
return this._transform_tree(tree);
}
__mul__(other) {
/*
Chain two transformers together, returning a new transformer.
*/
return new TransformerChain(this, other);
}
__default__(data, children, meta) {
/*
Default function that is called if there is no attribute matching ``data``
Can be overridden. Defaults to creating a new copy of the tree node (i.e. ``return Tree(data, children, meta)``)
*/
return new Tree(data, children, meta);
}
__default_token__(token) {
/*
Default function that is called if there is no attribute matching ``token.type``
Can be overridden. Defaults to returning the token as-is.
*/
return token;
}
}
class Transformer_InPlace extends Transformer {
/*
Same as Transformer, but non-recursive, and changes the tree in-place instead of returning new instances
Useful for huge trees. Conservative in memory.
*/
_transform_tree(tree) {
// Cancel recursion
return this._call_userfunc({ tree });
}
transform(tree) {
for (let subtree of tree.iter_subtrees()) {
subtree.children = new Array.from(
this._transform_children(subtree.children)
);
}
return this._transform_tree(tree);
}
}
class Transformer_NonRecursive extends Transformer {
/*
Same as Transformer but non-recursive.
Like Transformer, it doesn't change the original tree.
Useful for huge trees.
*/
transform(tree) {
let size, args, t;
let // Tree to postfix
rev_postfix = [];
let q = [tree];
while (q) {
t = q.pop();
rev_postfix.push(t);
if (t instanceof Tree) {
q.push(...t.children);
}
}
let // Postfix to tree
stack = [];
for (let x of rev_postfix.slice().reverse()) {
if (x instanceof Tree) {
size = x.children.length;
if (size) {
args = stack.slice(-size);
} else {
args = [];
}
stack.push(this._call_userfunc({ x, args }));
} else if (this.__visit_tokens__ && x instanceof Token) {
stack.push(this._call_userfunc_token(x));
} else {
stack.push(x);
}
}
[t] = stack; // We should have only one tree remaining
return t;
}
}
class Transformer_InPlaceRecursive extends Transformer {
/*
Same as Transformer, recursive, but changes the tree in-place instead of returning new instances
*/
_transform_tree(tree) {
tree.children = new Array.from(this._transform_children(tree.children));
return this._call_userfunc(tree);
}
} // Visitors
class VisitorBase {
_call_userfunc(tree) {
return (this[tree.data] || this.__default__)(tree);
}
__default__(tree) {
/*
Default function that is called if there is no attribute matching ``tree.data``
Can be overridden. Defaults to doing nothing.
*/
return tree;
}
__class_getitem__(_) {
return cls;
}
}
class Visitor extends VisitorBase {
/*
Tree visitor, non-recursive (can handle huge trees).
Visiting a node calls its methods (provided by the user via inheritance) according to ``tree.data``
*/
visit(tree) {
/*
Visits the tree, starting with the leaves and finally the root (bottom-up)
*/
for (let subtree of tree.iter_subtrees()) {
this._call_userfunc(subtree);
}
return tree;
}
visit_topdown(tree) {
/*
Visit the tree, starting at the root, and ending at the leaves (top-down)
*/
for (let subtree of tree.iter_subtrees_topdown()) {
this._call_userfunc(subtree);
}
return tree;
}
}
class Visitor_Recursive extends VisitorBase {
/*
Bottom-up visitor, recursive.
Visiting a node calls its methods (provided by the user via inheritance) according to ``tree.data``
Slightly faster than the non-recursive version.
*/
visit(tree) {
/*
Visits the tree, starting with the leaves and finally the root (bottom-up)
*/
for (let child of tree.children) {
if (child instanceof Tree) {
this.visit(child);
}
}
this._call_userfunc(tree);
return tree;
}
visit_topdown(tree) {
/*
Visit the tree, starting at the root, and ending at the leaves (top-down)
*/
this._call_userfunc(tree);
for (let child of tree.children) {
if (child instanceof Tree) {
this.visit_topdown(child);
}
}
return tree;
}
}
class Interpreter extends _Decoratable {
/*
Interpreter walks the tree starting at the root.
Visits the tree, starting with the root and finally the leaves (top-down)
For each tree node, it calls its methods (provided by user via inheritance) according to ``tree.data``.
Unlike ``Transformer`` and ``Visitor``, the Interpreter doesn't automatically visit its sub-branches.
The user has to explicitly call ``visit``, ``visit_children``, or use the ``@visit_children_decor``.
This allows the user to implement branching and loops.
*/
visit(tree) {
let f = this[tree.data];
let wrapper = f["visit_wrapper"] || null;
if (wrapper != null) {
return f.visit_wrapper(f, tree.data, tree.children, tree.meta);
} else {
return f(tree);
}
}
visit_children(tree) {
return tree.children.map((child) =>
child instanceof Tree ? this.visit(child) : child
);
}
__getattr__(name) {
return this.__default__;
}
__default__(tree) {
return this.visit_children(tree);
}
}
class Symbol extends Serialize {
get is_term() {
return NotImplemented;
}
constructor(name) {
super();
this.name = name;
}
__eq__(other) {
console.assert(other instanceof Symbol, other);
return this.is_term === other.is_term && this.name === other.name;
}
__ne__(other) {
return !(this === other);
}
__hash__() {
return hash(this.name);
}
__repr__() {
return "%s(%r)".format(type(this).__name__, this.name);
}
get fullrepr() {
return property(__repr__);
}
}
class Terminal extends Symbol {
get is_term() {
return true;
}
constructor({ name, filter_out = false } = {}) {
super();
this.name = name;
this.filter_out = filter_out;
}
get fullrepr() {
return "%s(%r, %r)".format(type(this).__name__, this.name, this.filter_out);
}
}
class NonTerminal extends Symbol {
get is_term() {
return false;
}
}
class RuleOptions extends Serialize {
constructor({
keep_all_tokens = false,
expand1 = false,
priority = null,
template_source = null,
empty_indices = [],
} = {}) {
super();
this.keep_all_tokens = keep_all_tokens;
this.expand1 = expand1;
this.priority = priority;
this.template_source = template_source;
this.empty_indices = empty_indices;
}
__repr__() {
return "RuleOptions(%r, %r, %r, %r)".format(
this.keep_all_tokens,
this.expand1,
this.priority,
this.template_source
);
}
}
class Rule extends Serialize {
/*
origin : a symbol
expansion : a list of symbols
order : index of this expansion amongst all rules of the same name
*/
constructor({
origin,
expansion,
order = 0,
alias = null,
options = null,
} = {}) {
super();
this.origin = origin;
this.expansion = expansion;
this.alias = alias;
this.order = order;
this.options = options || new RuleOptions();
this._hash = hash([this.origin, tuple(this.expansion)]);
}
_deserialize() {
this._hash = hash([this.origin, tuple(this.expansion)]);
}
__str__() {
return "<%s : %s>".format(
this.origin.name,
" ".join(this.expansion.map((x) => x.name))
);
}
__repr__() {
return "Rule(%r, %r, %r, %r)".format(
this.origin,
this.expansion,
this.alias,
this.options
);
}
__hash__() {
return this._hash;
}
__eq__(other) {
if (!(other instanceof Rule)) {
return false;
}
return this.origin === other.origin && this.expansion === other.expansion;
}
}
class Pattern extends Serialize {
get raw() {
return null;
}
get type() {
return null;
}
constructor({ value, flags = [], raw = null } = {}) {
super();
this.value = value;
this.flags = frozenset(flags);
this.raw = raw;
}
__repr__() {
return repr(this.to_regexp());
} // Pattern Hashing assumes all subclasses have a different priority!
__hash__() {
return hash([type(this), this.value, this.flags]);
}
__eq__(other) {
return (
type(this) === type(other) &&
this.value === other.value &&
this.flags === other.flags
);
}
to_regexp() {
throw new NotImplementedError();
}
min_width() {
throw new NotImplementedError();
}
max_width() {
throw new NotImplementedError();
}
_get_flags(value) {
for (let f of this.flags) {
value = "(?%s:%s)".format(f, value);
}
return value;
}
}
class PatternStr extends Pattern {
get type() {
return "str";
}
to_regexp() {
return this._get_flags(re.escape(this.value));
}
get min_width() {
return this.value.length;
}
get max_width() {
return min_width;
}
}
class PatternRE extends Pattern {
get type() {
return "re";
}
to_regexp() {
return this._get_flags(this.value);
}
get _width() {
return null;
}
_get_width() {
if (this._width === null) {
this._width = get_regexp_width(this.to_regexp());
}
return this._width;
}
get min_width() {
return this._get_width()[0];
}
get max_width() {
return this._get_width()[1];
}
}
class Token extends str {
/*
A string with meta-information, that is produced by the lexer.
When parsing text, the resulting chunks of the input that haven't been discarded,
will end up in the tree as Token instances. The Token class inherits from Python's ``str``,
so normal string comparisons and operations will work as expected.
Attributes:
type: Name of the token (as specified in grammar)
value: Value of the token (redundant, as ``token.value == token`` will always be true)
start_pos: The index of the token in the text
line: The line of the token in the text (starting with 1)
column: The column of the token in the text (starting with 1)
end_line: The line where the token ends
end_column: The next column after the end of the token. For example,
if the token is a single character with a column value of 4,
end_column will be 5.
end_pos: the index where the token ends (basically ``start_pos + len(token)``)
*/
constructor({
type_,
value,
start_pos = null,
line = null,
column = null,
end_line = null,
end_column = null,
end_pos = null,
} = {}) {
super();
let inst = super.__new__(cls, value);
inst.type = type_;
inst.start_pos = start_pos;
inst.value = value;
inst.line = line;
inst.column = column;
inst.end_line = end_line;
inst.end_column = end_column;
inst.end_pos = end_pos;
return inst;
}
update({ type_ = null, value = null } = {}) {
return new Token.new_borrow_pos(
type_ != null ? type_ : this.type,
value != null ? value : this.value,
this
);
}
static new_borrow_pos(type_, value, borrow_t) {
cls = Token;
return new cls(
type_,
value,
borrow_t.start_pos,
borrow_t.line,
borrow_t.column,
borrow_t.end_line,
borrow_t.end_column,
borrow_t.end_pos
);
}
__reduce__() {
return [
this.__class__,
[this.type, this.value, this.start_pos, this.line, this.column],
];
}
__repr__() {
return "Token(%r, %r)".format(this.type, this.value);
}
__deepcopy__(memo) {
return new Token(
this.type,
this.value,
this.start_pos,
this.line,
this.column
);
}
__eq__(other) {
if (other instanceof Token && this.type != other.type) {
return false;
}
return str.__eq__(this, other);
}
get __hash__() {
return str.__hash__;
}
}
class LineCounter {
constructor(newline_char) {
this.newline_char = newline_char;
this.char_pos = 0;
this.line = 1;
this.column = 1;
this.line_start_pos = 0;
}
__eq__(other) {
if (!(other instanceof LineCounter)) {
return NotImplemented;
}
return (
this.char_pos === other.char_pos &&
this.newline_char === other.newline_char
);
}
feed({ token, test_newline = true } = {}) {
let newlines; /*
Consume a token and calculate the new line & column.
As an optional optimization, set test_newline=False if token doesn't contain a newline.
*/
if (test_newline) {
newlines = token.count(this.newline_char);
if (newlines) {
this.line += newlines;
this.line_start_pos =
this.char_pos + token.rindex(this.newline_char) + 1;
}
}
this.char_pos += token.length;
this.column = this.char_pos - this.line_start_pos + 1;
}
}
class UnlessCallback {
constructor(scanner) {
this.scanner = scanner;
}
__call__(t) {
let res = this.scanner.match(t.value, 0);
if (res) {
[_value, t.type] = res;
}
return t;
}
}
class CallChain {
constructor(callback1, callback2, cond) {
this.callback1 = callback1;
this.callback2 = callback2;
this.cond = cond;
}
__call__(t) {
let t2 = this.callback1(t);
return this.cond(t2) ? this.callback2(t) : t2;
}
}
class Lexer {
/*
Lexer interface
Method Signatures:
lex(self, text) -> Iterator[Token]
*/
get lex() {
return NotImplemented;
}
make_lexer_state(text) {
let line_ctr = new LineCounter(text instanceof bytes ? "\n" : "\n");
return new LexerState({ text, line_ctr });
}
}
class TraditionalLexer extends Lexer {
constructor(conf) {
super();
let terminals = new Array.from(conf.terminals);
console.assert(
all(terminals.map((t) => t instanceof TerminalDef)),
terminals
);
this.re = conf.re_module;
if (!conf.skip_validation) {
// Sanitization
for (let t of terminals) {
try {
this.re.compile(t.pattern.to_regexp(), conf.g_regex_flags);
} catch (e) {
if (e instanceof this.re.error) {
throw new LexError(
"Cannot compile token %s: %s".format(t.name, t.pattern)
);
} else {
throw e;
}
}
if (t.pattern.min_width === 0) {
throw new LexError(
"Lexer does not allow zero-width terminals. (%s: %s)".format(
t.name,
t.pattern
)
);
}
}
if (!(new Set(conf.ignore) <= new Set(terminals.map((t) => t.name)))) {
throw new LexError(
"Ignore terminals are not defined: %s".format(
new Set(conf.ignore) - new Set(terminals.map((t) => t.name))
)
);
}
} // Init
this.newline_types = frozenset(
terminals
.filter((t) => _regexp_has_newline(t.pattern.to_regexp()))
.map((t) => t.name)
);
this.ignore_types = frozenset(conf.ignore);
terminals.sort({
key: (x) => [
-x.priority,
-x.pattern.max_width,
-x.pattern.value.length,
x.name,
],
});
this.terminals = terminals;
this.user_callbacks = conf.callbacks;
this.g_regex_flags = conf.g_regex_flags;
this.use_bytes = conf.use_bytes;
this.terminals_by_name = conf.terminals_by_name;
this._scanner = null;
}
_build_scanner() {
[terminals, this.callback] = _create_unless(
this.terminals,
this.g_regex_flags,
this.re,
this.use_bytes
);
console.assert(all(this.callback.values()));
for (let [type_, f] of this.user_callbacks.items()) {
if (type_ in this.callback) {
// Already a callback there, probably UnlessCallback
this.callback[type_] = new CallChain(
this.callback[type_],
f,
(t) => t.type === type_
);
} else {
this.callback[type_] = f;
}
}
this._scanner = new Scanner(
terminals,
this.g_regex_flags,
this.re,
this.use_bytes
);
}
get scanner() {
if (this._scanner === null) {
this._build_scanner();
}
return this._scanner;
}
match(text, pos) {
return this.scanner.match(text, pos);
}
*lex(state, parser_state) {
try {
while (true) {
yield this.next_token({ state, parser_state });
}
} catch (e) {
if (e instanceof EOFError) {
// pass
} else {
throw e;
}
}
}
next_token({ lex_state, parser_state = null } = {}) {
let allowed, t, t2, res;
let line_ctr = lex_state.line_ctr;
while (line_ctr.char_pos < lex_state.text.length) {
res = this.match(lex_state.text, line_ctr.char_pos);
if (!res) {
allowed = this.scanner.allowed_types - this.ignore_types;
if (!allowed) {
allowed = new Set(["<END-OF-FILE>"]);
}
throw new UnexpectedCharacters({
unknown_param: lex_state.text,
unknown_param: line_ctr.char_pos,
unknown_param: line_ctr.line,
unknown_param: line_ctr.column,
allowed: allowed,
token_history: lex_state.last_token && [lex_state.last_token],
state: parser_state,
terminals_by_name: this.terminals_by_name,
});
}
[value, type_] = res;
if (!(type_ in this.ignore_types)) {
t = new Token(
type_,
value,
line_ctr.char_pos,
line_ctr.line,
line_ctr.column
);
line_ctr.feed(value, type_ in this.newline_types);
t.end_line = line_ctr.line;
t.end_column = line_ctr.column;
t.end_pos = line_ctr.char_pos;
if (t.type in this.callback) {
t = this.callback[t.type](t);
if (!(t instanceof Token)) {
throw new LexError(
"Callbacks must return a token (returned %r)".format(t)
);
}
}
lex_state.last_token = t;
return t;
} else {
if (type_ in this.callback) {
t2 = new Token(
type_,
value,
line_ctr.char_pos,
line_ctr.line,
line_ctr.column
);
this.callback[type_](t2);
}
line_ctr.feed(value, type_ in this.newline_types);
}
} // EOF
throw new EOFError(this);
}
}
class LexerState {
constructor({ text, line_ctr, last_token = null } = {}) {
this.text = text;
this.line_ctr = line_ctr;
this.last_token = last_token;
}
__eq__(other) {
if (!(other instanceof LexerState)) {
return NotImplemented;
}
return (
this.text === other.text &&
this.line_ctr === other.line_ctr &&
this.last_token === other.last_token
);
}
__copy__() {
return type(this)(this.text, copy(this.line_ctr), this.last_token);
}
}
class ContextualLexer extends Lexer {
constructor({ conf, states, always_accept = [] } = {}) {
super();
let key, lexer, lexer_conf, accepts;
let terminals = new Array.from(conf.terminals);
let terminals_by_name = conf.terminals_by_name;
let trad_conf = copy(conf);
trad_conf.terminals = terminals;
let lexer_by_tokens = {};
this.lexers = {};
for (let [state, accepts] of states.items()) {
key = frozenset(accepts);
try {
lexer = lexer_by_tokens[key];
} catch (e) {
if (e instanceof KeyError) {
accepts = new Set(accepts).union([
...new Set(conf.ignore),
...new Set(always_accept),
]);
lexer_conf = copy(trad_conf);
lexer_conf.terminals = accepts
.filter((n) => n in terminals_by_name)
.map((n) => terminals_by_name[n]);
lexer = new TraditionalLexer(lexer_conf);
lexer_by_tokens[key] = lexer;
} else {
throw e;
}
}
this.lexers[state] = lexer;
}
console.assert(trad_conf.terminals === terminals);
this.root_lexer = new TraditionalLexer(trad_conf);
}
make_lexer_state(text) {
return this.root_lexer.make_lexer_state(text);
}
*lex(lexer_state, parser_state) {
let lexer, // Save last_token. Calling root_lexer.next_token will change this to the wrong token
token,
last_token;
try {
while (true) {
lexer = this.lexers[parser_state.position];
yield lexer.next_token({ lexer_state, parser_state });
}
} catch (e) {
if (e instanceof EOFError) {
// pass
} else if (e instanceof UnexpectedCharacters) {
// In the contextual lexer, UnexpectedCharacters can mean that the terminal is defined, but not in the current context.
// This tests the input against the global context, to provide a nicer error.
try {
last_token = lexer_state.last_token; // Save last_token. Calling root_lexer.next_token will change this to the wrong token
token = this.root_lexer.next_token({ lexer_state, parser_state });
throw new UnexpectedToken({
token,
unknown_param: e.allowed,
state: parser_state,
token_history: [last_token],
terminals_by_name: this.root_lexer.terminals_by_name,
});
} catch (e) {
if (e instanceof UnexpectedCharacters) {
throw e;
} else {
throw e;
}
}
} else {
throw e;
}
}
}
} // Raise the original UnexpectedCharacters. The root lexer raises it with the wrong expected set.
class LexerThread {
/*
A thread that ties a lexer instance and a lexer state, to be used by the parser
*/
constructor(lexer, text) {
this.lexer = lexer;
this.state = lexer.make_lexer_state(text);
}
lex(parser_state) {
return this.lexer.lex(this.state, parser_state);
}
__copy__() {
let copied = object.__new__(LexerThread);
copied.lexer = this.lexer;
copied.state = copy(this.state);
return copied;
}
}
class LexerConf extends Serialize {
constructor({
terminals,
re_module,
ignore = [],
postlex = null,
callbacks = null,
g_regex_flags = 0,
skip_validation = false,
use_bytes = false,
} = {}) {
super();
this.terminals = terminals;
this.terminals_by_name = new Map(this.terminals.map((t) => [t.name, t]));
console.assert(this.terminals.length === this.terminals_by_name.length);
this.ignore = ignore;
this.postlex = postlex;
this.callbacks = callbacks || {};
this.g_regex_flags = g_regex_flags;
this.re_module = re_module;
this.skip_validation = skip_validation;
this.use_bytes = use_bytes;
this.lexer_type = null;
}
_deserialize() {
this.terminals_by_name = new Map(this.terminals.map((t) => [t.name, t]));
}
}
class ParserConf extends Serialize {
constructor(rules, callbacks, start) {
super();
console.assert(start instanceof list);
this.rules = rules;
this.callbacks = callbacks;
this.start = start;
this.parser_type = null;
}
}
class ExpandSingleChild {
constructor(node_builder) {
this.node_builder = node_builder;
}
__call__(children) {
if (children.length === 1) {
return children[0];
} else {
return this.node_builder(children);
}
}
}
class PropagatePositions {
constructor({ node_builder, node_filter = null } = {}) {
this.node_builder = node_builder;
this.node_filter = node_filter;
}
__call__(children) {
let res_meta, last_meta, first_meta;
let res = this.node_builder(children); // local reference to Tree.meta reduces number of presence checks
if (res instanceof Tree) {
res_meta = res.meta;
first_meta = this._pp_get_meta(children);
if (first_meta != null) {
res_meta.line = first_meta.line;
res_meta.column = first_meta.column;
res_meta.start_pos = first_meta.start_pos;
res_meta.empty = false;
}
last_meta = this._pp_get_meta(children.slice().reverse());
if (last_meta != null) {
res_meta.end_line = last_meta.end_line;
res_meta.end_column = last_meta.end_column;
res_meta.end_pos = last_meta.end_pos;
res_meta.empty = false;
}
}
return res;
}
_pp_get_meta(children) {
for (let c of children) {
if (this.node_filter != null && !this.node_filter(c)) {
continue;
}
if (c instanceof Tree) {
if (!c.meta.empty) {
return c.meta;
}
} else if (c instanceof Token) {
return c;
}
}
}
}
function make_propagate_positions(option) {
if (callable(option)) {
return partial({ PropagatePositions, node_filter: option });
} else if (option === true) {
return PropagatePositions;
} else if (option === false) {
return null;
}
throw new ConfigurationError(
"Invalid option for propagate_positions: %r".format(option)
);
}
class ChildFilter {
constructor(to_include, append_none, node_builder) {
this.node_builder = node_builder;
this.to_include = to_include;
this.append_none = append_none;
}
__call__(children) {
let filtered = [];
for (let [i, to_expand, add_none] of this.to_include) {
if (add_none) {
filtered += [null] * add_none;
}
if (to_expand) {
filtered += children[i].children;
} else {
filtered.push(children[i]);
}
}
if (this.append_none) {
filtered += [null] * this.append_none;
}
return this.node_builder(filtered);
}
}
class ChildFilterLALR extends ChildFilter {
/*
Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)
*/
__call__(children) {
let filtered = [];
for (let [i, to_expand, add_none] of this.to_include) {
if (add_none) {
filtered += [null] * add_none;
}
if (to_expand) {
if (filtered) {
filtered += children[i].children;
} else {
// Optimize for left-recursion
filtered = children[i].children;
}
} else {
filtered.push(children[i]);
}
}
if (this.append_none) {
filtered += [null] * this.append_none;
}
return this.node_builder(filtered);
}
}
class ChildFilterLALR_NoPlaceholders extends ChildFilter {
/*
Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)
*/
constructor(to_include, node_builder) {
super();
this.node_builder = node_builder;
this.to_include = to_include;
}
__call__(children) {
let filtered = [];
for (let [i, to_expand] of this.to_include) {
if (to_expand) {
if (filtered) {
filtered += children[i].children;
} else {
// Optimize for left-recursion
filtered = children[i].children;
}
} else {
filtered.push(children[i]);
}
}
return this.node_builder(filtered);
}
}
function _should_expand(sym) {
return !sym.is_term && sym.name.startswith("_");
}
function maybe_create_child_filter(
expansion,
keep_all_tokens,
ambiguous,
_empty_indices
) {
let s, empty_indices; // Prepare empty_indices as: How many Nones to insert at each index?
if (_empty_indices) {
console.assert(_empty_indices.count(false) === expansion.length);
s = "".join(_empty_indices.map((b) => str(int(b))));
empty_indices = s.split("0").map((ones) => ones.length);
console.assert(empty_indices.length === expansion.length + 1, [
empty_indices,
expansion.length,
]);
} else {
empty_indices = [0] * (expansion.length + 1);
}
let to_include = [];
let nones_to_add = 0;
for (let [i, sym] of enumerate(expansion)) {
nones_to_add += empty_indices[i];
if (keep_all_tokens || !(sym.is_term && sym.filter_out)) {
to_include.push([i, _should_expand(sym), nones_to_add]);
nones_to_add = 0;
}
}
nones_to_add += empty_indices[expansion.length];
if (
_empty_indices ||
to_include.length < expansion.length ||
any(to_include.map(([i, to_expand, _]) => to_expand))
) {
if (_empty_indices || ambiguous) {
return partial(
ambiguous ? ChildFilter : ChildFilterLALR,
to_include,
nones_to_add
);
} else {
// LALR without placeholders
return partial(
ChildFilterLALR_NoPlaceholders,
to_include.map(([i, x, _]) => [i, x])
);
}
}
}
class AmbiguousExpander {
/*
Deal with the case where we're expanding children ('_rule') into a parent but the children
are ambiguous. i.e. (parent->_ambig->_expand_this_rule). In this case, make the parent itself
ambiguous with as many copies as their are ambiguous children, and then copy the ambiguous children
into the right parents in the right places, essentially shifting the ambiguity up the tree.
*/
constructor(to_expand, tree_class, node_builder) {
this.node_builder = node_builder;
this.tree_class = tree_class;
this.to_expand = to_expand;
}
__call__(children) {
let to_expand;
function _is_ambig_tree(t) {
return hasattr(t, "data") && t.data === "_ambig";
}
let // -- When we're repeatedly expanding ambiguities we can end up with nested ambiguities.
// All children of an _ambig node should be a derivation of that ambig node, hence
// it is safe to assume that if we see an _ambig node nested within an ambig node
// it is safe to simply expand it into the parent _ambig node as an alternative derivation.
ambiguous = [];
for (let [i, child] of enumerate(children)) {
if (_is_ambig_tree(child)) {
if (i in this.to_expand) {
ambiguous.push(i);
}
to_expand = enumerate(child.children)
.filter(([j, grandchild]) => _is_ambig_tree(grandchild))
.map(([j, grandchild]) => j);
child.expand_kids_by_index(...to_expand);
}
}
if (!ambiguous) {
return this.node_builder(children);
}
let expand = enumerate(children).map(([i, child]) =>
i in ambiguous ? iter(child.children) : repeat(child)
);
return this.tree_class(
"_ambig",
product(zip(...expand)).map((f) =>
this.node_builder(new Array.from(f[0]))
)
);
}
}
function maybe_create_ambiguous_expander(
tree_class,
expansion,
keep_all_tokens
) {
let to_expand = enumerate(expansion)
.filter(
([i, sym]) =>
keep_all_tokens ||
(!(sym.is_term && sym.filter_out) && _should_expand(sym))
)
.map(([i, sym]) => i);
if (to_expand) {
return partial(AmbiguousExpander, to_expand, tree_class);
}
}
class AmbiguousIntermediateExpander {
/*
Propagate ambiguous intermediate nodes and their derivations up to the
current rule.
In general, converts
rule
_iambig
_inter
someChildren1
...
_inter
someChildren2
...
someChildren3
...
to
_ambig
rule
someChildren1
...
someChildren3
...
rule
someChildren2
...
someChildren3
...
rule
childrenFromNestedIambigs
...
someChildren3
...
...
propagating up any nested '_iambig' nodes along the way.
*/
constructor(tree_class, node_builder) {
this.node_builder = node_builder;
this.tree_class = tree_class;
}
__call__(children) {
let processed_nodes, iambig_node, collapsed, result, new_tree;
function _is_iambig_tree(child) {
return hasattr(child, "data") && child.data === "_iambig";
}
function _collapse_iambig(children) {
let iambig_node,
collapsed,
result,
new_tree; /*
Recursively flatten the derivations of the parent of an '_iambig'
node. Returns a list of '_inter' nodes guaranteed not
to contain any nested '_iambig' nodes, or None if children does
not contain an '_iambig' node.
*/ // Due to the structure of the SPPF,
// an '_iambig' node can only appear as the first child
if (children && _is_iambig_tree(children[0])) {
iambig_node = children[0];
result = [];
for (let grandchild of iambig_node.children) {
collapsed = _collapse_iambig(grandchild.children);
if (collapsed) {
for (let child of collapsed) {
child.children += children.slice(1);
}
result += collapsed;
} else {
new_tree = this.tree_class(
"_inter",
grandchild.children + children.slice(1)
);
result.push(new_tree);
}
}
return result;
}
}
collapsed = _collapse_iambig(children);
if (collapsed) {
processed_nodes = collapsed.map((c) => this.node_builder(c.children));
return this.tree_class("_ambig", processed_nodes);
}
return this.node_builder(children);
}
}
function inplace_transformer(func) {
let // function name in a Transformer is a rule name.
tree;
function f(children) {
let // function name in a Transformer is a rule name.
tree = new Tree(func.__name__, children);
return func(tree);
}
f = wraps(func)(f);
return f;
}
function apply_visit_wrapper(func, name, wrapper) {
if (wrapper === _vargs_meta || wrapper === _vargs_meta_inline) {
throw new NotImplementedError(
"Meta args not supported for internal transformer"
);
}
function f(children) {
return wrapper(func, name, children, null);
}
f = wraps(func)(f);
return f;
}
class ParseTreeBuilder {
constructor({
rules,
tree_class,
propagate_positions = false,
ambiguous = false,
maybe_placeholders = false,
} = {}) {
this.tree_class = tree_class;
this.propagate_positions = propagate_positions;
this.ambiguous = ambiguous;
this.maybe_placeholders = maybe_placeholders;
this.rule_builders = new Array.from(this._init_builders(rules));
}
*_init_builders(rules) {
let expand_single_child, wrapper_chain, options, keep_all_tokens;
let propagate_positions = make_propagate_positions(
this.propagate_positions
);
for (let rule of rules) {
options = rule.options;
keep_all_tokens = options.keep_all_tokens;
expand_single_child = options.expand1;
wrapper_chain = new Array.from(
filter(null, [
expand_single_child && !rule.alias && ExpandSingleChild,
maybe_create_child_filter(
rule.expansion,
keep_all_tokens,
this.ambiguous,
this.maybe_placeholders ? options.empty_indices : null
),
propagate_positions,
this.ambiguous &&
maybe_create_ambiguous_expander(
this.tree_class,
rule.expansion,
keep_all_tokens
),
this.ambiguous &&
partial(AmbiguousIntermediateExpander, this.tree_class),
])
);
yield rule, wrapper_chain;
}
}
create_callback({ transformer = null } = {}) {
let f, wrapper, user_callback_name;
let callbacks = {};
for (let [rule, wrapper_chain] of this.rule_builders) {
user_callback_name =
rule.alias || rule.options.template_source || rule.origin.name;
if (transformer && transformer[user_callback_name]) {
f = transformer[user_callback_name];
wrapper = f["visit_wrapper"] || null;
if (wrapper != null) {
f = apply_visit_wrapper(f, user_callback_name, wrapper);
} else if (transformer instanceof Transformer_InPlace) {
f = inplace_transformer(f);
}
} else {
f = partial(this.tree_class, user_callback_name);
}
for (let w of wrapper_chain) {
f = w(f);
}
if (rule in callbacks) {
throw new GrammarError("Rule '%s' already exists".format(rule));
}
callbacks[rule] = f;
}
return callbacks;
}
}
class LALR_Parser extends Serialize {
constructor({ parser_conf, debug = false } = {}) {
super();
let analysis = new LALR_Analyzer({ parser_conf, debug: debug });
analysis.compute_lalr();
let callbacks = parser_conf.callbacks;
this._parse_table = analysis.parse_table;
this.parser_conf = parser_conf;
this.parser = _Parser({
parse_table: analysis.parse_table,
callbacks,
debug,
});
}
static deserialize({ data, memo, callbacks, debug = false } = {}) {
cls = LALR_Parser;
let inst = cls.__new__(cls);
inst._parse_table = new IntParseTable.deserialize(data, memo);
inst.parser = _Parser({ parse_table: inst._parse_table, callbacks, debug });
return inst;
}
serialize(memo) {
return this._parse_table.serialize(memo);
}
parse_interactive(lexer, start) {
return this.parser.parse({ lexer, start, start_interactive: true });
}
parse({ lexer, start, on_error = null } = {}) {
let p, s, e;
try {
return this.parser.parse({ lexer, start });
} catch (e) {
if (e instanceof UnexpectedInput) {
if (on_error === null) {
throw None;
}
while (true) {
if (e instanceof UnexpectedCharacters) {
s = e.interactive_parser.lexer_state.state;
p = s.line_ctr.char_pos;
}
if (!on_error(e)) {
throw e;
}
if (e instanceof UnexpectedCharacters) {
// If user didn't change the character position, then we should
if (p === s.line_ctr.char_pos) {
s.line_ctr.feed(s.text.slice(p, p + 1));
}
}
try {
return e.interactive_parser.resume_parse();
} catch (e) {
if (e instanceof UnexpectedToken) {
e2 = e;
if (
e instanceof UnexpectedToken &&
e.token.type === e2.token.type &&
e2.token.type === "$END" &&
e.interactive_parser === e2.interactive_parser
) {
// Prevent infinite loop
throw e2;
}
e = e2;
} else if (e instanceof UnexpectedCharacters) {
e2 = e;
e = e2;
} else {
throw e;
}
}
}
} else {
throw e;
}
}
}
}
class ParseConf {
constructor(parse_table, callbacks, start) {
this.parse_table = parse_table;
this.start_state = this.parse_table.start_states[start];
this.end_state = this.parse_table.end_states[start];
this.states = this.parse_table.states;
this.callbacks = callbacks;
this.start = start;
}
}
class ParserState {
constructor({
parse_conf,
lexer,
state_stack = null,
value_stack = null,
} = {}) {
this.parse_conf = parse_conf;
this.lexer = lexer;
this.state_stack = state_stack || [this.parse_conf.start_state];
this.value_stack = value_stack || [];
}
get position() {
return this.state_stack[-1];
} // Necessary for match_examples() to work
__eq__(other) {
if (!(other instanceof ParserState)) {
return NotImplemented;
}
return (
this.state_stack.length === other.state_stack.length &&
this.position === other.position
);
}
__copy__() {
return type(this)(
this.parse_conf,
this.lexer, // XXX copy
copy(this.state_stack),
deepcopy(this.value_stack)
);
}
copy() {
return copy(this);
}
feed_token({ token, is_end = false } = {}) {
let s,
state,
size,
expected, // reduce+shift as many times as necessary
rule,
value;
let state_stack = this.state_stack;
let value_stack = this.value_stack;
let states = this.parse_conf.states;
let end_state = this.parse_conf.end_state;
let callbacks = this.parse_conf.callbacks;
while (true) {
state = state_stack[-1];
try {
[action, arg] = states[state][token.type];
} catch (e) {
if (e instanceof KeyError) {
expected = new Set(
states[state]
.keys()
.filter((s) => s.isupper())
.map((s) => s)
);
throw new UnexpectedToken({
token,
expected,
state: this,
interactive_parser: null,
});
} else {
throw e;
}
}
console.assert(arg != end_state);
if (action === Shift) {
// shift once and return
console.assert(!is_end);
state_stack.push(arg);
value_stack.push(
!(token.type in callbacks) ? token : callbacks[token.type](token)
);
return;
} else {
// reduce+shift as many times as necessary
rule = arg;
size = rule.expansion.length;
if (size) {
s = value_stack.slice(-size);
} else {
s = [];
}
value = callbacks[rule](s);
[_action, new_state] = states[state_stack[-1]][rule.origin.name];
console.assert(_action === Shift);
state_stack.push(new_state);
value_stack.push(value);
if (is_end && state_stack[-1] === end_state) {
return value_stack[-1];
}
}
}
}
}
class _Parser {
constructor({ parse_table, callbacks, debug = false } = {}) {
this.parse_table = parse_table;
this.callbacks = callbacks;
this.debug = debug;
}
parse({
lexer,
start,
value_stack = null,
state_stack = null,
start_interactive = false,
} = {}) {
let parse_conf = new ParseConf(this.parse_table, this.callbacks, start);
let parser_state = new ParserState({
parse_conf,
lexer,
state_stack,
value_stack,
});
if (start_interactive) {
return new InteractiveParser(this, parser_state, parser_state.lexer);
}
return this.parse_from_state(parser_state);
}
parse_from_state(state) {
let token, end_token; // Main LALR-parser loop
try {
token = null;
for (let token of state.lexer.lex(state)) {
state.feed_token(token);
}
end_token = token
? new Token.new_borrow_pos("$END", "", token)
: new Token("$END", "", 0, 1, 1);
return state.feed_token(end_token, true);
} catch (e) {
if (e instanceof UnexpectedInput) {
try {
e.interactive_parser = new InteractiveParser(
this,
state,
state.lexer
);
} catch (e) {
if (e instanceof ReferenceError) {
// pass
} else {
throw e;
}
}
throw e;
} else if (e instanceof Error) {
if (this.debug) {
console.log("");
console.log("STATE STACK DUMP");
console.log("----------------");
for (let [i, s] of enumerate(state.state_stack)) {
console.log("%d)".format(i), s);
}
console.log("");
}
throw None;
} else {
throw e;
}
}
}
}
class Action {
constructor(name) {
this.name = name;
}
__str__() {
return this.name;
}
__repr__() {
return str(this);
}
}
Shift = new Action("Shift");
Reduce = new Action("Reduce");
class ParseTable {
constructor(states, start_states, end_states) {
this.states = states;
this.start_states = start_states;
this.end_states = end_states;
}
serialize(memo) {
let tokens = new Enumerator();
let rules = new Enumerator();
let states = new Map(
this.states
.items()
.map(([state, actions]) => [
state,
new Map(
actions
.items()
.map(([token, [action, arg]]) => [
tokens.get(token),
action === Reduce ? [1, arg.serialize(memo)] : [0, arg],
])
),
])
);
return {
tokens: tokens.reversed(),
states: states,
start_states: this.start_states,
end_states: this.end_states,
};
}
static deserialize(data, memo) {
cls = ParseTable;
let tokens = data["tokens"];
let states = new Map(
data["states"]
.items()
.map(([state, actions]) => [
state,
new Map(
actions
.items()
.map(([token, [action, arg]]) => [
tokens[token],
action === 1
? [Reduce, new Rule.deserialize(arg, memo)]
: [Shift, arg],
])
),
])
);
return new cls(states, data["start_states"], data["end_states"]);
}
}
class IntParseTable extends ParseTable {
static from_ParseTable(parse_table) {
cls = IntParseTable;
let la;
let enum_ = new Array.from(parse_table.states);
let state_to_idx = new Map(enumerate(enum_).map(([i, s]) => [s, i]));
let int_states = {};
for (let [s, la] of parse_table.states.items()) {
la = new Map(
la
.items()
.map(([k, v]) => [k, v[0] === Shift ? [v[0], state_to_idx[v[1]]] : v])
);
int_states[state_to_idx[s]] = la;
}
let start_states = new Map(
parse_table.start_states
.items()
.map(([start, s]) => [start, state_to_idx[s]])
);
let end_states = new Map(
parse_table.end_states
.items()
.map(([start, s]) => [start, state_to_idx[s]])
);
return new cls(int_states, start_states, end_states);
}
}
function _wrap_lexer(lexer_class) {
let future_interface = lexer_class["__future_interface__"] || false;
if (future_interface) {
return lexer_class;
} else {
class CustomLexerWrapper extends Lexer {
constructor(lexer_conf) {
super();
this.lexer = lexer_class(lexer_conf);
}
lex(lexer_state, parser_state) {
return this.lexer.lex(lexer_state.text);
}
}
return CustomLexerWrapper;
}
}
class MakeParsingFrontend {
constructor(parser_type, lexer_type) {
this.parser_type = parser_type;
this.lexer_type = lexer_type;
}
__call__(lexer_conf, parser_conf, options) {
console.assert(lexer_conf instanceof LexerConf);
console.assert(parser_conf instanceof ParserConf);
parser_conf.parser_type = this.parser_type;
lexer_conf.lexer_type = this.lexer_type;
return new ParsingFrontend({ lexer_conf, parser_conf, options });
}
static deserialize(data, memo, lexer_conf, callbacks, options) {
cls = MakeParsingFrontend;
let parser_conf = new ParserConf.deserialize(data["parser_conf"], memo);
let parser = new LALR_Parser.deserialize(
data["parser"],
memo,
callbacks,
options.debug
);
parser_conf.callbacks = callbacks;
return new ParsingFrontend({
lexer_conf,
parser_conf,
options,
parser: parser,
});
}
}
class ParsingFrontend extends Serialize {
constructor({ lexer_conf, parser_conf, options, parser = null } = {}) {
super();
let create_parser, create_lexer;
this.parser_conf = parser_conf;
this.lexer_conf = lexer_conf;
this.options = options; // Set-up parser
if (parser) {
// From cache
this.parser = parser;
} else {
create_parser = {
lalr: create_lalr_parser,
earley: create_earley_parser,
cyk: CYK_FrontEnd,
}[parser_conf.parser_type];
this.parser = create_parser({ lexer_conf, parser_conf, options });
}
let // Set-up lexer
lexer_type = lexer_conf.lexer_type;
this.skip_lexer = false;
if (lexer_type in ["dynamic", "dynamic_complete"]) {
console.assert(lexer_conf.postlex === null);
this.skip_lexer = true;
return;
}
exc = null;
try {
create_lexer = {
standard: create_traditional_lexer,
contextual: create_contextual_lexer,
}[lexer_type];
} catch (e) {
exc = e;
if (e instanceof KeyError) {
console.assert(issubclass(lexer_type, Lexer), lexer_type);
this.lexer = _wrap_lexer(lexer_type)(lexer_conf);
} else {
throw e;
}
}
if (!exc) {
this.lexer = create_lexer(lexer_conf, this.parser, lexer_conf.postlex);
}
if (lexer_conf.postlex) {
this.lexer = new PostLexConnector(this.lexer, lexer_conf.postlex);
}
}
_verify_start({ start = null } = {}) {
let start_decls;
if (start === null) {
start_decls = this.parser_conf.start;
if (start_decls.length > 1) {
throw new ConfigurationError(
"Lark initialized with more than 1 possible start rule. Must specify which start rule to parse",
start_decls
);
}
[start] = start_decls;
} else if (!(start in this.parser_conf.start)) {
throw new ConfigurationError(
"Unknown start rule %s. Must be one of %r".format(
start,
this.parser_conf.start
)
);
}
return start;
}
parse({ text, start = null, on_error = null } = {}) {
let chosen_start = this._verify_start({ start });
let stream = this.skip_lexer ? text : new LexerThread(this.lexer, text);
let kw = on_error === null ? {} : { on_error: on_error };
return this.parser.parse({ stream, chosen_start, ...kw });
}
parse_interactive({ text = null, start = null } = {}) {
let chosen_start = this._verify_start(start);
if (this.parser_conf.parser_type != "lalr") {
throw new ConfigurationError(
"parse_interactive() currently only works with parser='lalr' "
);
}
let stream = this.skip_lexer ? text : new LexerThread(this.lexer, text);
return this.parser.parse_interactive(stream, chosen_start);
}
}
function get_frontend(parser, lexer) {
let // not custom lexer?
expected;
assert_config(parser, ["lalr", "earley", "cyk"]);
if (!(typeof lexer === "object")) {
// not custom lexer?
expected = {
lalr: ["standard", "contextual"],
earley: ["standard", "dynamic", "dynamic_complete"],
cyk: ["standard"],
}[parser];
assert_config(
lexer,
expected,
"Parser %r does not support lexer %%r, expected one of %%s".format(parser)
);
}
return new MakeParsingFrontend(parser, lexer);
}
function _get_lexer_callbacks(transformer, terminals) {
let callback;
let result = {};
for (let terminal of terminals) {
callback = transformer[terminal.name] || null;
if (callback != null) {
result[terminal.name] = callback;
}
}
return result;
}
class PostLexConnector {
constructor(lexer, postlexer) {
this.lexer = lexer;
this.postlexer = postlexer;
}
make_lexer_state(text) {
return this.lexer.make_lexer_state(text);
}
lex(lexer_state, parser_state) {
let i = this.lexer.lex(lexer_state, parser_state);
return this.postlexer.process(i);
}
}
function create_traditional_lexer(lexer_conf, parser, postlex) {
return new TraditionalLexer(lexer_conf);
}
function create_contextual_lexer(lexer_conf, parser, postlex) {
let states = new Map(
parser._parse_table.states
.items()
.map(([idx, t]) => [idx, new Array.from(t.keys())])
);
let always_accept = postlex ? postlex.always_accept : [];
return new ContextualLexer({
lexer_conf,
states,
always_accept: always_accept,
});
}
function create_lalr_parser({ lexer_conf, parser_conf, options = null } = {}) {
let debug = options ? options.debug : false;
return new LALR_Parser({ parser_conf, debug: debug });
}
create_earley_parser = NotImplemented;
CYK_FrontEnd = NotImplemented;
class LarkOptions extends Serialize {
/*
Specifies the options for Lark
*/
get OPTIONS_DOC() {
return `
**=== General Options ===**
start
The start symbol. Either a string, or a list of strings for multiple possible starts (Default: "start")
debug
Display debug information and extra warnings. Use only when debugging (default: False)
When used with Earley, it generates a forest graph as "sppf.png", if 'dot' is installed.
transformer
Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster)
propagate_positions
Propagates (line, column, end_line, end_column) attributes into all tree branches.
Accepts ````False````, ````True````, or a callable, which will filter which nodes to ignore when propagating.
maybe_placeholders
When ````True````, the ````[]```` operator returns ````None```` when not matched.
When ````False````, ````[]```` behaves like the ````?```` operator, and returns no value at all.
(default= ````False````. Recommended to set to ````True````)
cache
Cache the results of the Lark grammar analysis, for x2 to x3 faster loading. LALR only for now.
- When ````False````, does nothing (default)
- When ````True````, caches to a temporary file in the local directory
- When given a string, caches to the path pointed by the string
regex
When True, uses the ````regex```` module instead of the stdlib ````re````.
g_regex_flags
Flags that are applied to all terminals (both regex and strings)
keep_all_tokens
Prevent the tree builder from automagically removing "punctuation" tokens (default: False)
tree_class
Lark will produce trees comprised of instances of this class instead of the default ````lark.Tree````.
**=== Algorithm Options ===**
parser
Decides which parser engine to use. Accepts "earley" or "lalr". (Default: "earley").
(there is also a "cyk" option for legacy)
lexer
Decides whether or not to use a lexer stage
- "auto" (default): Choose for me based on the parser
- "standard": Use a standard lexer
- "contextual": Stronger lexer (only works with parser="lalr")
- "dynamic": Flexible and powerful (only with parser="earley")
- "dynamic_complete": Same as dynamic, but tries *every* variation of tokenizing possible.
ambiguity
Decides how to handle ambiguity in the parse. Only relevant if parser="earley"
- "resolve": The parser will automatically choose the simplest derivation
(it chooses consistently: greedy for tokens, non-greedy for rules)
- "explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest).
- "forest": The parser will return the root of the shared packed parse forest.
**=== Misc. / Domain Specific Options ===**
postlex
Lexer post-processing (Default: None) Only works with the standard and contextual lexers.
priority
How priorities should be evaluated - auto, none, normal, invert (Default: auto)
lexer_callbacks
Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution.
use_bytes
Accept an input of type ````bytes```` instead of ````str```` (Python 3 only).
edit_terminals
A callback for editing the terminals before parse.
import_paths
A List of either paths or loader functions to specify from where grammars are imported
source_path
Override the source of from where the grammar was loaded. Useful for relative imports and unconventional grammar loading
**=== End Options ===**
`;
}
get // Adding a new option needs to be done in multiple places:
// - In the dictionary below. This is the primary truth of which options `Lark.__init__` accepts
// - In the docstring above. It is used both for the docstring of `LarkOptions` and `Lark`, and in readthedocs
// - In `lark-stubs/lark.pyi`:
// - As attribute to `LarkOptions`
// - As parameter to `Lark.__init__`
// - Potentially in `_LOAD_ALLOWED_OPTIONS` below this class, when the option doesn't change how the grammar is loaded
// - Potentially in `lark.tools.__init__`, if it makes sense, and it can easily be passed as a cmd argument
_defaults() {
return {
debug: false,
keep_all_tokens: false,
tree_class: null,
cache: false,
postlex: null,
parser: "earley",
lexer: "auto",
transformer: null,
start: "start",
priority: "auto",
ambiguity: "auto",
regex: false,
propagate_positions: false,
lexer_callbacks: {},
maybe_placeholders: false,
edit_terminals: null,
g_regex_flags: 0,
use_bytes: false,
import_paths: [],
source_path: null,
};
}
constructor(options_dict) {
super();
let value;
let o = dict(options_dict);
let options = {};
for (let [name, default_] of this._defaults.items()) {
if (name in o) {
value = o.pop(name);
if (
default_ instanceof bool &&
!(name in ["cache", "use_bytes", "propagate_positions"])
) {
value = bool(value);
}
} else {
value = default_;
}
options[name] = value;
}
if (typeof options["start"] === "string") {
options["start"] = [options["start"]];
}
this["options"] = options;
assert_config(this.parser, ["earley", "lalr", "cyk", null]);
if (this.parser === "earley" && this.transformer) {
throw new ConfigurationError(
"Cannot specify an embedded transformer when using the Earley algorithm. " +
"Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)"
);
}
if (o) {
throw new ConfigurationError("Unknown options: %s".format(o.keys()));
}
}
__getattr__(name) {
try {
return this["options"][name];
} catch (e) {
if (e instanceof KeyError) {
throw new AttributeError(e);
} else {
throw e;
}
}
}
__setattr__(name, value) {
assert_config(
name,
this.options.keys(),
"%r isn't a valid option. Expected one of: %s"
);
this.options[name] = value;
}
serialize(memo) {
return this.options;
}
static deserialize(data, memo) {
cls = LarkOptions;
return new cls(data);
}
} // Options that can be passed to the Lark parser, even when it was loaded from cache/standalone.
// These option are only used outside of `load_grammar`.
_LOAD_ALLOWED_OPTIONS = new Set([
"postlex",
"transformer",
"lexer_callbacks",
"use_bytes",
"debug",
"g_regex_flags",
"regex",
"propagate_positions",
"tree_class",
]);
_VALID_PRIORITY_OPTIONS = ["auto", "normal", "invert", null];
_VALID_AMBIGUITY_OPTIONS = ["auto", "resolve", "explicit", "forest"];
class PostLex extends ABC {
process(stream) {
return stream;
}
get always_accept() {
return [];
}
}
class Lark extends Serialize {
/*
Main interface for the library.
It's mostly a thin wrapper for the many different parsers, and for the tree constructor.
Parameters:
grammar: a string or file-object containing the grammar spec (using Lark's ebnf syntax)
options: a dictionary controlling various aspects of Lark.
Example:
>>> Lark(r'''start: "foo" ''')
Lark(...)
*/
constructor({ grammar, ...options } = {}) {
super();
let cached_parser_data,
read,
s,
terminals_to_keep,
file_md5,
options_str,
re_module,
cached_used_files,
old_options,
unhashable;
this.options = new LarkOptions(options);
let // Set regex or re module
use_regex = this.options.regex;
if (use_regex) {
if (regex) {
re_module = regex;
} else {
throw new ImportError(
"`regex` module must be installed if calling `Lark(regex=True)`."
);
}
} else {
re_module = re;
} // Some, but not all file-like objects have a 'name' attribute
if (this.options.source_path === null) {
if (grammar && grammar.name) {
this.source_path = grammar.name;
} else {
this.source_path = "<string>";
}
} else {
this.source_path = this.options.source_path;
} // Drain file-like objects to get their contents
if (grammar && grammar.read) {
// Drain file-like objects to get their contents
read = grammar.read;
grammar = read();
} else {
// pass
}
let cache_fn = null;
let cache_md5 = null;
if (typeof grammar === "string") {
this.source_grammar = grammar;
if (this.options.use_bytes) {
if (!isascii(grammar)) {
throw new ConfigurationError(
"Grammar must be ascii only, when use_bytes=True"
);
}
}
if (this.options.cache) {
if (this.options.parser != "lalr") {
throw new ConfigurationError(
"cache only works with parser='lalr' for now"
);
}
unhashable = [
"transformer",
"postlex",
"lexer_callbacks",
"edit_terminals",
];
options_str = "".join(
options
.items()
.filter(([k, v]) => !(k in unhashable))
.map(([k, v]) => k + str(v))
);
s =
grammar + options_str + __version__ + str(sys.version_info.slice(2));
cache_md5 = hashlib.md5(s.encode("utf8")).hexdigest();
if (typeof this.options.cache === "string") {
cache_fn = this.options.cache;
} else {
if (this.options.cache != true) {
throw new ConfigurationError("cache argument must be bool or str");
} // Python2.7 doesn't support * syntax in tuples
cache_fn =
tempfile.gettempdir() +
"/.lark_cache_%s_%s_%s.tmp".format(
[cache_md5] + sys.version_info.slice(2)
);
}
if (new FS.exists(cache_fn)) {
logger.debug("Loading grammar from cache: %s", cache_fn); // Remove options that aren't relevant for loading from cache
for (let name of new Set(options) - _LOAD_ALLOWED_OPTIONS) {
}
old_options = this.options;
try {
file_md5 = f.readline().rstrip("\n");
cached_used_files = pickle.load(f);
if (
file_md5 === cache_md5.encode("utf8") &&
verify_used_files(cached_used_files)
) {
cached_parser_data = pickle.load(f);
this._load({ cached_parser_data, ...options });
return;
}
} catch (e) {
if (e instanceof Error) {
// We should probably narrow done which errors we catch here.
logger.exception(
"Failed to load Lark from cache: %r. We will try to carry on.".format(
cache_fn
)
); // In theory, the Lark instance might have been messed up by the call to `_load`.
// In practice the only relevant thing that might have been overriden should be `options`
this.options = old_options;
} else {
throw e;
}
}
}
}
[
// Parse the grammar file and compose the grammars
this.grammar,
used_files,
] = load_grammar(
grammar,
this.source_path,
this.options.import_paths,
this.options.keep_all_tokens
);
} else {
console.assert(grammar instanceof Grammar);
this.grammar = grammar;
}
if (this.options.lexer === "auto") {
if (this.options.parser === "lalr") {
this.options.lexer = "contextual";
} else if (this.options.parser === "earley") {
if (this.options.postlex != null) {
logger.info(
"postlex can't be used with the dynamic lexer, so we use standard instead. " +
"Consider using lalr with contextual instead of earley"
);
this.options.lexer = "standard";
} else {
this.options.lexer = "dynamic";
}
} else if (this.options.parser === "cyk") {
this.options.lexer = "standard";
} else {
console.assert(false, this.options.parser);
}
}
let lexer = this.options.lexer;
if (typeof lexer === "object") {
console.assert(issubclass(lexer, Lexer));
} else {
// XXX Is this really important? Maybe just ensure interface compliance
assert_config(lexer, [
"standard",
"contextual",
"dynamic",
"dynamic_complete",
]);
if (this.options.postlex != null && "dynamic" in lexer) {
throw new ConfigurationError(
"Can't use postlex with a dynamic lexer. Use standard or contextual instead"
);
}
}
if (this.options.ambiguity === "auto") {
if (this.options.parser === "earley") {
this.options.ambiguity = "resolve";
}
} else {
assert_config(
this.options.parser,
["earley", "cyk"],
"%r doesn't support disambiguation. Use one of these parsers instead: %s"
);
}
if (this.options.priority === "auto") {
this.options.priority = "normal";
}
if (!(this.options.priority in _VALID_PRIORITY_OPTIONS)) {
throw new ConfigurationError(
"invalid priority option: %r. Must be one of %r".format(
this.options.priority,
_VALID_PRIORITY_OPTIONS
)
);
}
console.assert(
!(this.options.ambiguity in ["resolve__antiscore_sum"]),
'resolve__antiscore_sum has been replaced with the option priority="invert"'
);
if (!(this.options.ambiguity in _VALID_AMBIGUITY_OPTIONS)) {
throw new ConfigurationError(
"invalid ambiguity option: %r. Must be one of %r".format(
this.options.ambiguity,
_VALID_AMBIGUITY_OPTIONS
)
);
}
if (this.options.postlex != null) {
terminals_to_keep = new Set(this.options.postlex.always_accept);
} else {
terminals_to_keep = new Set();
}
[
// Compile the EBNF grammar into BNF
this.terminals,
this.rules,
this.ignore_tokens,
] = this.grammar.compile(this.options.start, terminals_to_keep);
if (this.options.edit_terminals) {
for (let t of this.terminals) {
this.options.edit_terminals(t);
}
}
this._terminals_dict = new Map(this.terminals.map((t) => [t.name, t])); // If the user asked to invert the priorities, negate them all here.
// This replaces the old 'resolve__antiscore_sum' option.
if (this.options.priority === "invert") {
for (let rule of this.rules) {
if (rule.options.priority != null) {
rule.options.priority = -rule.options.priority;
}
}
}
// Else, if the user asked to disable priorities, strip them from the
// rules. This allows the Earley parsers to skip an extra forest walk
// for improved performance, if you don't need them (or didn't specify any).
else if (this.options.priority === null) {
for (let rule of this.rules) {
if (rule.options.priority != null) {
rule.options.priority = null;
}
}
} // TODO Deprecate lexer_callbacks?
this.lexer_conf = new LexerConf({
unknown_param: this.terminals,
re_module,
unknown_param: this.ignore_tokens,
unknown_param: this.options.postlex,
unknown_param: this.options.lexer_callbacks,
unknown_param: this.options.g_regex_flags,
use_bytes: this.options.use_bytes,
});
if (this.options.parser) {
this.parser = this._build_parser();
} else if (lexer) {
this.lexer = this._build_lexer();
}
if (cache_fn) {
logger.debug("Saving grammar to cache: %s", cache_fn);
f.write(cache_md5.encode("utf8") + "\n");
pickle.dump(used_files, f);
this.save(f);
}
}
_build_lexer({ dont_ignore = false } = {}) {
let lexer_conf = this.lexer_conf;
if (dont_ignore) {
lexer_conf = copy(lexer_conf);
lexer_conf.ignore = [];
}
return new TraditionalLexer(lexer_conf);
}
_prepare_callbacks() {
this._callbacks = {}; // we don't need these callbacks if we aren't building a tree
if (this.options.ambiguity != "forest") {
this._parse_tree_builder = new ParseTreeBuilder(
this.rules,
this.options.tree_class || Tree,
this.options.propagate_positions,
this.options.parser != "lalr" && this.options.ambiguity === "explicit",
this.options.maybe_placeholders
);
this._callbacks = this._parse_tree_builder.create_callback(
this.options.transformer
);
}
this._callbacks.update(
_get_lexer_callbacks(this.options.transformer, this.terminals)
);
}
_build_parser() {
this._prepare_callbacks();
let parser_class = get_frontend(this.options.parser, this.options.lexer);
let parser_conf = new ParserConf(
this.rules,
this._callbacks,
this.options.start
);
return parser_class({
unknown_param: this.lexer_conf,
parser_conf,
options: this.options,
});
}
save(f) {
/*
Saves the instance into the given file object
Useful for caching and multiprocessing.
*/
[data, m] = this.memo_serialize([TerminalDef, Rule]);
pickle.dump({
unknown_param: { data: data, memo: m },
f,
protocol: pickle.HIGHEST_PROTOCOL,
});
}
static load(f) {
cls = Lark; /*
Loads an instance from the given file object
Useful for caching and multiprocessing.
*/
let inst = cls.__new__(cls);
return inst._load(f);
}
_deserialize_lexer_conf(data, memo, options) {
let lexer_conf = new LexerConf.deserialize(data["lexer_conf"], memo);
lexer_conf.callbacks = options.lexer_callbacks || {};
lexer_conf.re_module = options.regex ? regex : re;
lexer_conf.use_bytes = options.use_bytes;
lexer_conf.g_regex_flags = options.g_regex_flags;
lexer_conf.skip_validation = true;
lexer_conf.postlex = options.postlex;
return lexer_conf;
}
_load({ f, ...kwargs } = {}) {
let d;
if (f instanceof dict) {
d = f;
} else {
d = pickle.load(f);
}
let memo_json = d["memo"];
let data = d["data"];
console.assert(memo_json);
let memo = new SerializeMemoizer.deserialize(
memo_json,
{ Rule: Rule, TerminalDef: TerminalDef },
{}
);
let options = dict(data["options"]);
if (
(new Set(kwargs) - _LOAD_ALLOWED_OPTIONS) &
new Set(LarkOptions._defaults)
) {
throw new ConfigurationError(
"Some options are not allowed when loading a Parser: {}".format(
new Set(kwargs) - _LOAD_ALLOWED_OPTIONS
)
);
}
options.update(kwargs);
this.options = new LarkOptions.deserialize(options, memo);
this.rules = data["rules"].map((r) => new Rule.deserialize(r, memo));
this.source_path = "<deserialized>";
let parser_class = get_frontend(this.options.parser, this.options.lexer);
this.lexer_conf = this._deserialize_lexer_conf(
data["parser"],
memo,
this.options
);
this.terminals = this.lexer_conf.terminals;
this._prepare_callbacks();
this._terminals_dict = new Map(this.terminals.map((t) => [t.name, t]));
this.parser = parser_class.deserialize(
data["parser"],
memo,
this.lexer_conf,
this._callbacks,
this.options
);
return this;
}
static _load_from_dict({ data, memo, ...kwargs } = {}) {
cls = Lark;
let inst = cls.__new__(cls);
return inst._load({ unknown_param: { data: data, memo: memo }, ...kwargs });
}
static open({ grammar_filename, rel_to = null, ...options } = {}) {
cls = Lark;
let basepath; /*
Create an instance of Lark with the grammar given by its filename
If ``rel_to`` is provided, the function will find the grammar filename in relation to it.
Example:
>>> Lark.open("grammar_file.lark", rel_to=__file__, parser="lalr")
Lark(...)
*/
if (rel_to) {
basepath = os.path.dirname(rel_to);
grammar_filename = os.path.join(basepath, grammar_filename);
}
return new cls({ f, ...options });
}
static open_from_package({
package_,
grammar_path,
search_paths = [""],
...options
} = {}) {
cls = Lark; /*
Create an instance of Lark with the grammar loaded from within the package `package`.
This allows grammar loading from zipapps.
Imports in the grammar will use the `package` and `search_paths` provided, through `FromPackageLoader`
Example:
Lark.open_from_package(__name__, "example.lark", ("grammars",), parser=...)
*/
let package_loader = new FromPackageLoader(package_, search_paths);
[full_path, text] = package_loader(null, grammar_path);
options.setdefault("source_path", full_path);
options.setdefault("import_paths", []);
options["import_paths"].push(package_loader);
return new cls({ text, ...options });
}
__repr__() {
return "Lark(open(%r), parser=%r, lexer=%r, ...)".format(
this.source_path,
this.options.parser,
this.options.lexer
);
}
lex({ text, dont_ignore = false } = {}) {
let lexer; /*
Only lex (and postlex) the text, without parsing it. Only relevant when lexer='standard'
When dont_ignore=True, the lexer will return all tokens, even those marked for %ignore.
*/
if (!hasattr(this, "lexer") || dont_ignore) {
lexer = this._build_lexer({ dont_ignore });
} else {
lexer = this.lexer;
}
let lexer_thread = new LexerThread(lexer, text);
let stream = lexer_thread.lex(null);
if (this.options.postlex) {
return this.options.postlex.process(stream);
}
return stream;
}
get_terminal(name) {
/*
Get information about a terminal
*/
return this._terminals_dict[name];
}
parse_interactive({ text = null, start = null } = {}) {
/*
Start an interactive parsing session.
Parameters:
text (str, optional): Text to be parsed. Required for ``resume_parse()``.
start (str, optional): Start symbol
Returns:
A new InteractiveParser instance.
See Also: ``Lark.parse()``
*/
return this.parser.parse_interactive({ text, start: start });
}
parse({ text, start = null, on_error = null } = {}) {
/*
Parse the given text, according to the options provided.
Parameters:
text (str): Text to be parsed.
start (str, optional): Required if Lark was given multiple possible start symbols (using the start option).
on_error (function, optional): if provided, will be called on UnexpectedToken error. Return true to resume parsing.
LALR only. See examples/advanced/error_handling.py for an example of how to use on_error.
Returns:
If a transformer is supplied to ``__init__``, returns whatever is the
result of the transformation. Otherwise, returns a Tree instance.
*/
return this.parser.parse({ text, start: start, on_error: on_error });
}
}
class DedentError extends LarkError {
// pass
}
class Indenter extends PostLex {
constructor() {
super();
this.paren_level = null;
this.indent_level = null;
console.assert(this.tab_len > 0);
}
*handle_NL(token) {
if (this.paren_level > 0) {
return;
}
yield token;
let indent_str = token.rsplit("\n", 1)[1];
let // Tabs and spaces
indent = indent_str.count(" ") + indent_str.count("\t") * this.tab_len;
if (indent > this.indent_level[-1]) {
this.indent_level.push(indent);
yield new Token.new_borrow_pos(this.INDENT_type, indent_str, token);
} else {
while (indent < this.indent_level[-1]) {
this.indent_level.pop();
yield new Token.new_borrow_pos(this.DEDENT_type, indent_str, token);
}
if (indent != this.indent_level[-1]) {
throw new DedentError(
"Unexpected dedent to column %s. Expected dedent to %s".format(
indent,
this.indent_level[-1]
)
);
}
}
}
*_process(stream) {
for (let token of stream) {
if (token.type === this.NL_type) {
for (let t of this.handle_NL(token)) {
yield t;
}
} else {
yield token;
}
if (token.type in this.OPEN_PAREN_types) {
this.paren_level += 1;
} else if (token.type in this.CLOSE_PAREN_types) {
this.paren_level -= 1;
console.assert(this.paren_level >= 0);
}
}
while (this.indent_level.length > 1) {
this.indent_level.pop();
yield new Token(this.DEDENT_type, "");
}
console.assert(this.indent_level === [0], this.indent_level);
}
process(stream) {
this.paren_level = 0;
this.indent_level = [0];
return this._process(stream);
}
get always_accept() {
return [this.NL_type];
}
}
module.exports = {
LarkError,
ConfigurationError,
assert_config,
GrammarError,
ParseError,
LexError,
UnexpectedInput,
UnexpectedEOF,
UnexpectedCharacters,
UnexpectedToken,
VisitError,
Meta,
Discard,
Transformer,
Transformer_InPlace,
Transformer_NonRecursive,
Transformer_InPlaceRecursive,
VisitorBase,
Visitor,
Visitor_Recursive,
Interpreter,
Symbol,
Terminal,
NonTerminal,
RuleOptions,
Rule,
Pattern,
PatternStr,
PatternRE,
Token,
LineCounter,
UnlessCallback,
CallChain,
Lexer,
TraditionalLexer,
LexerState,
ContextualLexer,
LexerThread,
LexerConf,
ParserConf,
ExpandSingleChild,
PropagatePositions,
make_propagate_positions,
ChildFilter,
ChildFilterLALR,
ChildFilterLALR_NoPlaceholders,
_should_expand,
maybe_create_child_filter,
AmbiguousExpander,
maybe_create_ambiguous_expander,
AmbiguousIntermediateExpander,
inplace_transformer,
apply_visit_wrapper,
ParseTreeBuilder,
LALR_Parser,
ParseConf,
ParserState,
_Parser,
Action,
ParseTable,
IntParseTable,
_wrap_lexer,
MakeParsingFrontend,
ParsingFrontend,
get_frontend,
_get_lexer_callbacks,
PostLexConnector,
create_traditional_lexer,
create_contextual_lexer,
create_lalr_parser,
LarkOptions,
PostLex,
Lark,
DedentError,
Indenter,
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment