-
-
Save pervognsen/815b208b86066f6d7a00 to your computer and use it in GitHub Desktop.
parser.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import namedtuple | |
from dataflow import fix | |
def memo(f): | |
cache = {} | |
def f_memo(*args): | |
try: | |
return cache[args] | |
except: | |
result = cache[args] = f(*args) | |
return result | |
return f_memo | |
def memofix(*args, **kwargs): | |
return lambda f: memo(fix(*args, **kwargs)(f)) | |
def lazy(f, *args, **kwargs): | |
value = [] | |
class proxy: | |
def __hash__(self): | |
return id(self) | |
def __eq__(self, other): | |
return id(self) == id(other) | |
def __getattr__(self, name): | |
if not value: value[:] = [f(*args, **kwargs)] | |
return getattr(value[0], name) | |
return proxy() | |
def record(fields=''): | |
def decorator(cls): | |
tuple_cls = namedtuple(cls.__name__, fields) | |
class record_cls(cls, tuple_cls): | |
def __hash__(self): | |
return id(self) | |
def __eq__(self, other): | |
return id(self) == id(other) | |
__repr__ = memofix(cls.__name__ + '(...)')(tuple_cls.__repr__) | |
return record_cls | |
return decorator | |
@record() | |
class Epsilon: | |
def nullable(self): | |
return True | |
def derive(self, char): | |
return empty | |
def compact(self): | |
return self | |
def size(self): | |
return 1 | |
@record() | |
class Empty: | |
def nullable(self): | |
return False | |
def derive(self, char): | |
return empty | |
def compact(self): | |
return self | |
def size(self): | |
return 1 | |
@record('char') | |
class Lit: | |
def nullable(self): | |
return False | |
def derive(self, char): | |
return epsilon if char == self.char else empty | |
def compact(self): | |
return self | |
def size(self): | |
return 1 | |
@record('left right') | |
class Alt: | |
def nullable(self): | |
return nullable(self.left) or nullable(self.right) | |
def derive(self, char): | |
return alt(lazy(derive, self.left, char), lazy(derive, self.right, char)) | |
def compact(self): | |
if self.left is empty: | |
return compact(self.right) | |
if self.right is empty: | |
return compact(self.left) | |
return alt(compact(self.left), compact(self.right)) | |
def size(self): | |
return 1 + size(self.left) + size(self.right) | |
@record('left right') | |
class Seq: | |
def nullable(self): | |
return nullable(self.left) and nullable(self.right) | |
def derive(self, char): | |
d = seq(lazy(derive, self.left, char), self.right) | |
return alt(d, lazy(derive, self.right, char)) if nullable(self.left) else d | |
def compact(self): | |
if self.left is empty or self.right is empty: | |
return empty | |
if self.left is epsilon: | |
return compact(self.right) | |
if self.right is epsilon: | |
return compact(self.left) | |
return seq(compact(self.left), compact(self.right)) | |
def size(self): | |
return 1 + size(self.left) + size(self.right) | |
epsilon = Epsilon() | |
empty = Empty() | |
def lit(chars): | |
return seq(*map(Lit, chars)) | |
@memofix(Alt) | |
def alt2(left, right): | |
return compact(Alt(left, right)) | |
@memofix(Seq) | |
def seq2(left, right): | |
return compact(Seq(left, right)) | |
def seq(*parsers): | |
return reduce(seq2, parsers, epsilon) | |
def alt(*parsers): | |
return reduce(alt2, parsers, empty) | |
def rep(parser): | |
r = lazy(lambda: alt(epsilon, seq(parser, r))) | |
return r | |
@memofix(False) | |
def nullable(parser): | |
return parser.nullable() | |
@memo | |
def derive(parser, char): | |
return parser.derive(char) | |
@memofix(lambda parser: parser) | |
def compact(parser): | |
return parser.compact() | |
@memofix(0) | |
def size(parser): | |
return parser.size() | |
def parses(parser, string): | |
for char in string: | |
parser = derive(parser, char) | |
return nullable(parser) | |
# examples | |
if __name__ == '__main__': | |
xs = rep(lit('xy')) | |
print parses(xs, 100000 * 'xy') |
Sorry for the very belated reply. The implementation of 'fix' is here: https://gist.github.com/pervognsen/8dafe21038f3b513693e. I also pasted it into the top of the file, so it's self contained.
Can you provide an example of a non-regular context free language (e.g. A := '(' A ')' | epsilon)?
This implements the language recognition decision procedure. Have you attempted to produce the actual parse trees?
Here is implementation in JS https://github.com/stereobooster/parsing-with-derivalives
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Could you tell me please from where did you get 'dataflow' package? There is one: https://pypi.python.org/pypi/dataflow/0.1.1
but there is no 'fix' there