Instantly share code, notes, and snippets.

cheery/parser.py Last active Mar 20, 2016

Tried to write a derivatives based parser.
 # Implementing "parsing with derivatives" # http://matt.might.net/articles/parsing-with-derivatives/ # Matt claims the algorithm terminates with laziness and memoization # Though nullable -function must be computed as a least-fixed-point # The grammars are modelled as a graph of parser combinators. def main(): B = Name('B') B.bind((B+Char('x')) | empty) fixpoint_flow(B) result = derivative(B.value, 'x') print result, is_nullable(result) result = derivative(result, 'x') print result, is_nullable(result) result = derivative(result, 'x') print result, is_nullable(result) result = derivative(result, 'x') print result, is_nullable(result) result = derivative(result, 'x') print result, is_nullable(result) # Output: # D'x'(B)x | \$ True # D'x'(D'x'(B))x | \$ True # D'x'(D'x'(D'x'(B)))x | \$ True # D'x'(D'x'(D'x'(D'x'(B))))x | \$ True # D'x'(D'x'(D'x'(D'x'(D'x'(B)))))x | \$ True result = derivative(result, 'y') print result, is_nullable(result) result = derivative(result, 'x') print result, is_nullable(result) result = derivative(result, 'x') print result, is_nullable(result) # Output: # 0 False # 0 False # 0 False # Here's yet another try. result = derivative(B.value, 'y') print result, is_nullable(result) result = derivative(result, 'y') print result, is_nullable(result) # Output: # 0 False # 0 False # An another, although simple grammar. E, T = Name('E'), Name('T') E.bind(T | (E + Char('+') + T)) T.bind(Char('x') | (Char('x') + T)) result = derivative(E.value, 'x') print result, is_nullable(result) result = derivative(result, '+') print result, is_nullable(result) result = derivative(result, 'x') print result, is_nullable(result) result = derivative(result, 'x') print result, is_nullable(result) # Output: # D'x'(T) | D'x'(E)+T True # T False # D'x'(T) True # D'x'(D'x'(T)) True class Combinator(object): nullable = None # means for undetermined nullability depends = frozenset() # set of name combinators the combinator's # nullability depends on. A common case is # the result doesn't depend on other combinators. first = frozenset() def __or__(self, other): if self is fail: # avoid producing unneeded combinators when combining them. return other if other is fail: return self return Choice(self, other) def __add__(self, other): if self is fail or other is fail: return fail if self is empty: return other if other is empty: return self return Cat(self, other) def depend(self): # Used to build the 'depends' -set when combining combinators. return self.depends def rewrite(self): # Called when the combinator needs to be 'evaluated' return self # default is that the combinator doesn't reduce further. class Empty(Combinator): # Accepts an empty string. nullable = True def __init__(self): self.first = frozenset([self]) def __repr__(self): return "\$" class Fail(Combinator): # Accepts nothing, attempted to be pruned out when combining nullable = False def __repr__(self): return "0" def __or__(self, other): return other class Char(Combinator): # Accepts a character nullable = False def __init__(self, char): self.char = char self.first = frozenset(char) def __repr__(self): return self.char class Name(Combinator): # The 'nonterminal' building block to def __init__(self, name): # construct recursive grammars self.name = name self.value = fail self.depends = None self.flow = set() # When nullability changes, the nullability of names in # this set should change too. self.first = None # The characters that this combinator can start with. def __repr__(self): return self.name def bind(self, value): # bind assigns a value to the name. assert self.nullable is None # to verify nullability is correct, we prefer self.value = value # to think of Name as an immutable construct. self.nullable = None self.depends = self.value.depend() for name in self.depends: # used to calculate nullability name.flow.add(self) def depend(self): return {self} def rewrite(self): # Name should evaluate to its value. return self.value class Cat(Combinator): def __init__(self, lhs, rhs): self.lhs = lhs self.rhs = rhs self.depends = self.lhs.depend() | self.rhs.depend() def __repr__(self): # my notation needs parentheses if Choice appears inside Cat. lhs = "({})".format(repr(self.lhs)) if isinstance(self.lhs, Choice) else repr(self.lhs) rhs = "({})".format(repr(self.rhs)) if isinstance(self.rhs, Choice) else repr(self.rhs) return lhs + rhs @property def first(self): lhs = self.lhs.first if empty in lhs: return lhs | self.rhs.first return lhs # the example doesn't use this function, so bit unsure about the correctness here. def rewrite(self): # bias to left rewrite in Catenation. return self.lhs.rewrite() + self.rhs class Choice(Combinator): def __init__(self, lhs, rhs): self.lhs = lhs self.rhs = rhs self.depends = self.lhs.depend() | self.rhs.depend() def __repr__(self): return "{} | {}".format(self.lhs, self.rhs) @property def first(self): return self.lhs.first | self.rhs.first def rewrite(self): return self.lhs.rewrite() | self.rhs.rewrite() class Deriv(Combinator): # An idea of unevaluated derivative def __init__(self, parser, char): assert isinstance(parser, (Deriv, Name)) self.parser = parser self.char = char self.value = None def __repr__(self): return "D{!r}({})".format(self.char, self.parser) # Deriv is only produced for a symbol, or another deriv. The idea is # such chains are produced by left recursive rules. @property def first(self): return self.parser.first def rewrite(self): if self.value is None: self.value = derivative(self.parser.rewrite(), self.char) return self.value empty = Empty() # Since they're immutable, we need only one of each of these. fail = Fail() # Presents taking a derivative from each combinator. def derivative(parser, char): if isinstance(parser, Char): if char == parser.char: return empty elif isinstance(parser, Cat): lhs = derivative(parser.lhs, char) + parser.rhs rhs = derivative(parser.rhs, char) if is_nullable(parser.lhs) else fail return lhs | rhs elif isinstance(parser, Choice): return derivative(parser.lhs, char) | derivative(parser.rhs, char) elif isinstance(parser, Name): if parser.first is None: fixpoint_flow(parser) if char in parser.first: return deriv_cache(parser, char) elif isinstance(parser, Deriv): if char in parser.first: return deriv_cache(parser, char) return fail # This is here because the author proposes these constructs could be cached. derives = {} def deriv_cache(parser, char): key = (parser, char) if key in derives: return derives[key] derives[key] = deriv = Deriv(parser, char) return deriv def is_nullable(parser): if isinstance(parser, Cat): # Nullability potentially results in a rewrite, if it's obvious the branch # isn't nullable, further evaluations would be unnecessary. if parser.lhs.nullable == False or parser.rhs.nullable == False: return False return is_nullable(parser.lhs) and is_nullable(parser.rhs) elif isinstance(parser, Choice): return is_nullable(parser.lhs) or is_nullable(parser.rhs) elif isinstance(parser, Name) and parser.nullable is None: fixpoint_flow(parser) return parser.nullable elif isinstance(parser, Deriv): return is_nullable(parser.rewrite()) else: return parser.nullable # For Names we calculate a fixed point if it's not been calculated yet: # when something is determined to be nullable, we can consider that figured # out. If it was determined to be nullable, nothing further needs to be done. # If something becomes true, we should check the names depending to it # again. # Alongside we're determining the first -set of the grammar. def fixpoint_flow(parser): group = prepare_fixpoint_flow(set(), parser) while len(group) > 0: name = group.pop() name.nullable = is_nullable(name.value) F = len(name.first) name.first.update(name.value.first) if F < len(name.first): group.update(n for n in name.flow) elif name.nullable: group.update(n for n in name.flow if not n.nullable) # The fixed point flow is prepared by a set containing every combinator # that's not been determined to be nullable. def prepare_fixpoint_flow(group, parser): parser.nullable = False parser.first = set() group.add(parser) for depend in parser.depends: if depend.nullable is None: prepare_fixpoint_flow(group, depend) for flow in parser.flow: if flow.nullable is None: prepare_fixpoint_flow(group, flow) return group if __name__=='__main__': main()