Create a gist now

Instantly share code, notes, and snippets.

@cheery /parser.py
Last active Mar 20, 2016

What would you like to do?
Tried to write a derivatives based parser.
# Implementing "parsing with derivatives"
# http://matt.might.net/articles/parsing-with-derivatives/
# Matt claims the algorithm terminates with laziness and memoization
# Though nullable -function must be computed as a least-fixed-point
# The grammars are modelled as a graph of parser combinators.
def main():
B = Name('B')
B.bind((B+Char('x')) | empty)
fixpoint_flow(B)
result = derivative(B.value, 'x')
print result, is_nullable(result)
result = derivative(result, 'x')
print result, is_nullable(result)
result = derivative(result, 'x')
print result, is_nullable(result)
result = derivative(result, 'x')
print result, is_nullable(result)
result = derivative(result, 'x')
print result, is_nullable(result)
# Output:
# D'x'(B)x | $ True
# D'x'(D'x'(B))x | $ True
# D'x'(D'x'(D'x'(B)))x | $ True
# D'x'(D'x'(D'x'(D'x'(B))))x | $ True
# D'x'(D'x'(D'x'(D'x'(D'x'(B)))))x | $ True
result = derivative(result, 'y')
print result, is_nullable(result)
result = derivative(result, 'x')
print result, is_nullable(result)
result = derivative(result, 'x')
print result, is_nullable(result)
# Output:
# 0 False
# 0 False
# 0 False
# Here's yet another try.
result = derivative(B.value, 'y')
print result, is_nullable(result)
result = derivative(result, 'y')
print result, is_nullable(result)
# Output:
# 0 False
# 0 False
# An another, although simple grammar.
E, T = Name('E'), Name('T')
E.bind(T | (E + Char('+') + T))
T.bind(Char('x') | (Char('x') + T))
result = derivative(E.value, 'x')
print result, is_nullable(result)
result = derivative(result, '+')
print result, is_nullable(result)
result = derivative(result, 'x')
print result, is_nullable(result)
result = derivative(result, 'x')
print result, is_nullable(result)
# Output:
# D'x'(T) | D'x'(E)+T True
# T False
# D'x'(T) True
# D'x'(D'x'(T)) True
class Combinator(object):
nullable = None # means for undetermined nullability
depends = frozenset() # set of name combinators the combinator's
# nullability depends on. A common case is
# the result doesn't depend on other combinators.
first = frozenset()
def __or__(self, other):
if self is fail: # avoid producing unneeded combinators when combining them.
return other
if other is fail:
return self
return Choice(self, other)
def __add__(self, other):
if self is fail or other is fail:
return fail
if self is empty:
return other
if other is empty:
return self
return Cat(self, other)
def depend(self): # Used to build the 'depends' -set when combining combinators.
return self.depends
def rewrite(self): # Called when the combinator needs to be 'evaluated'
return self # default is that the combinator doesn't reduce further.
class Empty(Combinator): # Accepts an empty string.
nullable = True
def __init__(self):
self.first = frozenset([self])
def __repr__(self):
return "$"
class Fail(Combinator): # Accepts nothing, attempted to be pruned out when combining
nullable = False
def __repr__(self):
return "0"
def __or__(self, other):
return other
class Char(Combinator): # Accepts a character
nullable = False
def __init__(self, char):
self.char = char
self.first = frozenset(char)
def __repr__(self):
return self.char
class Name(Combinator): # The 'nonterminal' building block to
def __init__(self, name): # construct recursive grammars
self.name = name
self.value = fail
self.depends = None
self.flow = set() # When nullability changes, the nullability of names in
# this set should change too.
self.first = None # The characters that this combinator can start with.
def __repr__(self):
return self.name
def bind(self, value): # bind assigns a value to the name.
assert self.nullable is None # to verify nullability is correct, we prefer
self.value = value # to think of Name as an immutable construct.
self.nullable = None
self.depends = self.value.depend()
for name in self.depends: # used to calculate nullability
name.flow.add(self)
def depend(self):
return {self}
def rewrite(self): # Name should evaluate to its value.
return self.value
class Cat(Combinator):
def __init__(self, lhs, rhs):
self.lhs = lhs
self.rhs = rhs
self.depends = self.lhs.depend() | self.rhs.depend()
def __repr__(self): # my notation needs parentheses if Choice appears inside Cat.
lhs = "({})".format(repr(self.lhs)) if isinstance(self.lhs, Choice) else repr(self.lhs)
rhs = "({})".format(repr(self.rhs)) if isinstance(self.rhs, Choice) else repr(self.rhs)
return lhs + rhs
@property
def first(self):
lhs = self.lhs.first
if empty in lhs:
return lhs | self.rhs.first
return lhs
# the example doesn't use this function, so bit unsure about the correctness here.
def rewrite(self): # bias to left rewrite in Catenation.
return self.lhs.rewrite() + self.rhs
class Choice(Combinator):
def __init__(self, lhs, rhs):
self.lhs = lhs
self.rhs = rhs
self.depends = self.lhs.depend() | self.rhs.depend()
def __repr__(self):
return "{} | {}".format(self.lhs, self.rhs)
@property
def first(self):
return self.lhs.first | self.rhs.first
def rewrite(self):
return self.lhs.rewrite() | self.rhs.rewrite()
class Deriv(Combinator): # An idea of unevaluated derivative
def __init__(self, parser, char):
assert isinstance(parser, (Deriv, Name))
self.parser = parser
self.char = char
self.value = None
def __repr__(self):
return "D{!r}({})".format(self.char, self.parser)
# Deriv is only produced for a symbol, or another deriv. The idea is
# such chains are produced by left recursive rules.
@property
def first(self):
return self.parser.first
def rewrite(self):
if self.value is None:
self.value = derivative(self.parser.rewrite(), self.char)
return self.value
empty = Empty() # Since they're immutable, we need only one of each of these.
fail = Fail()
# Presents taking a derivative from each combinator.
def derivative(parser, char):
if isinstance(parser, Char):
if char == parser.char:
return empty
elif isinstance(parser, Cat):
lhs = derivative(parser.lhs, char) + parser.rhs
rhs = derivative(parser.rhs, char) if is_nullable(parser.lhs) else fail
return lhs | rhs
elif isinstance(parser, Choice):
return derivative(parser.lhs, char) | derivative(parser.rhs, char)
elif isinstance(parser, Name):
if parser.first is None:
fixpoint_flow(parser)
if char in parser.first:
return deriv_cache(parser, char)
elif isinstance(parser, Deriv):
if char in parser.first:
return deriv_cache(parser, char)
return fail
# This is here because the author proposes these constructs could be cached.
derives = {}
def deriv_cache(parser, char):
key = (parser, char)
if key in derives:
return derives[key]
derives[key] = deriv = Deriv(parser, char)
return deriv
def is_nullable(parser):
if isinstance(parser, Cat):
# Nullability potentially results in a rewrite, if it's obvious the branch
# isn't nullable, further evaluations would be unnecessary.
if parser.lhs.nullable == False or parser.rhs.nullable == False:
return False
return is_nullable(parser.lhs) and is_nullable(parser.rhs)
elif isinstance(parser, Choice):
return is_nullable(parser.lhs) or is_nullable(parser.rhs)
elif isinstance(parser, Name) and parser.nullable is None:
fixpoint_flow(parser)
return parser.nullable
elif isinstance(parser, Deriv):
return is_nullable(parser.rewrite())
else:
return parser.nullable
# For Names we calculate a fixed point if it's not been calculated yet:
# when something is determined to be nullable, we can consider that figured
# out. If it was determined to be nullable, nothing further needs to be done.
# If something becomes true, we should check the names depending to it
# again.
# Alongside we're determining the first -set of the grammar.
def fixpoint_flow(parser):
group = prepare_fixpoint_flow(set(), parser)
while len(group) > 0:
name = group.pop()
name.nullable = is_nullable(name.value)
F = len(name.first)
name.first.update(name.value.first)
if F < len(name.first):
group.update(n for n in name.flow)
elif name.nullable:
group.update(n for n in name.flow if not n.nullable)
# The fixed point flow is prepared by a set containing every combinator
# that's not been determined to be nullable.
def prepare_fixpoint_flow(group, parser):
parser.nullable = False
parser.first = set()
group.add(parser)
for depend in parser.depends:
if depend.nullable is None:
prepare_fixpoint_flow(group, depend)
for flow in parser.flow:
if flow.nullable is None:
prepare_fixpoint_flow(group, flow)
return group
if __name__=='__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment