Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
Python parsing with derivatives.
#!/usr/bin/env python
# This file implements Brzozowski's parsing-with-derivatives (see:
# with fairly simple and
# straightforward Python code. This implementation is naive and
# subject to exponential memory growth in the worst case; in practice,
# average complexity is closer to linear. This version recalculates
# derivatives every time they are needed; memoization, the first and
# most obvious optimization, is not implemented.
# For details and a pedagogical example, please see:
class Derives(object):
def __call__(self, w, l = None):
if l == None:
l = self
if (w == ""):
return nullable(l)
return self.__call__(w[1:], self.derive(w[0], l))
def derive(self, c, o = None):
if o == None:
o = self
if isinstance(o, Empty): return Empty()
if isinstance(o, Eps): return Empty()
if isinstance(o, Char):
if (o.c == c):
return Eps()
return Empty()
if isinstance(o, Rep):
return Cat(o.r.derive(c), o)
if isinstance(o, Alt):
return Alt(o.l.derive(c), o.r.derive(c))
if isinstance(o, Cat):
left_derivative = Cat(o.l.derive(c), o.r)
if nullable(o.l):
return Alt(left_derivative, o.r.derive(c))
return left_derivative
class Empty(Derives):
def __init__(self, *args, **kwargs): pass
def __str__(self): return "(Empty)"
class Eps(Derives):
def __init__(self, *args, **kwargs): pass
def __str__(self): return "(EPS)"
class Char(Derives):
def __init__(self, c, *args, **kwargs):
self.c = c
def __str__(self): return "(char '{}')".format(self.c)
class Cat(Derives):
def __init__(self, l, r, *args, **kwargs):
if len(args) > 0:
self.l = l
self.r = Cat(r, args[0], *args[1:])
self.l = l
self.r = r
def __str__(self):
return "(cat {} {})".format(self.l, self.r)
class Alt(Derives):
def __init__(self, l, r, *args, **kwargs):
if len(args) > 0:
self.l = l
self.r = Alt(r, args[0], *args[1:])
self.l = l
self.r = r
def __str__(self):
return "(alt {} {})".format(self.l, self.r)
class Rep(Derives):
def __init__(self, r, *args, **kwargs):
self.r = r
def __str__(self):
return "(rep {})".format(self.r)
def nullable(l):
if isinstance(l, Empty) or isinstance(l, Char): return False
if isinstance(l, Eps) or isinstance(l, Rep): return True
if isinstance(l, Alt): return nullable(l.l) or nullable(l.r)
if isinstance(l, Cat): return nullable(l.l) and nullable(l.r)
raise "Not a language."
digit = Alt(*[Char(i) for i in "0123456789"])
floater = Cat(Alt(Eps(), Alt(Char("+"), Char("-"))),
for i in [("-2.0", True), ("1", False), ("", False), ("+12.12", True), ("1.0", True)]:
print i[1], floater(i[0])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment