Last active
March 19, 2018 03:02
-
-
Save elfsternberg/09d3dd2ed93f18c097c93d3a47b1939c to your computer and use it in GitHub Desktop.
Python parsing with derivatives.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# This file implements Brzozowski's parsing-with-derivatives (see: | |
# https://arxiv.org/abs/1604.04695) with fairly simple and | |
# straightforward Python code. This implementation is naive and | |
# subject to exponential memory growth in the worst case; in practice, | |
# average complexity is closer to linear. This version recalculates | |
# derivatives every time they are needed; memoization, the first and | |
# most obvious optimization, is not implemented. | |
# | |
# For details and a pedagogical example, please see: | |
# http://www.elfsternberg.com/2018/01/24/parsing-derivatives-naive-python-edition/ | |
class Derives(object): | |
def __call__(self, w, l = None): | |
if l == None: | |
l = self | |
if (w == ""): | |
return nullable(l) | |
return self.__call__(w[1:], self.derive(w[0], l)) | |
def derive(self, c, o = None): | |
if o == None: | |
o = self | |
if isinstance(o, Empty): return Empty() | |
if isinstance(o, Eps): return Empty() | |
if isinstance(o, Char): | |
if (o.c == c): | |
return Eps() | |
return Empty() | |
if isinstance(o, Rep): | |
return Cat(o.r.derive(c), o) | |
if isinstance(o, Alt): | |
return Alt(o.l.derive(c), o.r.derive(c)) | |
if isinstance(o, Cat): | |
left_derivative = Cat(o.l.derive(c), o.r) | |
if nullable(o.l): | |
return Alt(left_derivative, o.r.derive(c)) | |
return left_derivative | |
class Empty(Derives): | |
def __init__(self, *args, **kwargs): pass | |
def __str__(self): return "(Empty)" | |
class Eps(Derives): | |
def __init__(self, *args, **kwargs): pass | |
def __str__(self): return "(EPS)" | |
class Char(Derives): | |
def __init__(self, c, *args, **kwargs): | |
self.c = c | |
def __str__(self): return "(char '{}')".format(self.c) | |
class Cat(Derives): | |
def __init__(self, l, r, *args, **kwargs): | |
if len(args) > 0: | |
self.l = l | |
self.r = Cat(r, args[0], *args[1:]) | |
else: | |
self.l = l | |
self.r = r | |
def __str__(self): | |
return "(cat {} {})".format(self.l, self.r) | |
class Alt(Derives): | |
def __init__(self, l, r, *args, **kwargs): | |
if len(args) > 0: | |
self.l = l | |
self.r = Alt(r, args[0], *args[1:]) | |
else: | |
self.l = l | |
self.r = r | |
def __str__(self): | |
return "(alt {} {})".format(self.l, self.r) | |
class Rep(Derives): | |
def __init__(self, r, *args, **kwargs): | |
self.r = r | |
def __str__(self): | |
return "(rep {})".format(self.r) | |
def nullable(l): | |
if isinstance(l, Empty) or isinstance(l, Char): return False | |
if isinstance(l, Eps) or isinstance(l, Rep): return True | |
if isinstance(l, Alt): return nullable(l.l) or nullable(l.r) | |
if isinstance(l, Cat): return nullable(l.l) and nullable(l.r) | |
raise "Not a language." | |
digit = Alt(*[Char(i) for i in "0123456789"]) | |
floater = Cat(Alt(Eps(), Alt(Char("+"), Char("-"))), | |
Rep(digit), | |
Char("."), | |
digit, | |
Rep(digit)) | |
for i in [("-2.0", True), ("1", False), ("", False), ("+12.12", True), ("1.0", True)]: | |
print i[1], floater(i[0]) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment