Skip to content

Instantly share code, notes, and snippets.

@Trebor-Huang
Created November 6, 2022 15:31
Show Gist options
  • Save Trebor-Huang/9e733a28d30b39b4994b014ca225bdcd to your computer and use it in GitHub Desktop.
Save Trebor-Huang/9e733a28d30b39b4994b014ca225bdcd to your computer and use it in GitHub Desktop.
Parsing with derivatives
from functools import lru_cache
class Map:
def __init__(self, f, iterable):
self.data = (f, iterable)
def __iter__(self):
for i in self.data[1]:
yield self.data[0](i)
class Parser:
def __init__(self, equations:dict, charP=None, concatP=None):
self.charP = charP or (lambda d: ("Char", d))
self.concatP = concatP or (lambda s, t: ("Concat", s, t))
self.equations = {("", name) : self.compaction(equations[name]) for name in equations}
def __getitem__(self, name):
"""Lazily evaluate derivatives."""
if name in self.equations:
return self.equations[name]
cs, name = name
if cs == "":
raise ValueError("Node name not found:", name)
comb = self.compaction(self.derive(self[cs[:-1], name], cs[-1]))
self.equations[cs, name] = comb
return comb
@lru_cache(10000)
def compaction(self, comb):
if not isinstance(comb, tuple):
return comb
comb = tuple(self.compaction(c) for c in comb)
match comb:
case ("Nullify", ("Char", _)):
return "Reject",
case ("Nullify", ("Null", trees)):
return "Null", trees
case ("Nullify", ("Nullify", comb)):
return ("Nullify", comb)
case ("Concat", ("Reject",), _) | ("Concat", _, ("Reject",)):
return "Reject",
case ("Union", ("Reject",), comb) | ("Union", comb, ("Reject",)):
return comb
case ("Transform", ("Null", trees), f):
return "Null", Map(f, trees)
case ("Transform", ("Transform", comb, f), g):
return "Transform", comb, lambda x: g(f(x))
case _:
return comb
@lru_cache(10000)
def derive(self, comb, c):
match comb:
case "Reject",:
return "Reject",
case "Char", d:
if c == d:
return "Null", (self.charP(d),)
return "Reject",
case "Null", _:
return "Reject",
case "Union", s, t:
return "Union", self.derive(s, c), self.derive(t, c)
case "Concat", s, t:
return "Union",\
("Concat", self.derive(s, c), t),\
("Concat", ("Nullify", s), self.derive(t, c))
case "Nullify", s:
return "Reject",
case "Transform", s, f:
return "Transform", self.derive(s, c), f
case "Rec", cs, ref:
return "Rec", cs + c, ref
case _:
raise ValueError("Unrecognized grammar", comb)
def parse(self, string = '', entry = "Language"):
# This produces trees, will be faster if only recognizes
self.queue = [(string, entry)]
self.production = {}
self.notification = {}
while self.queue:
self.notify = set()
cs, ref = self.queue.pop(0)
# ("Queue: ", cs, ref)
notify = False
for tree in self.ennull(self[cs, ref]):
if tree in self.production[cs, ref]:
continue
# ("Produced: ", tree)
notify = True
self.production[cs, ref].add(tree)
if (cs, ref) == (string, entry):
yield tree
# ("Subscribing: ", self.notify)
for u in self.notify:
if u not in self.notification:
self.notification[u] = set()
self.notification[u].add((cs, ref))
if notify:
# ("Notifying: ", self.notification[cs, ref])
self.queue.extend(self.notification[cs, ref])
def ennull(self, comb = ("Rec", '', "Language")):
match comb:
case "Reject",:
return
case "Char", d:
return
case "Null", trees:
yield from trees
case "Union", s, t:
yield from self.ennull(s)
yield from self.ennull(t)
case "Concat", s, t:
for left in self.ennull(s):
for right in self.ennull(t):
yield self.concatP(left, right)
case "Nullify", s:
yield from self.ennull(s)
case "Transform", s, f:
for tree in self.ennull(s):
yield f(tree)
case "Rec", cs, ref: # Least fixpoint
self.notify.add((cs, ref))
if (cs, ref) not in self.production:
self.production[cs, ref] = set()
self.queue.append((cs, ref))
yield from self.production[cs, ref]
case _:
raise ValueError("Unrecognized grammar", comb)
lang = Parser({ # L := L{L}L | L where '{' and '}' are terminals
"Language":
("Union",
("Concat", ("Rec", '', "Language"),
("Concat", ("Char", '{'),
("Concat", ("Rec", '', "Language"),
("Concat", ("Char", '}'),
("Rec", '', "Language"))))),
("Null", (("Null",),)))
})
for tree in lang.parse('{{}{{}{}}}{}'):
print(tree)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment