Skip to content

Instantly share code, notes, and snippets.

@cheery
Created May 4, 2015 21:00
Show Gist options
  • Save cheery/ae4da756c17f8306c240 to your computer and use it in GitHub Desktop.
Save cheery/ae4da756c17f8306c240 to your computer and use it in GitHub Desktop.
GLL recognizer/parser
# A GLL recognizer with a twist.
# The parser builds up a list of reduction rules when it is parsing.
# Once sorted, the list can be used to build parse trees.
class GSS(object):
def __init__(self, label, index=0):
self.label = label
self.index = index
self.edges = set()
self.nodes = {}
def reduction(self, chart, output):
if chart not in self.nodes:
self.nodes[chart] = node = Node(self.label, self.index, chart.index)
output.append(node)
return node
return self.nodes[chart]
class Node(object):
def __init__(self, label, start, stop):
self.label = label
self.start = start
self.stop = stop
self.values = set()
class Chart(object):
def __init__(self, output, index):
self.index = index
self.cont = None
self.visit = set()
self.gss = {}
self.output = output
def add(self, lcc):
if lcc not in self.visit:
self.visit.add(lcc)
self.cont = (lcc, self.cont)
def create(self, goto, label, cc, cn=None):
if label in self.gss:
cell = self.gss[label]
else:
cell = self.gss[label] = GSS(label, self.index)
self.add((goto, cell))
if cc not in cell.edges:
cell.edges.add(cc)
for chart in cell.nodes:
chart.add((cell.label, cc))
return cell
def pop(self, cell, value):
node = cell.reduction(self, self.output)
for cc in cell.edges:
self.add((cell.label, cc))
node.values.add(value)
# I intend to use this in interactive setting, for that reason
# the parsing proceeds consuming one token per run.
class ShiftParser(object):
def __init__(self, output):
self.output = output
self.current = Chart(output, 0)
self.shift = Chart(output, 1)
def step(self, token):
while self.current.cont:
(label, cc), self.current.cont = self.current.cont
label(self, cc, token)
self.current = self.shift
self.shift = Chart(self.output, self.current.index+1)
# In the next iteration, I will likely want to replace these all
# with parser combinators. This thing has been copied from an example.
def LS(table, cc, token):
if token in ("a", "c"):
table.current.create(LA, L1, cc)
if token in ("a", "b"):
table.current.create(LB, L3, cc)
if token in ("d", None):
table.current.pop(cc, ('S3',))
def L1(table, cc, token):
table.current.create(LS, L2, cc)
def L2(table, cc, token):
if token == 'd':
table.shift.pop(cc, ('S1', L1, L2, token))
def L3(table, cc, token):
table.current.create(LS, L4, cc)
def L4(table, cc, token):
table.current.pop(cc, ('S2', L3, L4))
def LA(table, cc, token):
if token in ("a", "c"):
table.shift.pop(cc, ('A', token))
def LB(table, cc, token):
if token in ("a", "b"):
table.shift.pop(cc, ('B', token))
table = ShiftParser([])
table.current.add((LS, GSS(None)))
for token in "aabd":
table.step(token)
if table.current.cont is None:
raise Exception("syn")
table.step(None)
assert table.current.cont is None
assert table.shift.cont is None
def stringify(cell):
if isinstance(cell, str):
return repr(cell)
return cell.__name__ if cell else 'match'
table.output.sort(key=lambda x: (x.start, -x.stop))
for node in table.output:
print node.start, node.stop, stringify(node.label)
for value in node.values:
print ' ', ' '.join(map(stringify, value))
# I had fancy graph building code interspersed into
# the recognizer, until I figured out that I had already
# affected the way recognizer functions.
#from pygraphviz import AGraph
#graph = AGraph(directed=True)
#graph.draw('hello.png', prog='dot')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment