Created
May 4, 2015 21:00
-
-
Save cheery/ae4da756c17f8306c240 to your computer and use it in GitHub Desktop.
GLL recognizer/parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A GLL recognizer with a twist. | |
# The parser builds up a list of reduction rules when it is parsing. | |
# Once sorted, the list can be used to build parse trees. | |
class GSS(object): | |
def __init__(self, label, index=0): | |
self.label = label | |
self.index = index | |
self.edges = set() | |
self.nodes = {} | |
def reduction(self, chart, output): | |
if chart not in self.nodes: | |
self.nodes[chart] = node = Node(self.label, self.index, chart.index) | |
output.append(node) | |
return node | |
return self.nodes[chart] | |
class Node(object): | |
def __init__(self, label, start, stop): | |
self.label = label | |
self.start = start | |
self.stop = stop | |
self.values = set() | |
class Chart(object): | |
def __init__(self, output, index): | |
self.index = index | |
self.cont = None | |
self.visit = set() | |
self.gss = {} | |
self.output = output | |
def add(self, lcc): | |
if lcc not in self.visit: | |
self.visit.add(lcc) | |
self.cont = (lcc, self.cont) | |
def create(self, goto, label, cc, cn=None): | |
if label in self.gss: | |
cell = self.gss[label] | |
else: | |
cell = self.gss[label] = GSS(label, self.index) | |
self.add((goto, cell)) | |
if cc not in cell.edges: | |
cell.edges.add(cc) | |
for chart in cell.nodes: | |
chart.add((cell.label, cc)) | |
return cell | |
def pop(self, cell, value): | |
node = cell.reduction(self, self.output) | |
for cc in cell.edges: | |
self.add((cell.label, cc)) | |
node.values.add(value) | |
# I intend to use this in interactive setting, for that reason | |
# the parsing proceeds consuming one token per run. | |
class ShiftParser(object): | |
def __init__(self, output): | |
self.output = output | |
self.current = Chart(output, 0) | |
self.shift = Chart(output, 1) | |
def step(self, token): | |
while self.current.cont: | |
(label, cc), self.current.cont = self.current.cont | |
label(self, cc, token) | |
self.current = self.shift | |
self.shift = Chart(self.output, self.current.index+1) | |
# In the next iteration, I will likely want to replace these all | |
# with parser combinators. This thing has been copied from an example. | |
def LS(table, cc, token): | |
if token in ("a", "c"): | |
table.current.create(LA, L1, cc) | |
if token in ("a", "b"): | |
table.current.create(LB, L3, cc) | |
if token in ("d", None): | |
table.current.pop(cc, ('S3',)) | |
def L1(table, cc, token): | |
table.current.create(LS, L2, cc) | |
def L2(table, cc, token): | |
if token == 'd': | |
table.shift.pop(cc, ('S1', L1, L2, token)) | |
def L3(table, cc, token): | |
table.current.create(LS, L4, cc) | |
def L4(table, cc, token): | |
table.current.pop(cc, ('S2', L3, L4)) | |
def LA(table, cc, token): | |
if token in ("a", "c"): | |
table.shift.pop(cc, ('A', token)) | |
def LB(table, cc, token): | |
if token in ("a", "b"): | |
table.shift.pop(cc, ('B', token)) | |
table = ShiftParser([]) | |
table.current.add((LS, GSS(None))) | |
for token in "aabd": | |
table.step(token) | |
if table.current.cont is None: | |
raise Exception("syn") | |
table.step(None) | |
assert table.current.cont is None | |
assert table.shift.cont is None | |
def stringify(cell): | |
if isinstance(cell, str): | |
return repr(cell) | |
return cell.__name__ if cell else 'match' | |
table.output.sort(key=lambda x: (x.start, -x.stop)) | |
for node in table.output: | |
print node.start, node.stop, stringify(node.label) | |
for value in node.values: | |
print ' ', ' '.join(map(stringify, value)) | |
# I had fancy graph building code interspersed into | |
# the recognizer, until I figured out that I had already | |
# affected the way recognizer functions. | |
#from pygraphviz import AGraph | |
#graph = AGraph(directed=True) | |
#graph.draw('hello.png', prog='dot') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment