cheery/gll.py

## gll.py
# A GLL recognizer with a twist.
# The parser builds up a list of reduction rules when it is parsing.
# Once sorted, the list can be used to build parse trees.

class GSS(object):
    def __init__(self, label, index=0):
        self.label = label
        self.index = index
        self.edges = set()
        self.nodes = {}

    def reduction(self, chart, output):
        if chart not in self.nodes:
            self.nodes[chart] = node = Node(self.label, self.index, chart.index)
            output.append(node)
            return node
        return self.nodes[chart]

class Node(object):
    def __init__(self, label, start, stop):
        self.label = label
        self.start = start
        self.stop = stop
        self.values = set()

class Chart(object):
    def __init__(self, output, index):
        self.index = index
        self.cont = None
        self.visit = set()
        self.gss = {}
        self.output = output

    def add(self, lcc):
        if lcc not in self.visit:
            self.visit.add(lcc)
            self.cont = (lcc, self.cont)

    def create(self, goto, label, cc, cn=None):
        if label in self.gss:
            cell = self.gss[label]
        else:
            cell = self.gss[label] = GSS(label, self.index)
            self.add((goto, cell))
        if cc not in cell.edges:
            cell.edges.add(cc)
            for chart in cell.nodes:
                chart.add((cell.label, cc))
        return cell

    def pop(self, cell, value):
        node = cell.reduction(self, self.output)
        for cc in cell.edges:
            self.add((cell.label, cc))
        node.values.add(value)

# I intend to use this in interactive setting, for that reason
# the parsing proceeds consuming one token per run.
class ShiftParser(object):
    def __init__(self, output):
        self.output = output
        self.current = Chart(output, 0)
        self.shift = Chart(output, 1)

    def step(self, token):
        while self.current.cont:
            (label, cc), self.current.cont = self.current.cont
            label(self, cc, token)
        self.current = self.shift
        self.shift = Chart(self.output, self.current.index+1)

# In the next iteration, I will likely want to replace these all
# with parser combinators. This thing has been copied from an example.
def LS(table, cc, token):
    if token in ("a", "c"):
        table.current.create(LA, L1, cc)
    if token in ("a", "b"):
        table.current.create(LB, L3, cc)
    if token in ("d", None):
        table.current.pop(cc, ('S3',))

def L1(table, cc, token):
    table.current.create(LS, L2, cc)

def L2(table, cc, token):
    if token == 'd':
        table.shift.pop(cc, ('S1', L1, L2, token))

def L3(table, cc, token):
    table.current.create(LS, L4, cc)

def L4(table, cc, token):
    table.current.pop(cc, ('S2', L3, L4))

def LA(table, cc, token):
    if token in ("a", "c"):
        table.shift.pop(cc, ('A', token))

def LB(table, cc, token):
    if token in ("a", "b"):
        table.shift.pop(cc, ('B', token))

table = ShiftParser([])
table.current.add((LS, GSS(None)))
for token in "aabd":
    table.step(token)
    if table.current.cont is None:
        raise Exception("syn")
table.step(None)
assert table.current.cont is None
assert table.shift.cont is None

def stringify(cell):
    if isinstance(cell, str):
        return repr(cell)
    return cell.__name__ if cell else 'match'

table.output.sort(key=lambda x: (x.start, -x.stop))
for node in table.output:
    print node.start, node.stop, stringify(node.label)
    for value in node.values:
        print '   ', ' '.join(map(stringify, value))

# I had fancy graph building code interspersed into
# the recognizer, until I figured out that I had already
# affected the way recognizer functions.
#from pygraphviz import AGraph
#graph = AGraph(directed=True)
#graph.draw('hello.png', prog='dot')
	# A GLL recognizer with a twist.
	# The parser builds up a list of reduction rules when it is parsing.
	# Once sorted, the list can be used to build parse trees.

	class GSS(object):
	def __init__(self, label, index=0):
	self.label = label
	self.index = index
	self.edges = set()
	self.nodes = {}

	def reduction(self, chart, output):
	if chart not in self.nodes:
	self.nodes[chart] = node = Node(self.label, self.index, chart.index)
	output.append(node)
	return node
	return self.nodes[chart]

	class Node(object):
	def __init__(self, label, start, stop):
	self.label = label
	self.start = start
	self.stop = stop
	self.values = set()

	class Chart(object):
	def __init__(self, output, index):
	self.index = index
	self.cont = None
	self.visit = set()
	self.gss = {}
	self.output = output

	def add(self, lcc):
	if lcc not in self.visit:
	self.visit.add(lcc)
	self.cont = (lcc, self.cont)

	def create(self, goto, label, cc, cn=None):
	if label in self.gss:
	cell = self.gss[label]
	else:
	cell = self.gss[label] = GSS(label, self.index)
	self.add((goto, cell))
	if cc not in cell.edges:
	cell.edges.add(cc)
	for chart in cell.nodes:
	chart.add((cell.label, cc))
	return cell

	def pop(self, cell, value):
	node = cell.reduction(self, self.output)
	for cc in cell.edges:
	self.add((cell.label, cc))
	node.values.add(value)

	# I intend to use this in interactive setting, for that reason
	# the parsing proceeds consuming one token per run.
	class ShiftParser(object):
	def __init__(self, output):
	self.output = output
	self.current = Chart(output, 0)
	self.shift = Chart(output, 1)

	def step(self, token):
	while self.current.cont:
	(label, cc), self.current.cont = self.current.cont
	label(self, cc, token)
	self.current = self.shift
	self.shift = Chart(self.output, self.current.index+1)

	# In the next iteration, I will likely want to replace these all
	# with parser combinators. This thing has been copied from an example.
	def LS(table, cc, token):
	if token in ("a", "c"):
	table.current.create(LA, L1, cc)
	if token in ("a", "b"):
	table.current.create(LB, L3, cc)
	if token in ("d", None):
	table.current.pop(cc, ('S3',))

	def L1(table, cc, token):
	table.current.create(LS, L2, cc)

	def L2(table, cc, token):
	if token == 'd':
	table.shift.pop(cc, ('S1', L1, L2, token))

	def L3(table, cc, token):
	table.current.create(LS, L4, cc)

	def L4(table, cc, token):
	table.current.pop(cc, ('S2', L3, L4))

	def LA(table, cc, token):
	if token in ("a", "c"):
	table.shift.pop(cc, ('A', token))

	def LB(table, cc, token):
	if token in ("a", "b"):
	table.shift.pop(cc, ('B', token))

	table = ShiftParser([])
	table.current.add((LS, GSS(None)))
	for token in "aabd":
	table.step(token)
	if table.current.cont is None:
	raise Exception("syn")
	table.step(None)
	assert table.current.cont is None
	assert table.shift.cont is None

	def stringify(cell):
	if isinstance(cell, str):
	return repr(cell)
	return cell.__name__ if cell else 'match'

	table.output.sort(key=lambda x: (x.start, -x.stop))
	for node in table.output:
	print node.start, node.stop, stringify(node.label)
	for value in node.values:
	print ' ', ' '.join(map(stringify, value))

	# I had fancy graph building code interspersed into
	# the recognizer, until I figured out that I had already
	# affected the way recognizer functions.
	#from pygraphviz import AGraph
	#graph = AGraph(directed=True)
	#graph.draw('hello.png', prog='dot')