Skip to content

Instantly share code, notes, and snippets.

@cheery
Created November 13, 2015 01:49
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save cheery/bfb249b1f9a327d150f8 to your computer and use it in GitHub Desktop.
Save cheery/bfb249b1f9a327d150f8 to your computer and use it in GitHub Desktop.
Leo algorithm?
from structures import *
# This is described in the marpa thesis.
# could someone check out that it's correct?
# Joop Leo presented a method for dealing with right
# recursion in O(n) time. With his modification, earley
# algorithm is O(n) for all LR-regular grammars.
# spot potential right recursions and memoize them.
# Leo restricts the memoization to where right recursion
# is unambiguous.
# Potential right recursions are memoized by earley set,
# using "transitive items".
# traditional leo item is the triple:
# (top_dot, transition_symbol, top_origin)
# Leo items need to be later expanded for evaluation.
def main():
x = Nonterminal('x')
y = Nonterminal('y')
a = Term('a')
accept = x
grammar = [
Rule(x, [a, x]),
Rule(x, [a]),
]
input_string = "aaaaaaaaaaaa"
table = []
# Initial dotted rule
previous = set()
current = set()
for rule in grammar:
if rule.lhs == accept:
new_item = (Dot(rule, 0), 0)
current.add(new_item)
print ' INIT', new_item[0], new_item[1]
queue = list(current)
for dot, origin in queue:
# prediction
prediction(current, queue, grammar, dot.postdot(), 0)
table.append(current)
previous = current
current = set()
for location, token in enumerate(input_string, 1):
# completions proceed in non-deterministic manner, until
# everything has been completed.
queue = list(current)
print 'SET', location
# scanning
for item in previous:
dot, origin = item[:2]
postdot = dot.postdot()
if match(postdot, token):
new_item = (dot.next(), origin)
if new_item not in current:
print ' SCAN', new_item[0], new_item[1]
current.add(new_item)
queue.append(new_item)
for item in queue:
dot, origin = item[:2]
print ' ', dot, origin
# reduction
if dot.is_completed():
# leo reduction
for prior_item in table[origin]:
if len(prior_item) == 3 and prior_item[2] == dot.rule.lhs:
new_item = prior_item[0], prior_item[1], dot.rule.lhs
if new_item not in current:
print ' LEO REDU', new_item[0], new_item[1], dot.rule.lhs
current.add(new_item)
queue.append(new_item)
break
else:
for prior_item in table[origin]:
if len(prior_item) == 2:
before, before_origin = prior_item
if before.postdot() == dot.rule.lhs:
new_item = (before.next(), before_origin, dot.rule.lhs)
if new_item not in current:
print ' REDU', new_item[0], new_item[1], dot.rule.lhs
current.add(new_item)
queue.append(new_item)
# prediction
prediction(current, queue, grammar, dot.postdot(), location)
if dot.rule.lhs == accept and origin == 0:
print ' ACCEPT'
table.append(current)
previous = current
current = set()
def prediction(current, queue, grammar, symbol, location):
if not isinstance(symbol, Nonterminal):
return
for rule in grammar:
if rule.lhs == symbol:
new_item = (Dot(rule, 0), location)
if new_item not in current:
print ' PRED', new_item[0], new_item[1]
current.add(new_item)
queue.append(new_item)
# Mechanism to match for the current input.
def match(symbol, token):
return isinstance(symbol, Term) and symbol.name == token
if __name__=='__main__':
main()
class Rule(object):
def __init__(self, lhs, rhs):
self.lhs = lhs
self.rhs = rhs
def __len__(self):
return len(self.rhs)
def __repr__(self):
return "{} -> {}".format(
self.lhs,
' '.join(map(str, self.rhs)))
# Earlier I did not separate terminals from
# non-terminals because it was not strictly
# necessary. That turned out to confuse
# when designing grammars.
class Term(object):
def __init__(self, name):
self.name = name
def __repr__(self):
return "{!r}".format(self.name)
class Nonterminal(object):
def __init__(self, name):
self.name = name
def __repr__(self):
return "{!s}".format(self.name)
# The dotted rule
class Dot(object):
def __init__(self, rule, pos):
self.rule = rule
self.pos = pos
assert 0 <= pos <= len(rule)
def postdot(self):
if self.pos < len(self.rule):
return self.rule.rhs[self.pos]
return None
def next(self):
if self.postdot() is not None:
return Dot(self.rule, self.pos + 1)
return None
def penult(self):
if self.pos + 1 == len(self.rule):
return self.postdot()
def is_predicted(self):
return self.pos == 0
def is_confirmed(self):
return self.pos > 0
def is_completed(self):
return self.pos == len(self.rule)
def __hash__(self):
return hash((self.rule, self.pos))
def __eq__(self, other):
return isinstance(other, Dot) and self.rule == other.rule and self.pos == other.pos
def __repr__(self):
if isinstance(self.rule, Rule):
lhs = repr(self.rule.lhs)
pre = ' '.join(map(repr, self.rule.rhs[:self.pos]))
pos = ' '.join(map(repr, self.rule.rhs[self.pos:]))
return "[{} -> {} * {}]".format(lhs, pre, pos)
return object.__repr__(self)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment