Skip to content

Instantly share code, notes, and snippets.

@mgd020
Last active April 30, 2017 09:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mgd020/3fc49d169196017895aa9f310760457d to your computer and use it in GitHub Desktop.
Save mgd020/3fc49d169196017895aa9f310760457d to your computer and use it in GitHub Desktop.
Python implementation of Earley recogniser
from __future__ import absolute_import, division, print_function, unicode_literals
from collections import namedtuple
from itertools import chain
class Rule(namedtuple('Rule', ['lhs', 'rhs'])):
    """A grammar production lhs -> rhs, with rhs normalised to a tuple."""

    def __new__(cls, lhs, rhs):
        # Coerce rhs to a tuple so a Rule is hashable even when the
        # caller passes a list or a string of symbols.
        rhs = tuple(rhs)
        return super(Rule, cls).__new__(cls, lhs, rhs)
class Item(namedtuple('Item', ['rule', 'index', 'origin'])):
    """An Earley item: a rule, a dot position in its rhs, and the index
    of the state set in which the item was first predicted (its origin)."""

    def __str__(self):
        # Render the rhs with a '.' marking the current dot position.
        dotted = self.rule.rhs[:self.index] + ('.',) + self.rule.rhs[self.index:]
        return '{} -> {} ({})'.format(self.rule.lhs, ' '.join(dotted), self.origin)

    def symbol(self):
        """Return the symbol just after the dot (IndexError when complete)."""
        return self.rule.rhs[self.index]

    def next(self):
        """Return a new Item with the dot advanced one position."""
        return self._replace(index=self.index + 1)
def set_add(s, e):
    """Add *e* to set *s*; return True iff it was not already present."""
    if e in s:
        return False
    s.add(e)
    return True
def earley(grammar, tokens, verbose=False):
    """
    Earley recogniser.

    grammar: a list of Rules; grammar[0] is the start rule
    tokens: an iterable of input tokens
    verbose: print each state set and token as it is processed

    Returns True iff *tokens* is derivable from the start rule.
    """
    start_item = Item(rule=grammar[0], index=0, origin=0)
    s = [{start_item}]  # s[k] is the Earley state set before token k
    lhs_rules = {}  # {Rule.lhs: [Rule]}
    for rule in grammar:
        lhs_rules.setdefault(rule.lhs, []).append(rule)
    # None is appended as an END sentinel so the final state set is
    # completed/predicted like every other one.
    for k, token in enumerate(chain(tokens, (None,))):
        sk = s[k]
        # items is a worklist mirror of sk; appending while iterating is
        # deliberate so newly added items are themselves processed.
        items = list(sk)
        sk1 = set()
        s.append(sk1)
        for item in items:
            try:
                symbol = item.symbol()
            except IndexError:  # completion: advance items from the origin set
                # When origin == k, scan the live worklist so completions of
                # nullable rules added during this pass are still seen.
                oitems = items if item.origin == k else s[item.origin]
                for oitem in oitems:
                    try:
                        if oitem.symbol() == item.rule.lhs:
                            nitem = oitem.next()
                            if set_add(sk, nitem):
                                items.append(nitem)
                    except IndexError:
                        pass  # origin rule is complete, so it can't possibly match
            else:
                try:
                    rules = lhs_rules[symbol]
                except KeyError:  # scanning: terminal symbol, check the token
                    if symbol == token:
                        sk1.add(item.next())
                else:  # prediction: expand the nonterminal's rules
                    for rule in rules:
                        nitem = Item(rule, 0, k)
                        if set_add(sk, nitem):
                            items.append(nitem)
        if verbose:
            for item in items:
                print(item)
            if token is not None:
                print('s[%d]:' % k, token)
        # Compare against the END sentinel explicitly: a falsy token
        # (e.g. 0 or '') must still trigger the dead-end check.
        if token is not None and not sk1:
            return False
    # Accept iff the completed start rule (dot at the end, origin 0)
    # is in the final state set.
    end_item = start_item._replace(index=len(start_item.rule.rhs))
    return end_item in s[k]
# Demo grammar: A and B are mutually recursive and both nullable,
# so only the empty input is derivable.
g = [
    Rule('S', 'A'),  # start rule (must come first)
    Rule('A', ''),   # epsilon production
    Rule('A', 'B'),
    Rule('B', 'A'),
]

assert earley(g, 'a', True) is False  # 'a' is not derivable
assert earley(g, '', True) is True    # the empty string is
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment