Skip to content

Instantly share code, notes, and snippets.

@veer66
Created September 20, 2014 13:20
Show Gist options
  • Save veer66/44d1adec9545a647bb0a to your computer and use it in GitHub Desktop.
Save veer66/44d1adec9545a647bb0a to your computer and use it in GitHub Desktop.
Simple Earley Parser
import json
import sys
import re
import copy
import pprint
pp = pprint.PrettyPrinter(indent=4)
class EarleyParser(object):
def __init__(self):
pass
def run_cmd(self):
if len(sys.argv) != 2:
print("Usage: python", sys.argv[0], "<grammar in json format>",
file=sys.stderr)
exit(1)
self.grammar = json.load(open(sys.argv[1]))
for line in sys.stdin:
chart = self.parse(re.split('[\s+]', re.split('[\r\n]', line)[0]))
pp.pprint(chart)
def add_state(self, rule, c, i, p=0):
state = copy.copy(rule)
state["i"] = i
state["p"] = p
state_repr = json.dumps(state)
if state_repr not in self.idx[c]:
self.chart[c].append(state)
self.idx[c][state_repr] = True
def parse(self, words):
self.words = words
self.idx = [{} for i in range(len(self.words) +1)]
self.chart = [[] for i in range(len(self.words) + 1)]
self.add_state(self.grammar["start"], c=0, i=0)
for i in range(len(self.words) + 1):
for state in self.chart[i]:
if self.is_incomplete(state):
if self.is_next_nonterminal(state):
self.predict(state, i)
else:
self.scan(state, i)
else:
self.complete(state, i)
return self.chart
def is_incomplete(self, state):
return state["p"] < len(state["r"])
def is_next_nonterminal(self, state):
return re.match('[A-Z]', state["r"][state["p"]]) is not None
def predict(self, state, j):
b = state["r"][state["p"]]
for rule in filter(lambda rule: rule["l"] == b, self.grammar["rules"]):
self.add_state(rule, c=j, i=j)
def scan(self, state, j):
b = state["r"][state["p"]]
w = self.words[j]
if b in self.grammar["dix"][w]:
self.add_state({"l": b, "r": [w]}, c=j+1, i=j, p=1)
def complete(self, state, k):
b = state["l"]
j = state["i"]
for _state in self.chart[j]:
if self.is_incomplete(_state) and _state["r"][_state["p"]] == b:
self.add_state(_state, c=k, p=_state["p"]+1, i=_state["i"])
parser = EarleyParser()
parser.run_cmd()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment