Skip to content

Instantly share code, notes, and snippets.

@orlp
Last active July 26, 2018 00:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save orlp/ba8594a613ad3d98441b3ff29e916342 to your computer and use it in GitHub Desktop.
Save orlp/ba8594a613ad3d98441b3ff29e916342 to your computer and use it in GitHub Desktop.
from collections import namedtuple, defaultdict
EarleyItem = namedtuple("EarleyItem", ["rule", "dot", "origin"])
def item_advance(item):
return EarleyItem(rule=item.rule, dot=item.dot + 1, origin=item.origin)
class Generator:
def __init__(self, G, terminals):
self.G = [["S'", G[0][0]]] + list(G)
self.terminals = set(terminals)
self.nullable = set()
self.min_len = defaultdict(lambda: 100000)
changed = True
while changed:
changed = False
for i, rule in enumerate(self.G):
if rule[0] in self.nullable: continue
if all(self.is_nonterminal(s) and s in self.nullable for s in rule[1:]):
changed = True
self.nullable.add(rule[0])
for t in self.terminals:
self.min_len[t] = 1
changed = True
while changed:
changed = False
for i, rule in enumerate(self.G):
s = sum(self.min_len[s] for s in rule[1:])
if s < self.min_len[rule[0]]:
changed = True
self.min_len[rule[0]] = s
self.min_rule_len = [sum(self.min_len[s] for s in rule[1:]) for rule in self.G]
self.S = [{EarleyItem(rule=0, origin=0, dot=1)}]
def gen_all(self, l):
items = list(self.S[-1])
possib_scans = defaultdict(set)
while items:
new_items = []
for item in items:
sym = self.next_symbol(item)
if sym is None:
self.complete(item, new_items)
elif self.is_nonterminal(sym):
self.predict(item, new_items, l)
else:
possib_scans[sym].add(item_advance(item))
items = new_items
if l == 0:
if EarleyItem(rule=0, origin=0, dot=2) in self.S[-1]:
yield ""
return
for scan in sorted(possib_scans):
self.S.append(possib_scans[scan])
for suf in self.gen_all(l - 1):
yield scan + suf
self.S.pop()
def item_add(self, item, new_items):
if item not in self.S[-1]:
new_items.append(item)
self.S[-1].add(item)
def dump(self):
for i, state in enumerate(self.S):
print("###", i, "###")
for item in self.S[i]:
rule = self.G[item.rule]
print("{} -> {} ({})".format(rule[0], " ".join(rule[1:item.dot] + ["."] + rule[item.dot:]), item.origin))
# Given (B → γ•, x) introduce (A → αB•β, j) for all (A → α•Bβ, j) in S[x].
def complete(self, complete_item, new_items):
for item in self.S[complete_item.origin]:
sym = self.next_symbol(item)
if sym == self.G[complete_item.rule][0]:
self.item_add(item_advance(item), new_items)
# Given (A → α•Bβ, j) introduce (B → •γ, k) for all (B → γ) in the grammar.
# If B is nullable, also introduce (A → αB•β, j).
def predict(self, predict_item, new_items, l):
sym = self.next_symbol(predict_item)
if sym in self.nullable:
self.item_add(item_advance(predict_item), new_items)
for rulenr, rule in enumerate(self.G):
if rule[0] != sym: continue # FIXME: lookup.
if self.min_rule_len[rulenr] > l: continue
self.item_add(EarleyItem(rule=rulenr, origin=len(self.S)-1, dot=1), new_items)
def next_symbol(self, item):
rule = self.G[item.rule]
return rule[item.dot] if item.dot < len(rule) else None
def is_terminal(self, sym):
return sym in self.terminals
def is_nonterminal(self, sym):
return sym not in self.terminals
terminals = set("( ) - + * 0 1 2 3 4 5 6 7 8 9 a b < >".split())
G = [s.strip().split() for s in """
B S < < S
B S > > S
B S
S M + M
S M
M P * P
M P
P T * * T
P T
T N
T ( S )
T a
T b
N Z
N D Z
Z D
Z 0
D 1
D 2
D 3
D 4
D 5
D 6
D 7
D 8
D 9
""".split("\n") if s.strip()]
gen = Generator(G, terminals)
for s in gen.gen_all(4):
print(s)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment