Skip to content

Instantly share code, notes, and snippets.

@mxbi
Created May 22, 2021 22:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mxbi/69c507274b05e51359573194ae4c4ef0 to your computer and use it in GitHub Desktop.
Save mxbi/69c507274b05e51359573194ae4c4ef0 to your computer and use it in GitHub Desktop.
######### SETTING UP GRAMMAR
rules_list = [("S", "NP", "VP"),
("NP", "N", "PP"),
("NP", "N"),
("PP", "P", "NP"),
("VP", "VP", "PP"),
("VP", "V", "VP"),
("VP", "V", "NP"),
("VP", "V"),
]
from collections import defaultdict
rules = defaultdict(list)
for rule in rules_list:
rules[rule[0]].append(rule[1:])
word_rules = defaultdict(list)
word_rules['N'] = [(x, ) for x in ['they', 'can', 'fish', 'rivers', 'december']]
word_rules['P'] = [(x, ) for x in ['in']]
word_rules['V'] = [(x, ) for x in ['can', 'fish']]
########### PARSER STARTS HERE
class Derivation:
def __init__(self, root, rule, ptr, start, end, hist):
self.root = root
self.rule = rule
self.ptr = ptr
self.start = start
self.end = end
self.hist = hist
def __repr__(self):
rule2 = list(self.rule)
rule2.insert(self.ptr, '⬛')
return f"{self.root.rjust(2)} -> {' '.join(rule2)}".ljust(15) + " [{}, {}] {}".format(self.start, self.end, self.hist)
def __members(self):
return (self.root, self.rule, self.ptr, self.start, self.end, self.hist)
def __eq__(self, other):
if type(other) is type(self):
return self.__members() == other.__members()
else:
return False
def __hash__(self):
return hash(self.__members())
def predict(chart):
new_derivs = []
for deriv in chart:
if deriv.ptr < len(deriv.rule): # incomplete derivation
first_token = deriv.rule[deriv.ptr]
for rule in rules[first_token]:
n = Derivation(first_token, rule, 0, deriv.end, deriv.end, [])
new_derivs.append(n)
return chart + new_derivs
def scan(chart, tokens):
new_derivs = []
for deriv in chart:
if deriv.ptr < len(deriv.rule):
first_token = deriv.rule[deriv.ptr]
if first_token in word_rules and deriv.end < len(tokens): # we can potentially parse a word here
if (tokens[deriv.end], ) in word_rules[first_token]:
n = Derivation(first_token, (tokens[deriv.end], ), 1, deriv.end, deriv.end+1, [])
new_derivs.append(n)
return chart + new_derivs
def complete(chart):
completions = []
for d1 in chart:
for i2, d2 in enumerate(chart):
# check if d2 completes d1
# also need to check that d2 itself is complete
if d1.ptr < len(d1.rule) and d2.root == d1.rule[d1.ptr] and d2.ptr == len(d2.rule) and d2.start == d1.end:
n = Derivation(d1.root, d1.rule, d1.ptr+1, d1.start, d2.end, d1.hist + [i2])
completions.append(n)
return chart + completions
def normalize(chart):
new_chart = []
for d in chart:
if d not in new_chart:
new_chart.append(d)
return new_chart
def tokenize(string):
return string.lower().split(" ")
def pprint(chart):
for i, d in enumerate(chart):
print(f"{i} {d}")
def execute(string):
tokens = tokenize(string)
# Start
chart = []
chart.append(Derivation('S', rules['S'][0], 0, 0, 0, []))
# TODO: fix hardcoded number of iterations
for i in range(10):
print('\n\n###### BEGIN CYCLE ######')
pprint(chart)
print('PREDICT')
chart = normalize(predict(chart))
pprint(chart)
print('SCAN')
chart = normalize(scan(chart, tokens))
pprint(chart)
print('COMPLETE')
while True:
new_chart = normalize(complete(chart))
if len(new_chart) == len(chart):
break
chart = new_chart
pprint(chart)
num_parses = 0
for d in chart:
if d.root == 'S' and d.end == len(tokens):
num_parses += 1
print(num_parses)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment