Skip to content

Instantly share code, notes, and snippets.

@hshindo
Created January 11, 2019 00:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hshindo/17117849722ebdcce0970cc94dc51972 to your computer and use it in GitHub Desktop.
Save hshindo/17117849722ebdcce0970cc94dc51972 to your computer and use it in GitHub Desktop.
class CollinsSpan:
def __init__(self, i, j, h, score):
self.i = i
self.j = j
self.h = h
self.score = score
def __str__(self):
return "[%s, %s, %s, %s]" % (self.i, self.j, self.h, self.score)
class CollinsParser:
def __init__(self):
self.chart = None
def parse(self, words):
self.init_spans(words)
# merge spans in a bottom-up manner
for l in xrange(1, len(words)+1):
for i in xrange(0, len(words)):
j = i + l
if j > len(words): break
for k in xrange(i+1, j):
for h_l in xrange(i, k):
for h_r in xrange(k, j):
span_l = self.chart[i][k][h_l]
span_r = self.chart[k][j][h_r]
# l -> r
score = self.get_score(words, span_l, span_r)
span = CollinsSpan(i, j, h_l, score)
self.add_span(span)
# r -> l
score = self.get_score(words, span_r, span_l)
span = CollinsSpan(i, j, h_r, score)
self.add_span(span)
# find the best span in the chart
return best
def init_spans(self, words):
# initialize chart as 3-dimensional list
length = len(words) + 1
chart = []
for i in xrange(length):
chart.append([])
for j in xrange(length):
chart[i].append([None] * length)
self.chart = chart
# add 1-length spans to the chart
for i in xrange(0, len(words)):
span = CollinsSpan(i, i+1, i, 0.0)
self.add_span(span)
def add_span(self, new_span):
i, j, h = new_span.i, new_span.j, new_span.h
old_span = self.chart[i][j][h]
if old_span is None or old_span.score < new_span.score:
self.chart[i][j][h] = new_span # update chart
def get_score(self, words, head, dep):
# currently, use naive scoring function
h_word = words[head.h]
if h_word == "read":
score = 1.0
elif h_word == "novel":
score = 1.0
else:
score = 0.0
# calculate score based on arc-factored model
return head.score + dep.score + score
""" Find the highest-scored span [i, j, h] from [i, j] """
def find_best(self, i, j):
# implement by your own
# run
p = CollinsParser()
best_span = p.parse(["She", "read", "a", "short", "novel"])
print best_span
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment