Skip to content

Instantly share code, notes, and snippets.

@multivac61
Last active October 19, 2015 16:11
Show Gist options
  • Save multivac61/38d709e3be2286bffef2 to your computer and use it in GitHub Desktop.
Save multivac61/38d709e3be2286bffef2 to your computer and use it in GitHub Desktop.
def _viterbi(self, observations):
    '''Generate the most likely sequence of words given the observations.

    Scores are combined by addition, so the values in
    ``self.transition_prob`` and the results of ``self.emission_prob`` are
    treated as log-probabilities (the start state scores ``log10(1)``).

    ``self.transition_prob`` is a dict keyed by ``(previous_word, word)``.
    ``self.emission_prob`` is called as
    ``emission_prob(observation, vocab_dict.get(word))``.

    Returns the best-scoring path as a list of words. The path always
    starts with the start symbol ``X0``; for empty ``observations`` it is
    just ``[X0]``.

    Raises ValueError if, at some observation, no word can be reached
    with a score greater than ``-INFINITY``.
    '''
    transitions = self.transition_prob
    emissions = self.emission_prob

    # Index the bigram table by its first word once, so every time step
    # only looks at the successors of the words that are still alive
    # instead of rescanning the whole table.
    successors = {}
    for (prev_word, word), trans_prob in transitions.items():
        successors.setdefault(prev_word, []).append((word, trans_prob))

    scores = {X0: log10(1)}  # best score of any path ending in each word
    paths = {X0: [X0]}       # the path that achieves that score

    for step, obs in enumerate(observations):
        new_scores = {}
        new_paths = {}
        # The emission score does not depend on the previous word, so it
        # is computed once per candidate word and step.
        # NOTE(review): assumes emission_prob is a pure function - confirm.
        emission_cache = {}
        for prev_word, prev_score in scores.items():
            for word, trans_prob in successors.get(prev_word, ()):
                if word not in emission_cache:
                    emission_cache[word] = emissions(obs, vocab_dict.get(word))
                prob = trans_prob + emission_cache[word] + prev_score
                # Viterbi recurrence: keep the best predecessor *per word*.
                # Comparing against one running maximum shared by all
                # words would discard states that the overall best path
                # may still pass through.
                if prob > new_scores.get(word, -INFINITY):
                    new_scores[word] = prob
                    new_paths[word] = paths[prev_word] + [word]
        if not new_scores:
            raise ValueError(
                'no word sequence can explain observation %d (%r)'
                % (step, obs))
        scores, paths = new_scores, new_paths

    # Now we get the most likely path taken.
    best_word = max(scores, key=scores.get)
    return paths[best_word]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment