multivac61/my_viterby.py

## my_viterby.py
    def _viterbi(self, observations):
        '''Return the most likely sequence of words given the observations.

        Runs the Viterbi recurrence over words: for every observation, each
        candidate word keeps the single best-scoring path that ends in it.
        Transition scores, emission scores and accumulated path scores are
        combined by addition, i.e. they are handled as log-probabilities.

        Args:
            observations: Iterable of observed tokens.  Each one is passed to
                ``self.emission_prob`` together with ``vocab_dict.get(word)``
                for every candidate word.

        Returns:
            A list that starts with ``X0`` followed by one word per
            observation; just ``[X0]`` when ``observations`` is empty.

        Raises:
            ValueError: If, for some observation, no candidate word scores
                above ``-INFINITY`` (no sequence explains the observations).
        '''
        transitions = self.transition_prob
        emissions = self.emission_prob

        # Best score and best path of any sequence ending in each word so far.
        scores = {X0: log10(1)}
        paths = {X0: [X0]}

        for step, obs in enumerate(observations):
            # Words that follow at least one currently reachable word in the
            # transition table, de-duplicated so each is scored only once.
            candidates = []
            seen = set()
            for (prev, word) in transitions:
                if prev in scores and word not in seen:
                    seen.add(word)
                    candidates.append(word)

            new_scores = {}
            new_paths = {}
            for word in candidates:
                # The emission score does not depend on the predecessor, so
                # compute it once per candidate word.
                edit_prob = emissions(obs, vocab_dict.get(word))

                # The running maximum must be per word: sharing it across all
                # words of a step would drop every word that does not beat the
                # words evaluated before it, and with it possibly the best path.
                best_prob = -INFINITY
                best_path = None
                for prev, prev_prob in scores.items():
                    trans_prob = transitions.get((prev, word), -INFINITY)
                    prob = trans_prob + edit_prob + prev_prob
                    if prob > best_prob:
                        best_prob = prob
                        best_path = paths[prev]

                # Words that never score above -INFINITY stay unreachable.
                if best_path is not None:
                    new_scores[word] = best_prob
                    new_paths[word] = best_path + [word]

            if not new_scores:
                raise ValueError(
                    'no word sequence can explain observation %d' % step)

            scores = new_scores
            paths = new_paths

        # Now we get the most likely path taken.
        best_word = max(scores, key=scores.get)
        return paths[best_word]
	def _viterbi(self, observations):
		'''Return the most likely sequence of words given the observations.

		Runs the Viterbi recurrence over words: for every observation, each
		candidate word keeps the single best-scoring path that ends in it.
		Transition scores, emission scores and accumulated path scores are
		combined by addition, i.e. they are handled as log-probabilities.

		Args:
			observations: Iterable of observed tokens.  Each one is passed to
				``self.emission_prob`` together with ``vocab_dict.get(word)``
				for every candidate word.

		Returns:
			A list that starts with ``X0`` followed by one word per
			observation; just ``[X0]`` when ``observations`` is empty.

		Raises:
			ValueError: If, for some observation, no candidate word scores
				above ``-INFINITY`` (no sequence explains the observations).
		'''
		transitions = self.transition_prob
		emissions = self.emission_prob

		# Best score and best path of any sequence ending in each word so far.
		scores = {X0: log10(1)}
		paths = {X0: [X0]}

		for step, obs in enumerate(observations):
			# Words that follow at least one currently reachable word in the
			# transition table, de-duplicated so each is scored only once.
			candidates = []
			seen = set()
			for (prev, word) in transitions:
				if prev in scores and word not in seen:
					seen.add(word)
					candidates.append(word)

			new_scores = {}
			new_paths = {}
			for word in candidates:
				# The emission score does not depend on the predecessor, so
				# compute it once per candidate word.
				edit_prob = emissions(obs, vocab_dict.get(word))

				# The running maximum must be per word: sharing it across all
				# words of a step would drop every word that does not beat the
				# words evaluated before it, and with it possibly the best path.
				best_prob = -INFINITY
				best_path = None
				for prev, prev_prob in scores.items():
					trans_prob = transitions.get((prev, word), -INFINITY)
					prob = trans_prob + edit_prob + prev_prob
					if prob > best_prob:
						best_prob = prob
						best_path = paths[prev]

				# Words that never score above -INFINITY stay unreachable.
				if best_path is not None:
					new_scores[word] = best_prob
					new_paths[word] = best_path + [word]

			if not new_scores:
				raise ValueError(
					'no word sequence can explain observation %d' % step)

			scores = new_scores
			paths = new_paths

		# Now we get the most likely path taken.
		best_word = max(scores, key=scores.get)
		return paths[best_word]