Skip to content

Instantly share code, notes, and snippets.

@brendan-rius
Created December 28, 2015 11:56
Show Gist options
  • Save brendan-rius/f60d7326aee4e33c5aaf to your computer and use it in GitHub Desktop.
Save brendan-rius/f60d7326aee4e33c5aaf to your computer and use it in GitHub Desktop.
Markov model first ugly experiment
import random
import itertools
import nltk
from terminaltables import AsciiTable
def aff(matrix, voc):
    """Print *matrix* as an ASCII table labelled with the words of *voc*.

    The first table row and the first table column both list the
    vocabulary (with an empty top-left corner cell), so the cell found on
    the row of word ``a`` and the column of word ``b`` is the matrix entry
    for the pair ``(a, b)``.

    Args:
        matrix: Iterable of rows, each an iterable of cell values; row ``i``
            is labelled with ``voc[i]``. It is not modified.
        voc: Sequence of words used as row and column labels. It is not
            modified.

    Returns:
        None. The rendered table is written to standard output.
    """
    # Every cell is converted to text before being handed to AsciiTable.
    header = [''] + [str(word) for word in voc]
    # zip() stops at the shorter input, so surplus rows or words are dropped.
    body = [
        [str(word)] + [str(cell) for cell in row]
        for row, word in zip(matrix, voc)
    ]
    print(AsciiTable([header] + body).table)
def next(matrix, starting_word, vocabulary):
    """Return a random successor of *starting_word*, weighted by its counts.

    The row of *matrix* belonging to *starting_word* is read as a list of
    weights, one per vocabulary word, and a single word is drawn with
    probability proportional to its weight.

    Note: the name shadows the ``next`` builtin; it is kept because it is
    this function's public name.

    Args:
        matrix: Sequence of rows; ``matrix[i][j]`` is the weight of the
            transition from ``vocabulary[i]`` to ``vocabulary[j]``. Weights
            may be integer counts or floats.
        starting_word: The word whose successor is wanted.
        vocabulary: List of words giving the row/column order of *matrix*.

    Returns:
        One word of *vocabulary* whose weight in the selected row is
        positive.

    Raises:
        ValueError: If *starting_word* is not in *vocabulary*.
        IndexError: If *starting_word* has no successor with a positive
            weight (for instance the last word of the corpus).
    """
    row = matrix[vocabulary.index(starting_word)]
    # Keep only reachable words. zip() stops at the shorter of the row and
    # the vocabulary, and non-positive weights contribute nothing.
    candidates = [
        (word, weight) for weight, word in zip(row, vocabulary) if weight > 0
    ]
    if not candidates:
        raise IndexError(
            "no successor recorded for {!r}".format(starting_word)
        )
    successors, weights = zip(*candidates)
    # Weighted draw without building one list element per occurrence.
    return random.choices(successors, weights=weights, k=1)[0]
# Toy corpus from which the word-to-word transition counts are estimated.
text = "the computer of the son of the daughter of the sister of Mary"
words = nltk.word_tokenize(text)

# Distinct tokens in sorted order: unlike list(set(...)), this keeps the
# row/column layout of the matrix identical from one run to the next.
vocabulary = sorted(set(words))

# Position of every word, computed once so the counting loop below does not
# rescan the vocabulary list for each token.
index_of = {word: position for position, word in enumerate(vocabulary)}

# matrix[i][j] is the number of times vocabulary[j] directly follows
# vocabulary[i] in the corpus.
size = len(vocabulary)
matrix = [[0] * size for _ in range(size)]
for w1, w2 in zip(words, words[1:]):
    matrix[index_of[w1]][index_of[w2]] += 1

# aff() prints the table itself and returns None, so it must not be wrapped
# in print() (that would emit a stray "None" line).
aff(matrix, vocabulary)  # We print the occurrences matrix
print(next(matrix, "of", vocabulary))  # We predict the next word knowing the previous one was "of"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment