Skip to content

Instantly share code, notes, and snippets.

@jklydev
Created June 17, 2016 02:36
Show Gist options
  • Save jklydev/76e2766d7506bad5ce5d83008d07eb43 to your computer and use it in GitHub Desktop.
Save jklydev/76e2766d7506bad5ce5d83008d07eb43 to your computer and use it in GitHub Desktop.
import random
class Ngram:
    """Word-level n-gram model that can generate random text.

    The document is split on whitespace into words. Every run of ``n``
    consecutive words contributes one entry to ``db``: the first ``n - 1``
    words (as a tuple) map to a list of the words seen following them.
    Followers are stored with repetition, so picking uniformly from the
    list reproduces the observed frequencies.

    Attributes set by the constructor:
        N     -- size of the n-grams.
        words -- list of whitespace-separated words read from the document.
        db    -- dict mapping context tuples to lists of following words.
    """

    def __init__(self, doc, n=2):
        """Read the file at path ``doc`` and build the n-gram table.

        Raises:
            ValueError: if ``n`` is smaller than 1, or if the document
                contains fewer than ``n`` words.
        """
        if n < 1:
            raise ValueError("n must be at least 1")
        self.N = n
        self.wordlist(doc)
        self.make_db()

    def wordlist(self, doc):
        """Load the file at path ``doc`` and store its words in ``self.words``."""
        with open(doc, 'r') as f:
            self.words = f.read().split()

    def grams(self):
        """Yield ``(context, follower)`` pairs for every n-gram in the words.

        ``context`` is a tuple of ``N - 1`` words and ``follower`` is the
        word that comes right after them.

        Raises:
            ValueError: if there are fewer than ``N`` words. Because this is
                a generator, the error surfaces when iteration starts.
        """
        if len(self.words) < self.N:
            raise ValueError("Document too short")
        for start in range(len(self.words) - (self.N - 1)):
            chunk = self.words[start:start + self.N]
            yield tuple(chunk[:-1]), chunk[-1]

    def make_db(self):
        """Build ``self.db`` from the pairs produced by ``grams``."""
        self.db = {}
        for context, follower in self.grams():
            self.db.setdefault(context, []).append(follower)

    def make(self, length=20):
        """Return a randomly generated, space-joined string of words.

        Generation starts from a randomly chosen context and then appends up
        to ``length`` further words, each drawn from the followers of the
        current last ``N - 1`` words. If the current context has no recorded
        follower (it only occurs at the very end of the document), the
        result is returned early and is shorter than requested.
        """
        # Dict views cannot be indexed, so materialise the keys for choice().
        sentence = list(random.choice(list(self.db)))
        context_size = self.N - 1
        for _ in range(length):
            # A slice of [-0:] would be the whole list, so the unigram case
            # (empty context) has to be handled explicitly.
            if context_size:
                state = tuple(sentence[-context_size:])
            else:
                state = ()
            followers = self.db.get(state)
            if not followers:
                break
            sentence.append(random.choice(followers))
        return ' '.join(sentence)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment