Skip to content

Instantly share code, notes, and snippets.

@seangibat
Last active August 29, 2015 14:27
Show Gist options
  • Save seangibat/821bda4747033a547042 to your computer and use it in GitHub Desktop.
Save seangibat/821bda4747033a547042 to your computer and use it in GitHub Desktop.
import nltk
from nltk.tokenize import word_tokenize
from nltk.probability import LidstoneProbDist
from nltk.model import NgramModel
import re
def generate_text(model, number_words=15):
    """Generate words from a language model and join them into one string.

    Args:
        model: Object exposing ``generate(n)`` that returns an iterable of
            word strings, e.g. the value returned by ``generate_model``.
        number_words: Number of words requested from ``model.generate``.

    Returns:
        The generated words joined by single spaces.
    """
    # str.join consumes any iterable directly; no intermediate list is needed.
    return ' '.join(model.generate(number_words))
def generate_model(filename='document1.txt', ngrams=3):
    """Build an n-gram language model from the lowercased text of a file.

    Args:
        filename: Path of the text file to train on.
        ngrams: Model order, passed as the first argument to ``NgramModel``.

    Returns:
        The ``NgramModel`` instance built from the file's word tokens.
    """
    # The context manager closes the file even if reading raises.
    with open(filename) as f:
        raw = f.read().lower()

    tokens = list(word_tokenize(raw))

    def estimator(fdist, bins):
        # ``bins`` is accepted because the model passes it to the estimator,
        # but it is not forwarded: only the frequency distribution and the
        # Lidstone gamma of 0.2 are used.
        return LidstoneProbDist(fdist, 0.2)

    # NOTE(review): the two positional False values are presumably
    # pad_left / pad_right in the NLTK 2.x NgramModel signature - confirm
    # against the installed NLTK version.
    return NgramModel(ngrams, tokens, False, False, estimator)
# text = re.sub('\s[.:;?,)\'!]', dashrepl, text)
# text = re.split('^,`]\s', text)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment