trylks/tweets.py

## tweets.py
import nltk
from nltk.probability import LidstoneProbDist
from nltk.model.ngram import NgramModel
import pandas as pd

# Read the tweet table from disk; rows are filtered by author further down.
tweets = pd.read_csv('tweeets.csv')


def tokenize(x):
    """Return the NLTK word tokens of ``str(x)``."""
    return nltk.word_tokenize(str(x))


# Training corpus: one token list per tweet whose `user` column is 'trylks'.
train = list(map(tokenize, tweets.loc[tweets.user == 'trylks', 'text']))
# Held-out sentence whose perplexity is measured against the model.
text = tokenize("I think that the #Python library #nltk is great")


# here is the good part
def estimator(fdist, bins):
    """Build a Lidstone-smoothed distribution (gamma 0.2) over *fdist*.

    *bins* is accepted because the caller supplies it, but it is not
    forwarded to ``LidstoneProbDist``.
    """
    return LidstoneProbDist(fdist, 0.2)


# Order-3 n-gram model trained on the filtered tweets, scored on `text`.
model = NgramModel(3, train, estimator=estimator)
perplexity = model.perplexity(text)
	import nltk
	from nltk.probability import LidstoneProbDist
	from nltk.model.ngram import NgramModel
	import pandas as pd

	# Read the tweet table from disk; rows are filtered by author further down.
	tweets = pd.read_csv('tweeets.csv')


	def tokenize(x):
		"""Return the NLTK word tokens of ``str(x)``."""
		return nltk.word_tokenize(str(x))


	# Training corpus: one token list per tweet whose `user` column is 'trylks'.
	train = list(map(tokenize, tweets.loc[tweets.user == 'trylks', 'text']))
	# Held-out sentence whose perplexity is measured against the model.
	text = tokenize("I think that the #Python library #nltk is great")


	# here is the good part
	def estimator(fdist, bins):
		"""Build a Lidstone-smoothed distribution (gamma 0.2) over *fdist*.

		*bins* is accepted because the caller supplies it, but it is not
		forwarded to ``LidstoneProbDist``.
		"""
		return LidstoneProbDist(fdist, 0.2)


	# Order-3 n-gram model trained on the filtered tweets, scored on `text`.
	model = NgramModel(3, train, estimator=estimator)
	perplexity = model.perplexity(text)