samidarko/gist:0985fe709e9113021fed8b7416c49518

## gistfile1.txt
# Build a tiny trigram maximum-likelihood language model from a single tweet
# with NLTK: tokenize the tweet, collect its vocabulary, count its n-grams and
# hand both to an MLE model.
#
# NOTE: this was originally a pasted interactive session that appeared twice
# (the second copy tab-indented, which is an IndentationError in a script) and
# that used `vocab`, `counts` and `MLE` without ever defining them. It is kept
# once here, with the missing objects created explicitly.
from nltk.lm import MLE, NgramCounter, Vocabulary
from nltk.tokenize import TweetTokenizer
from nltk.util import everygrams

# Highest n-gram order the model uses; also caps the n-grams generated below
# so no counts are collected for orders the model never consults.
NGRAM_ORDER = 3

twitt = """What's a little DUI on the way to gobble fries with a "friend"? And that playful punch? Total misunderstanding!!"""

# Tokenize once and reuse the result instead of re-tokenizing for every step.
tknzr = TweetTokenizer()
tokens = tknzr.tokenize(twitt)

# Vocabulary of the distinct tokens (each token counted once, as in the
# original `list(set(tokens))`).
vocab = Vocabulary()
vocab.update(set(tokens))

# One "sentence" holding every 1..NGRAM_ORDER-gram of the tweet. Materialized
# as a list so `text` is still usable after the counter has consumed it
# (the original stored a one-shot generator).
text = [list(everygrams(tokens, max_len=NGRAM_ORDER))]

# Map each n-gram through the vocabulary (unknown words -> the unk label)
# and accumulate the counts per order.
counts = NgramCounter()
counts.update(vocab.lookup(sent) for sent in text)

# Pass the prepared vocabulary and counter through the constructor rather
# than patching attributes on the model afterwards.
lm = MLE(NGRAM_ORDER, vocabulary=vocab, counter=counts)

# Useful things to inspect interactively once this has run:
#   vocab.lookup(['to', 'a']), counts.unigrams, lm.score('a', ["What's"])