Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
NLP - Calculating Bigram Tokens
"""NLP - Calculating Bigram Tokens.

Sentence-tokenize a short text with NLTK, word-tokenize each sentence,
flatten the nested token lists, and pair consecutive tokens into bigrams.
"""
import itertools

import nltk
from nltk.util import ngrams

# Ensure the Punkt sentence-tokenizer model is available locally.
nltk.download('punkt')

text = "today is 'Nayan's birthday. she loves ice cream. she is also fond of cream cake. we will celebrate her birthday with ice cream cake"

# Split into sentences, then word-tokenize each sentence -> list of token lists.
sentences = nltk.sent_tokenize(text)
words = [nltk.word_tokenize(sent) for sent in sentences]
print(words)

# Flatten the per-sentence token lists into one flat token sequence.
# (chain.from_iterable is the idiomatic form of chain(*words).)
flattened_list = list(itertools.chain.from_iterable(words))
# NOTE: the bare expressions from the original notebook only echoed in a
# REPL; as a script they were no-ops, so the results are printed explicitly.
print(flattened_list)
print(len(flattened_list))
# prints 28

# word_tokenize does not emit empty strings; the guard is kept as a
# harmless defensive filter.
tokens = [token for token in flattened_list if token != ""]

# Consecutive pairs: n tokens yield n-1 bigrams.
output = list(ngrams(tokens, 2))
print(output)
'''
Expected output:
[('today', 'is'),
('is', "'Nayan"),
("'Nayan", "'s"),
("'s", 'birthday'),
('birthday', '.'),
('.', 'she'),
('she', 'loves'),
('loves', 'ice'),
('ice', 'cream'),
('cream', '.'),
('.', 'she'),
('she', 'is'),
('is', 'also'),
('also', 'fond'),
('fond', 'of'),
('of', 'cream'),
('cream', 'cake'),
('cake', '.'),
('.', 'we'),
('we', 'will'),
('will', 'celebrate'),
('celebrate', 'her'),
('her', 'birthday'),
('birthday', 'with'),
('with', 'ice'),
('ice', 'cream'),
('cream', 'cake')]
'''
print(len(output))
# prints 27
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment