Yuktha-Majella/create_dict_textfile

## create_dict_textfile
from gensim.utils import simple_preprocess
from gensim import corpora

# Build a gensim Dictionary from 'sample_text.txt', treating every
# period-delimited chunk of the file as one document.
# Reading inside a context manager guarantees the file handle is closed
# as soon as the text has been consumed (the bare open() never closed it).
with open('sample_text.txt', encoding='utf-8') as text2:
    # simple_preprocess turns each chunk into a list of lowercase tokens;
    # deacc=True additionally strips accent marks from the tokens.
    tokens2 = [
        simple_preprocess(line, deacc=True)
        for line in text2.read().split('.')
    ]

# Map every unique token seen across the chunks to an integer id.
g_dict2 = corpora.Dictionary(tokens2)

print(f"The dictionary has: {len(g_dict2)} tokens\n")
print(g_dict2.token2id)

## sample_text.txt
NLP is a branch of data science that consists of systematic processes for analyzing, understanding, and deriving information from the text data in a smart and efficient manner. By utilizing NLP and its components, one can organize the massive chunks of text data, perform numerous automated tasks and solve a wide range of problems such as – automatic summarization, machine translation, named entity recognition, relationship extraction, sentiment analysis, speech recognition, and topic segmentation etc.
	from gensim.utils import simple_preprocess
	from gensim import corpora

	# Read the sample text; the context manager closes the file after reading.
	with open('sample_text.txt', encoding='utf-8') as text2:
		contents = text2.read()

	# Collect one token list per period-delimited chunk of the text.
	tokens2 = []
	for line in contents.split('.'):
		# Loop body: must be indented one level deeper than the `for` line.
		tokens2.append(simple_preprocess(line, deacc=True))

	# Map every unique token to an integer id.
	g_dict2 = corpora.Dictionary(tokens2)

	print("The dictionary has: " + str(len(g_dict2)) + " tokens\n")
	print(g_dict2.token2id)