Yuktha-Majella

## create_bow
# Build one bag-of-words vector per tokenized document in tokens1.
# allow_update=True asks doc2bow to add tokens it has not seen to g_dict1.
g_bow = [
    g_dict1.doc2bow(doc_tokens, allow_update=True)
    for doc_tokens in tokens1
]
print("Bag of Words : ", g_bow)

## save_load_DictBow
# Locations of the persisted dictionary and bag-of-words corpus.
g_dict_path = '/content/drive/MyDrive/gensim_tutorial/g_dict1.dict'
g_bow_path = '/content/drive/MyDrive/gensim_tutorial/g_bow1.mm'

# Write the dictionary and the bag-of-words corpus to disk.
g_dict1.save(g_dict_path)
corpora.MmCorpus.serialize(g_bow_path, g_bow)

# Read both artifacts back from the files written above.
g_dict_load = corpora.Dictionary.load(g_dict_path)
g_bow_load = corpora.MmCorpus(g_bow_path)

## create_tfidf
# Toy corpus: three short restaurant reviews.
text = [
    "The food is excellent but the service can be better",
    "The food is always delicious and loved the service",
    "The food was mediocre and the service was terrible",
]

# Tokenize each review once and reuse the result for both the dictionary
# and the bag-of-words corpus (instead of calling simple_preprocess twice).
tokenized_text = [simple_preprocess(line) for line in text]
g_dict = corpora.Dictionary(tokenized_text)
g_bow = [g_dict.doc2bow(doc_tokens) for doc_tokens in tokenized_text]

# Print each document as [token, frequency] pairs; `token_id` is used as the
# loop name so the builtin `id` is not shadowed.
print("Dictionary : ")
for item in g_bow:
    print([[g_dict[token_id], freq] for token_id, freq in item])

## create_bigrams_trigrams
import gensim.downloader as api
from gensim.models.phrases import Phrases

# Load the text8 corpus and materialize it so it can be indexed and
# iterated more than once.
dataset = api.load("text8")
tokens = list(dataset)

# Learn bigram phrases from the corpus and show the first item transformed.
bigram_model = Phrases(tokens, min_count=3, threshold=10)
print(bigram_model[tokens[0]])

# Learn trigrams by running Phrases over the bigram-transformed corpus.
# This must be the same `tokens` corpus the bigram model was trained on;
# no `data` variable exists in this snippet.
trigram_model = Phrases(bigram_model[tokens], threshold=10)

## create_word2vec
from gensim.models.word2vec import Word2Vec
from multiprocessing import cpu_count
import gensim.downloader as api

# Materialize the text8 corpus so it can be sliced.
dataset = api.load("text8")
words = list(dataset)

# Train an initial Word2Vec model on the first 1000 items only.
# min_count=0 keeps every word regardless of how rarely it occurs.
data1 = words[:1000]
w2v_model = Word2Vec(data1, min_count=0, workers=cpu_count())

## save_load_word2vec
# Round-trip the trained model through disk: save it, then load it back
# from the same path.
w2v_model_path = '/content/drive/MyDrive/gensim_practice/w2v_model1'
w2v_model.save(w2v_model_path)
w2v_model = Word2Vec.load(w2v_model_path)

## update_word2vec
# Continue training the existing model on the remaining items.
data2 = words[1000:]

# update=True extends the model's existing vocabulary with words from
# data2 instead of rebuilding it from scratch.
w2v_model.build_vocab(data2, update=True)

# Use `epochs`: the old `iter` attribute was removed in gensim 4, while
# `epochs` is available on gensim 3.x models as well.
w2v_model.train(
    data2,
    total_examples=w2v_model.corpus_count,
    epochs=w2v_model.epochs,
)

# Word vectors are looked up on the `wv` attribute; indexing the model
# object directly is not supported in gensim 4.
w2v_model.wv['social']

## Gensim_installation
# Using the pip installer (shell command, not Python code):
pip install --upgrade gensim

# Using a conda environment (shell command, not Python code):
conda install -c conda-forge gensim

## summarize_text_by_ratio
# Summarize `text` with ratio=0.1 and print the result.
# NOTE(review): `summarize` is not imported in this snippet; presumably it is
# gensim.summarization.summarize, which gensim 4.0 removed; confirm the
# installed gensim version.
summary_ratio = summarize(text, ratio=0.1)
print(summary_ratio)

## summarize_text_by_word_count
# Summarize `text` with a word_count=50 limit and print the result.
# NOTE(review): `summarize` is not imported in this snippet; presumably it is
# gensim.summarization.summarize, which gensim 4.0 removed; confirm the
# installed gensim version.
summary_wordcount = summarize(text, word_count=50)
print(summary_wordcount)
	# Build one bag-of-words vector per tokenized document in tokens1.
	# allow_update=True asks doc2bow to add tokens it has not seen to g_dict1.
	g_bow = [g_dict1.doc2bow(doc_tokens, allow_update=True) for doc_tokens in tokens1]
	print("Bag of Words : ", g_bow)

	# Locations of the persisted dictionary and bag-of-words corpus.
	g_dict_path = '/content/drive/MyDrive/gensim_tutorial/g_dict1.dict'
	g_bow_path = '/content/drive/MyDrive/gensim_tutorial/g_bow1.mm'

	# Write the dictionary and the bag-of-words corpus to disk.
	g_dict1.save(g_dict_path)
	corpora.MmCorpus.serialize(g_bow_path, g_bow)

	# Read both artifacts back from the files written above.
	g_dict_load = corpora.Dictionary.load(g_dict_path)
	g_bow_load = corpora.MmCorpus(g_bow_path)
	# Toy corpus: three short restaurant reviews.
	text = [
		"The food is excellent but the service can be better",
		"The food is always delicious and loved the service",
		"The food was mediocre and the service was terrible",
	]

	# Tokenize each review once and reuse the result for both the dictionary
	# and the bag-of-words corpus (instead of calling simple_preprocess twice).
	tokenized_text = [simple_preprocess(line) for line in text]
	g_dict = corpora.Dictionary(tokenized_text)
	g_bow = [g_dict.doc2bow(doc_tokens) for doc_tokens in tokenized_text]

	# Print each document as [token, frequency] pairs. The print call is the
	# loop body, so it is indented one level under the `for`; `token_id` is
	# used so the builtin `id` is not shadowed.
	print("Dictionary : ")
	for item in g_bow:
		print([[g_dict[token_id], freq] for token_id, freq in item])
	import gensim.downloader as api
	from gensim.models.phrases import Phrases

	# Load the text8 corpus and materialize it so it can be indexed and
	# iterated more than once.
	dataset = api.load("text8")
	tokens = list(dataset)

	# Learn bigram phrases from the corpus and show the first item transformed.
	bigram_model = Phrases(tokens, min_count=3, threshold=10)
	print(bigram_model[tokens[0]])

	# Learn trigrams by running Phrases over the bigram-transformed corpus.
	# This must be the same `tokens` corpus the bigram model was trained on;
	# no `data` variable exists in this snippet.
	trigram_model = Phrases(bigram_model[tokens], threshold=10)
	from gensim.models.word2vec import Word2Vec
	from multiprocessing import cpu_count
	import gensim.downloader as api

	# Materialize the text8 corpus so it can be sliced.
	dataset = api.load("text8")
	words = list(dataset)

	# Train an initial Word2Vec model on the first 1000 items only.
	# min_count=0 keeps every word regardless of how rarely it occurs.
	data1 = words[:1000]
	w2v_model = Word2Vec(data1, min_count=0, workers=cpu_count())

	# Round-trip the trained model through disk: save it, then load it back
	# from the same path.
	w2v_model_path = '/content/drive/MyDrive/gensim_practice/w2v_model1'
	w2v_model.save(w2v_model_path)
	w2v_model = Word2Vec.load(w2v_model_path)
	# Continue training the existing model on the remaining items.
	data2 = words[1000:]

	# update=True extends the model's existing vocabulary with words from
	# data2 instead of rebuilding it from scratch.
	w2v_model.build_vocab(data2, update=True)

	# Use `epochs`: the old `iter` attribute was removed in gensim 4, while
	# `epochs` is available on gensim 3.x models as well.
	w2v_model.train(data2, total_examples=w2v_model.corpus_count, epochs=w2v_model.epochs)

	# Word vectors are looked up on the `wv` attribute; indexing the model
	# object directly is not supported in gensim 4.
	w2v_model.wv['social']
	# Using the pip installer (shell command, not Python code):
	pip install --upgrade gensim

	# Using a conda environment (shell command, not Python code):
	conda install -c conda-forge gensim
	# Summarize `text` with a word_count=50 limit and print the result.
	# NOTE(review): `summarize` is not imported in this snippet; presumably it is
	# gensim.summarization.summarize, which gensim 4.0 removed; confirm the
	# installed gensim version.
	summary_wordcount = summarize(text, word_count=50)
	print(summary_wordcount)