Yuktha-Majella

## Prediction
# Build the test image list from the `image_names` column of `test`
# (presumably a DataFrame defined elsewhere), resolving names against the
# images folder on Drive.
test_data = ImageList.from_df(
    test,
    cols=['image_names'],
    path='/content/drive/MyDrive/CV_Vehicle_classification/train_data/images',
)

# Reload the exported learner and attach the test items to it.
t_rn50 = load_learner(
    '/content/drive/MyDrive/CV_Vehicle_classification/model/',
    'Bmodel_fastai_resnet50.h5',
    test=test_data,
)

# Run TTA over the test set. Element 0 of the result is taken as the
# prediction tensor (presumably per-class scores), and argmax over the last
# axis yields one index per row.
y_trn50 = t_rn50.TTA(ds_type=DatasetType.Test)
preds = y_trn50[0].argmax(-1)

## create_dict
import gensim
from gensim import corpora

# Single-document corpus: one multi-line string describing Gensim.
text1 = ["""Gensim is a free open-source Python library for representing documents as semantic vectors,
           as efficiently and painlessly as possible. Gensim is designed
           to process raw, unstructured digital texts using unsupervised machine learning algorithms."""]

# Whitespace-tokenize every document; str.split() already returns a new list.
tokens1 = [document.split() for document in text1]

# Build the dictionary (token <-> integer id mapping) from the tokenized corpus.
g_dict1 = corpora.Dictionary(tokens1)

## create_dict_textfile
from gensim.utils import simple_preprocess
from gensim import corpora

# Read the whole file inside a context manager so the handle is always closed,
# even if reading raises. `text2` stays bound (to the closed file object).
with open('sample_text.txt', encoding='utf-8') as text2:
    raw_text = text2.read()

# Treat each '.'-delimited chunk as one document. simple_preprocess tokenizes
# the chunk; deacc=True additionally removes accent marks from the tokens.
tokens2 = [
    simple_preprocess(sentence, deacc=True)
    for sentence in raw_text.split('.')
]

# Build a dictionary from the tokenized sentences of the text file.
g_dict2 = corpora.Dictionary(tokens2)

## freeze_model
# One-cycle training with a cycle length of 10 and the max learning rate given
# as a slice from 1e-5 to 1e-4.
t_cnn1.fit_one_cycle(
    10,
    max_lr=slice(1e-5, 1e-4),
)

# Freeze the learner, then export it to the model folder on Drive.
t_cnn1.freeze()
t_cnn1.export(
    '/content/drive/MyDrive/CV_Vehicle_classification/model/Bmodel_fastai_resnet50.h5'
)

## dict_update
# Extend the existing dictionary in place with the documents in tokens2.
g_dict1.add_documents(tokens2)

# Report the dictionary size, then dump the token -> id mapping.
print(f"The dictionary has: {len(g_dict1)} tokens\n")
print(g_dict1.token2id)

## create_bow
# Convert each tokenized document to its bag-of-words form. allow_update=True
# lets doc2bow add previously unseen tokens to g_dict1 while converting.
g_bow = [
    g_dict1.doc2bow(doc_tokens, allow_update=True)
    for doc_tokens in tokens1
]
print("Bag of Words : ", g_bow)

## save_load_DictBow
# Persist the dictionary and serialize the BoW corpus to Drive.
g_dict1.save(
    '/content/drive/MyDrive/gensim_tutorial/g_dict1.dict'
)
corpora.MmCorpus.serialize(
    '/content/drive/MyDrive/gensim_tutorial/g_bow1.mm',
    g_bow,
)

# Read both artifacts back from the same paths they were written to.
g_dict_load = corpora.Dictionary.load(
    '/content/drive/MyDrive/gensim_tutorial/g_dict1.dict'
)
g_bow_load = corpora.MmCorpus(
    '/content/drive/MyDrive/gensim_tutorial/g_bow1.mm'
)

## create_tfidf
# Three short review-style sentences used as the corpus.
text = [
    "The food is excellent but the service can be better",
    "The food is always delicious and loved the service",
    "The food was mediocre and the service was terrible",
]

# Dictionary and bag-of-words corpus, both built from the preprocessed sentences.
g_dict = corpora.Dictionary([simple_preprocess(sentence) for sentence in text])
g_bow = [g_dict.doc2bow(simple_preprocess(sentence)) for sentence in text]

# NOTE(review): despite the section name, no TF-IDF model is built here; the
# loop prints each document's tokens with their raw counts.
print("Dictionary : ")
for doc_bow in g_bow:
    print([[g_dict[token_id], count] for token_id, count in doc_bow])

## create_bigrams_trigrams
import gensim.downloader as api
from gensim.models.phrases import Phrases

# Fetch the "text8" dataset through the gensim downloader and materialize it
# as a list so it can be indexed and iterated more than once.
dataset = api.load("text8")
tokens = list(dataset)

# Train the bigram phrase model and show it applied to the first document.
bigram_model = Phrases(tokens, min_count=3, threshold=10)
print(bigram_model[tokens[0]])

# Train the trigram model on the bigram-transformed corpus. The corpus in this
# snippet is named `tokens`; no `data` variable is defined here.
trigram_model = Phrases(bigram_model[tokens], threshold=10)

## create_word2vec
from gensim.models.word2vec import Word2Vec
from multiprocessing import cpu_count
import gensim.downloader as api

# Fetch the "text8" dataset and materialize it as a list so it can be sliced.
dataset = api.load("text8")
words = list(dataset)

# Train Word2Vec on the first 1000 items only, keeping every token
# (min_count=0) and using one worker per CPU reported by cpu_count().
data1 = words[:1000]
w2v_model = Word2Vec(
    data1,
    min_count=0,
    workers=cpu_count(),
)
	# Build the test image list from the `image_names` column of `test`
	# (presumably a DataFrame defined elsewhere).
	test_data = ImageList.from_df(
		test,
		cols=['image_names'],
		path='/content/drive/MyDrive/CV_Vehicle_classification/train_data/images',
	)

	# Reload the exported learner and attach the test items to it.
	t_rn50 = load_learner(
		'/content/drive/MyDrive/CV_Vehicle_classification/model/',
		'Bmodel_fastai_resnet50.h5',
		test=test_data,
	)

	# Run TTA over the test set; element 0 of the result is reduced with
	# argmax over the last axis to get one index per row.
	y_trn50 = t_rn50.TTA(ds_type=DatasetType.Test)
	preds = y_trn50[0].argmax(-1)
	import gensim
	from gensim import corpora

	# Single-document corpus: one multi-line string describing Gensim.
	text1 = ["""Gensim is a free open-source Python library for representing documents as semantic vectors,
	as efficiently and painlessly as possible. Gensim is designed
	to process raw, unstructured digital texts using unsupervised machine learning algorithms."""]

	# Whitespace-tokenize every document; str.split() already returns a new list.
	tokens1 = [document.split() for document in text1]

	# Build the dictionary from the tokenized corpus.
	g_dict1 = corpora.Dictionary(tokens1)
	from gensim.utils import simple_preprocess
	from gensim import corpora

	# Read the whole file inside a context manager so the handle is always
	# closed, even if reading raises.
	with open('sample_text.txt', encoding='utf-8') as text2:
		raw_text = text2.read()

	# Treat each '.'-delimited chunk as one document. The tokenizing step must
	# run once per chunk, so it lives inside the comprehension (the loop body
	# had lost its indentation, which made the loop a syntax error).
	tokens2 = [
		simple_preprocess(sentence, deacc=True)
		for sentence in raw_text.split('.')
	]

	# Build a dictionary from the tokenized sentences of the text file.
	g_dict2 = corpora.Dictionary(tokens2)
	# One-cycle training with a cycle length of 10 and the max learning rate
	# given as a slice from 1e-5 to 1e-4.
	t_cnn1.fit_one_cycle(
		10,
		max_lr=slice(1e-5, 1e-4),
	)

	# Freeze the learner, then export it to the model folder on Drive.
	t_cnn1.freeze()
	t_cnn1.export(
		'/content/drive/MyDrive/CV_Vehicle_classification/model/Bmodel_fastai_resnet50.h5'
	)
	# Extend the existing dictionary in place with the documents in tokens2.
	g_dict1.add_documents(tokens2)

	# Report the dictionary size, then dump the token -> id mapping.
	print(f"The dictionary has: {len(g_dict1)} tokens\n")
	print(g_dict1.token2id)
	# Convert each tokenized document to its bag-of-words form. allow_update=True
	# lets doc2bow add previously unseen tokens to g_dict1 while converting.
	g_bow = [
		g_dict1.doc2bow(doc_tokens, allow_update=True)
		for doc_tokens in tokens1
	]
	print("Bag of Words : ", g_bow)
	# Persist the dictionary and serialize the BoW corpus to Drive.
	g_dict1.save(
		'/content/drive/MyDrive/gensim_tutorial/g_dict1.dict'
	)
	corpora.MmCorpus.serialize(
		'/content/drive/MyDrive/gensim_tutorial/g_bow1.mm',
		g_bow,
	)

	# Read both artifacts back from the same paths they were written to.
	g_dict_load = corpora.Dictionary.load(
		'/content/drive/MyDrive/gensim_tutorial/g_dict1.dict'
	)
	g_bow_load = corpora.MmCorpus(
		'/content/drive/MyDrive/gensim_tutorial/g_bow1.mm'
	)
	# Three short review-style sentences used as the corpus.
	text = [
		"The food is excellent but the service can be better",
		"The food is always delicious and loved the service",
		"The food was mediocre and the service was terrible",
	]

	# Dictionary and bag-of-words corpus, both built from the preprocessed sentences.
	g_dict = corpora.Dictionary([simple_preprocess(line) for line in text])
	g_bow = [g_dict.doc2bow(simple_preprocess(line)) for line in text]

	# NOTE(review): despite the snippet's origin as "create_tfidf", no TF-IDF
	# model is built here; the loop prints each document's tokens with raw counts.
	print("Dictionary : ")
	for item in g_bow:
		# The print must be nested under the loop: it reads `item` per document.
		# `token_id` avoids shadowing the builtin `id`.
		print([[g_dict[token_id], freq] for token_id, freq in item])
	import gensim.downloader as api
	from gensim.models.phrases import Phrases

	# Fetch the "text8" dataset through the gensim downloader and materialize
	# it as a list so it can be indexed and iterated more than once.
	dataset = api.load("text8")
	tokens = list(dataset)

	# Train the bigram phrase model and show it applied to the first document.
	bigram_model = Phrases(tokens, min_count=3, threshold=10)
	print(bigram_model[tokens[0]])

	# Train the trigram model on the bigram-transformed corpus. The corpus in
	# this snippet is named `tokens`; no `data` variable is defined here.
	trigram_model = Phrases(bigram_model[tokens], threshold=10)
	from gensim.models.word2vec import Word2Vec
	from multiprocessing import cpu_count
	import gensim.downloader as api

	# Fetch the "text8" dataset and materialize it as a list so it can be sliced.
	dataset = api.load("text8")
	words = list(dataset)

	# Train Word2Vec on the first 1000 items only, keeping every token
	# (min_count=0) and using one worker per CPU reported by cpu_count().
	data1 = words[:1000]
	w2v_model = Word2Vec(
		data1,
		min_count=0,
		workers=cpu_count(),
	)