This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Run inference on the test images with the exported ResNet-50 learner.
# `test` is defined elsewhere (expected to be a DataFrame); its 'image_names'
# column is resolved relative to `path`.
test_data = ImageList.from_df(
    test,
    cols=['image_names'],
    path='/content/drive/MyDrive/CV_Vehicle_classification/train_data/images',
)

# Load the exported learner and attach the images above as its test set.
t_rn50 = load_learner(
    '/content/drive/MyDrive/CV_Vehicle_classification/model/',
    'Bmodel_fastai_resnet50.h5',
    test=test_data,
)

# Predict on the test set with test-time augmentation.
y_trn50 = t_rn50.TTA(ds_type=DatasetType.Test)

# Element 0 of the TTA result is reduced over its last axis to get one
# predicted class index per image.
preds = y_trn50[0].argmax(-1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gensim
from gensim import corpora

# Build a gensim Dictionary from an in-memory list of documents.
text1 = ["""Gensim is a free open-source Python library for representing documents as semantic vectors,
as efficiently and painlessly as possible. Gensim is designed
to process raw, unstructured digital texts using unsupervised machine learning algorithms."""]

# One token list per document; str.split() splits on any run of whitespace,
# so the line breaks inside the literal do not produce empty tokens.
tokens1 = [line.split() for line in text1]

# Map every distinct token to an integer id.
g_dict1 = corpora.Dictionary(tokens1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from gensim.utils import simple_preprocess
from gensim import corpora

# Build a gensim Dictionary from a text file, treating each '.'-delimited
# chunk as one document.
# The context manager guarantees the file handle is closed even if reading
# or preprocessing raises.
with open('sample_text.txt', encoding='utf-8') as text2:
    # deacc=True strips accent marks while tokenizing.
    tokens2 = [
        simple_preprocess(line, deacc=True)
        for line in text2.read().split('.')
    ]

g_dict2 = corpora.Dictionary(tokens2)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Train for 10 epochs with the one-cycle policy; the slice gives the
# learning-rate range 1e-5 .. 1e-4.
t_cnn1.fit_one_cycle(10, max_lr=slice(1e-5, 1e-4))

# Freeze the learner before exporting it.
t_cnn1.freeze()

# Export the learner to Drive so it can be reloaded with load_learner().
t_cnn1.export('/content/drive/MyDrive/CV_Vehicle_classification/model/Bmodel_fastai_resnet50.h5')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Extend the existing dictionary with the documents tokenized from the file.
g_dict1.add_documents(tokens2)

# Report the vocabulary size and the token -> id mapping.
print(f"The dictionary has: {len(g_dict1)} tokens\n")
print(g_dict1.token2id)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Convert each tokenized document into a bag-of-words: a list of
# (token_id, count) pairs. allow_update=True lets doc2bow add any unseen
# tokens to g_dict1 as a side effect.
g_bow = [g_dict1.doc2bow(doc, allow_update=True) for doc in tokens1]
print("Bag of Words : ", g_bow)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Each path is used for both saving and loading; naming them once keeps the
# two sides from drifting apart.
DICT_PATH = '/content/drive/MyDrive/gensim_tutorial/g_dict1.dict'
BOW_PATH = '/content/drive/MyDrive/gensim_tutorial/g_bow1.mm'

# Save the Dictionary and BOW
g_dict1.save(DICT_PATH)
corpora.MmCorpus.serialize(BOW_PATH, g_bow)

# Load the Dictionary and BOW
g_dict_load = corpora.Dictionary.load(DICT_PATH)
g_bow_load = corpora.MmCorpus(BOW_PATH)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build a dictionary and bag-of-words corpus for three short reviews, then
# print each document as [token, count] pairs.
text = ["The food is excellent but the service can be better",
        "The food is always delicious and loved the service",
        "The food was mediocre and the service was terrible"]

# Tokenize once and reuse the result for both the dictionary and the corpus.
tokenized = [simple_preprocess(line) for line in text]
g_dict = corpora.Dictionary(tokenized)
g_bow = [g_dict.doc2bow(doc) for doc in tokenized]

print("Dictionary : ")
for item in g_bow:
    # Translate token ids back to their strings for readable output.
    print([[g_dict[token_id], freq] for token_id, freq in item])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gensim.downloader as api
from gensim.models.phrases import Phrases

# Load the "text8" corpus through the gensim downloader and materialize it
# as a list of documents so it can be indexed and iterated more than once.
dataset = api.load("text8")
tokens = list(dataset)

# Detect two-word phrases.
bigram_model = Phrases(tokens, min_count=3, threshold=10)
print(bigram_model[tokens[0]])

# Detect three-word phrases by running phrase detection over the
# bigram-transformed corpus. The corpus is `tokens`; the name `data` used
# previously was never defined and raised NameError.
trigram_model = Phrases(bigram_model[tokens], threshold=10)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from gensim.models.word2vec import Word2Vec
from multiprocessing import cpu_count
import gensim.downloader as api

# Load the "text8" corpus through the gensim downloader and materialize it
# as a list of documents.
dataset = api.load("text8")
words = list(dataset)

# Train on the first 1000 documents only.
data1 = words[:1000]

# min_count=0 keeps every word regardless of frequency; one worker thread
# per CPU core.
w2v_model = Word2Vec(data1, min_count=0, workers=cpu_count())
OlderNewer