Skip to content

Instantly share code, notes, and snippets.

@monisoi
Last active October 7, 2018 01:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save monisoi/c5e34bce290c70c1a3f0587da6963477 to your computer and use it in GitHub Desktop.
Save monisoi/c5e34bce290c70c1a3f0587da6963477 to your computer and use it in GitHub Desktop.
find a similar movie by doc2vec
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from gensim.models.doc2vec import Doc2Vec
from gensim.models.doc2vec import TaggedDocument
import sys
def create_model(train_data, model_name):
    """Train a Doc2Vec model on a comma-separated corpus and save it.

    Every line of ``train_data`` becomes one tagged document: the
    comma-separated fields of the line are its words and the zero-based
    line number is its tag.

    Args:
        train_data: Path of the text file to read, one document per line.
        model_name: Path the trained model is written to.
    """
    with open(train_data, 'r') as f:
        # Drop the line terminator before splitting; otherwise the last
        # word of every document keeps a trailing newline and is treated
        # as a different token from the same word elsewhere in a line.
        train_corpus = [
            TaggedDocument(words=line.rstrip('\r\n').split(','), tags=[i])
            for i, line in enumerate(f)
        ]

    # Passing the corpus to the constructor already trains the model, so a
    # following train() call would train it a second time.  Build the
    # vocabulary explicitly instead and train exactly once.
    model = Doc2Vec(dm=1, vector_size=300, window=8, min_count=1, workers=4)
    model.build_vocab(train_corpus)
    model.train(train_corpus, total_examples=model.corpus_count, epochs=50)
    model.save(model_name)
if __name__ == '__main__':
    # Command-line arguments: <training data file> <model file to save>
    #   model file name, ex. MyModel.model
    train_data, model_name = sys.argv[1], sys.argv[2]
    create_model(train_data, model_name)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from gensim.models.doc2vec import Doc2Vec
import sys
import csv
class SimilarMovieFinder:
    """Look up the document a saved Doc2Vec model ranks closest to another.

    Holds the rows of the CSV file (``sentences``) and the loaded model
    (``model``); document ids are used as indexes into ``sentences``.
    """

    def __init__(self, train_file_name, model_file_name):
        """Read the CSV rows, then load the saved Doc2Vec model.

        Args:
            train_file_name: Path of the CSV file to read rows from.
            model_file_name: Path of the model file passed to Doc2Vec.load.
        """
        self.sentences = self.load_sentences(train_file_name)
        self.model = Doc2Vec.load(model_file_name)

    def load_sentences(self, file_name):
        """Return every row of the CSV file ``file_name`` as a list of lists."""
        with open(file_name, 'r') as csv_file:
            return list(csv.reader(csv_file))

    def find(self, target_id):
        """Print the row for ``target_id`` and the row of its top match.

        The top match is the first entry returned by the model's
        ``docvecs.most_similar`` for ``target_id``.  Nothing is returned.
        """
        ranked = self.model.docvecs.most_similar(target_id)
        best_match_id = ranked[0][0]
        for row_index in (target_id, best_match_id):
            print(self.sentences[row_index])
if __name__ == '__main__':
    # Command-line arguments: <train csv> <model file> <document id>
    train_file_name = sys.argv[1]  # ex) ./train/train.csv
    model_file_name = sys.argv[2]  # ex) ./model/model.model
    target_id = sys.argv[3]
    f = SimilarMovieFinder(train_file_name, model_file_name)
    # find() prints both rows itself and returns None, so its return value
    # is not printed (doing so would emit a stray "None" line).
    f.find(int(target_id))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment