Skip to content

Instantly share code, notes, and snippets.

@balajikvijayan
Created December 1, 2015 22:23
Show Gist options
  • Save balajikvijayan/9f7ab00f9bfd0bf56b14 to your computer and use it in GitHub Desktop.
Save balajikvijayan/9f7ab00f9bfd0bf56b14 to your computer and use it in GitHub Desktop.
from gensim import models
sentence = models.doc2vec.LabeledSentence(
words=[u'so`bme', u'words', u'here'], tags=["SENT_0"])
sentence1 = models.doc2vec.LabeledSentence(
words=[u'here', u'we', u'go'], tags=["SENT_1"])
sentences = [sentence, sentence1]
class LabeledLineSentence(object):
def __init__(self, filename):
self.filename = filename
def __iter__(self):
for uid, line in enumerate(open(filename)):
yield LabeledSentence(words=line.split(), labels=['SENT_%s' % uid])
model = models.Doc2Vec(alpha=.025, min_alpha=.025, min_count=1)
model.build_vocab(sentences)
for epoch in range(10):
model.train(sentences)
model.alpha -= 0.002 # decrease the learning rate`
model.min_alpha = model.alpha # fix the learning rate, no decay
model.save("my_model.doc2vec")
model_loaded = models.Doc2Vec.load('my_model.doc2vec')
print model.docvecs.most_similar(["SENT_0"])
print model_loaded.docvecs.most_similar(["SENT_1"])
@daniel-draper
Copy link

How can I change "print model.docvecs.most_similar(["SENT_0"]) " if I am using the LabeledLineSentence class?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment