Skip to content

Instantly share code, notes, and snippets.

@ikatsov
Created March 15, 2020 16:44
Show Gist options
  • Save ikatsov/233d2dbaf5be0b280a3b4a90ae978726 to your computer and use it in GitHub Desktop.
Save ikatsov/233d2dbaf5be0b280a3b4a90ae978726 to your computer and use it in GitHub Desktop.
import multiprocessing as mp

import gensim
from gensim.models.doc2vec import TaggedDocument
EMBEDDING_DIM = 200 # dimensionality of each user representation; passed to Doc2Vec as vector_size
class TaggedDocumentIterator(object):
    """Re-iterable source of ``TaggedDocument`` objects built from a DataFrame.

    Each row of the wrapped frame becomes one document:

    * ``words`` -- the row's ``all_orders`` value split on whitespace
      (so ``all_orders`` must be a string);
    * ``tags``  -- a one-element list holding the row's ``user_id``.

    Because ``__iter__`` starts a fresh pass over the frame on every call,
    the object can be iterated any number of times.
    """

    def __init__(self, df):
        """Store the frame to iterate over.

        Args:
            df: object exposing ``itertuples()`` (a pandas DataFrame) whose
                rows carry ``all_orders`` and ``user_id`` fields.
        """
        self.df = df

    def __iter__(self):
        """Yield one ``TaggedDocument`` per row of ``self.df``."""
        for row in self.df.itertuples():
            # Build the field mapping once per row instead of once per field.
            fields = row._asdict()
            yield TaggedDocument(
                words=fields['all_orders'].split(),
                tags=[fields['user_id']])
# Turn every row of ``orders_by_uid`` into a TaggedDocument and materialise
# the corpus once, so build_vocab() and each train() call below reuse the
# same in-memory list.
it = TaggedDocumentIterator(orders_by_uid)
train_corpus = list(it)

# alpha == min_alpha: the learning rate is held constant inside a single
# train() call; the loop below lowers it between calls instead.
doc_model = gensim.models.Doc2Vec(vector_size=EMBEDDING_DIM,
                                  window=5,
                                  min_count=10,
                                  workers=mp.cpu_count(),
                                  alpha=0.055,
                                  min_alpha=0.055,
                                  epochs=120)
doc_model.build_vocab(train_corpus)

# Ten training rounds; the rate steps down by 0.005 before each round
# (0.050 for the first, roughly 0.005 for the last). Every round runs
# ``doc_model.epochs`` passes over the corpus.
# NOTE(review): gensim's documentation advises against calling train()
# repeatedly with hand-managed alpha; a single train() call with
# alpha > min_alpha decays the rate by itself. Kept as-is to preserve the
# original training schedule -- confirm this is intended.
for epoch in range(10):
    doc_model.alpha -= 0.005  # decrease the learning rate
    doc_model.min_alpha = doc_model.alpha  # keep the rate flat within this round
    doc_model.train(train_corpus,
                    total_examples=doc_model.corpus_count,
                    # ``epochs`` is the current attribute name; the old
                    # ``iter`` alias no longer exists in gensim 4.
                    epochs=doc_model.epochs)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment