Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
Doc2Vec Iterator
from gensim.models.doc2vec import LabeledSentence
import re
class DocIterator(object):
    """Re-iterable stream of labeled documents (gist title: "Doc2Vec Iterator").

    Each document in ``doc_list`` is paired with the label stored at the same
    position in ``labels_list`` and emitted as a ``LabeledSentence``. Every
    call to ``__iter__`` starts a fresh pass, so one instance can be iterated
    several times provided ``doc_list`` itself supports repeated iteration.

    NOTE(review): newer gensim releases appear to provide ``TaggedDocument``
    in place of ``LabeledSentence`` -- confirm against the installed version.
    """

    def __init__(self, doc_list, labels_list):
        """Store the documents and their labels without copying them.

        Args:
            doc_list: iterable of documents; each one must offer ``.split()``
                (e.g. a plain string), which is used for tokenization.
            labels_list: container indexed by document position; entry ``i``
                is the tag attached to the ``i``-th document.
        """
        self.labels_list = labels_list
        self.doc_list = doc_list

    def __iter__(self):
        """Yield one ``LabeledSentence`` per document, in ``doc_list`` order.

        The document is tokenized with ``str.split()`` (whitespace splitting)
        and tagged with a single-element list holding its label.

        Raises:
            IndexError: from the label lookup when ``labels_list`` is a
                sequence with fewer entries than ``doc_list``.
        """
        # Labels are looked up by position rather than zipped so that a label
        # list that is too short fails loudly instead of dropping documents.
        for idx, doc in enumerate(self.doc_list):
            yield LabeledSentence(
                words=doc.split(),
                tags=[self.labels_list[idx]],
            )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment