Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Online learning for Doc2Vec
import logging
from gensim.models.doc2vec import (
Doc2Vec,
TaggedDocument,
)
# Emit DEBUG-level records so the library's progress messages are visible,
# each stamped with time, thread name and severity.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s : %(threadName)s : %(levelname)s : %(message)s',
)
def to_str(d):
    """Return the keys of *d* joined into one comma-separated string.

    Keys are converted with ``str()`` before joining so that non-string keys
    (for example integer document tags) do not raise ``TypeError``.

    :param d: any object exposing ``keys()``, e.g. a ``dict``.
    :return: the keys in iteration order, separated by ``", "``; an empty
        string when *d* has no keys.
    """
    # ``keys()`` is kept (rather than iterating ``d`` directly) so any object
    # that offers only a ``keys()`` method keeps working as before.
    return ", ".join(str(key) for key in d.keys())
# Toy corpus: each entry is one sentence, already split on whitespace into a
# list of word tokens.
SENTS = [
    sentence.split()
    for sentence in (
        "anecdotal using a personal experience or an isolated example instead of a sound argument or compelling evidence",
        "plausible thinking that just because something is plausible means that it is true",
        "occam razor is used as a heuristic technique discovery tool to guide scientists in the development of theoretical models rather than as an arbiter between published models",
        "karl popper argues that a preference for simple theories need not appeal to practical or aesthetic considerations",
        "the successful prediction of a stock future price could yield significant profit",
    )
]
def main():
    """Train a Doc2Vec model on one batch, then extend it with a second batch.

    After each stage the vocabulary words and the document tags held by the
    model are printed.
    """
    # NOTE(review): ``build_vocab(update=True)``, ``model.vocab`` and calling
    # ``train()`` without explicit corpus size/epochs appear to depend on the
    # installed gensim release — confirm against the version in use.

    def report(title, trained):
        # Print a heading followed by the words and tags known to the model.
        print(title)
        print("Vocabulary:", to_str(trained.vocab))
        print("Tags:", to_str(trained.docvecs.doctags))

    # First batch: sentences 0-2; the first two share the tag 'SENT_0'.
    first_tags = ('SENT_0', 'SENT_0', 'SENT_1')
    sentences_1 = [
        TaggedDocument(words, tags=[tag])
        for words, tag in zip(SENTS[:3], first_tags)
    ]

    # Second batch: sentences 3-4; reuses 'SENT_1' and introduces 'SENT_2'.
    second_tags = ('SENT_1', 'SENT_2')
    sentences_2 = [
        TaggedDocument(words, tags=[tag])
        for words, tag in zip(SENTS[3:5], second_tags)
    ]

    model = Doc2Vec(min_count=1, workers=1)

    # Stage 1: build the vocabulary from the first batch and train on it.
    model.build_vocab(sentences_1)
    model.train(sentences_1)
    report("-- Base model", model)

    # Stage 2: extend the existing vocabulary with the second batch and
    # continue training on the new documents only.
    model.build_vocab(sentences_2, update=True)
    model.train(sentences_2)
    report("-- Updated model", model)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
@ajoeajoe
Copy link

ajoeajoe commented May 3, 2017

build_vocab() got an unexpected keyword argument 'update'

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment