Skip to content

Instantly share code, notes, and snippets.

@Miopas

Miopas/gensim.py

Created Feb 27, 2019
Embed
What would you like to do?
Train a word2vec model with gensim
# train
import logging
import os
from gensim.models import word2vec
# Route gensim's progress messages (it logs through the stdlib `logging`
# module) to the console with a timestamp and level.
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

# Make sure the output directory exists *before* training, so a long
# training run cannot be lost to a missing-directory error at save time.
os.makedirs("data/model", exist_ok=True)

# Corpus reader; per the gensim docs LineSentence expects one sentence per
# line with whitespace-separated tokens.
sentences = word2vec.LineSentence('/path/to/your/data')

# NOTE(review): `size` and `iter` are the gensim 3.x keyword names. gensim 4
# renamed them to `vector_size` and `epochs`; switch both if this script is
# run against gensim >= 4.0.
model = word2vec.Word2Vec(
    sentences,
    size=300,          # embedding dimensionality
    window=5,          # context window size
    min_count=5,       # ignore words rarer than this
    workers=32,        # training threads
    sg=1,              # skip-gram
    hs=1,              # hierarchical softmax ...
    negative=0,        # ... with negative sampling switched off
    alpha=0.025,       # initial learning rate
    min_alpha=0.0001,  # final learning rate
    seed=42,
    iter=3,            # passes over the corpus
)

# Persist the full model (can be reloaded and trained further) ...
model.save("data/model/dd.word2vec_gensim.model")
# ... and the plain-text word2vec format plus a separate vocabulary file.
model.wv.save_word2vec_format("data/model/dd.word2vec_org", "data/model/dd.vocabulary", binary=False)

# test
# To query a previously saved model instead of retraining, load it with:
#model = word2vec.Word2Vec.load('data/model/dd.word2vec_gensim.model')

# Query through `model.wv`: the model-level `most_similar` shortcut is
# deprecated and no longer exists in gensim 4.
for word, similarity in model.wv.most_similar("安"):
    print(word, similarity)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment