Last active
August 23, 2019 22:25
-
-
Save rjurney/f498fda909d91661fae9774a875c79d0 to your computer and use it in GitHub Desktop.
Creating a gensim Word2Vec Encoding
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from os import path

from gensim.models import Word2Vec

# Hyperparameters for the embedding pipeline.
VOCAB_SIZE = 5000      # NOTE(review): unused in this snippet — presumably consumed later; confirm
MAX_LENGTH = 100       # NOTE(review): unused in this snippet — presumably consumed later; confirm
EMBEDDING_SIZE = 50    # dimensionality of each word vector
NUM_CORES = 64         # worker threads used for training

model_path = "data/word2vec.50000.model"

# Load the Word2Vec model if it already exists on disk; otherwise train a
# new one from `documents` (an iterable of tokenized posts, defined
# elsewhere) and persist it for future runs.
if path.exists(model_path):
    w2v_model = Word2Vec.load(model_path)
else:
    # gensim 3.x parameter names (`size`, `iter`); in gensim 4+ these were
    # renamed to `vector_size` and `epochs`.
    w2v_model = Word2Vec(
        documents,
        size=EMBEDDING_SIZE,
        min_count=1,       # keep every token so the encoding below cannot miss a word
        window=10,
        workers=NUM_CORES,
        iter=10,
        seed=33,           # fixed seed for reproducible vectors (with workers=1)
    )
    w2v_model.save(model_path)
    print('Word2Vec model built!')

# Sanity check: show the nearest neighbours of 'program' in the embedding space.
print(w2v_model.wv.most_similar(positive='program'))

# Encode each document as a list of word vectors using the trained embedding.
encoded_docs = [[w2v_model.wv[word] for word in post] for post in documents]
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.