import numpy as np

# Function returning the vector representation of a document
def get_embedding_w2v(doc_tokens):
    embeddings = []
    if len(doc_tokens) < 1:
        # No tokens: return a zero vector with the model's dimensionality
        return np.zeros(300)
    else:
        for tok in doc_tokens:
            # `wv.vocab` / `wv.word_vec` follow the pre-4.0 gensim API
            if tok in w2v_model.wv.vocab:
                embeddings.append(w2v_model.wv.word_vec(tok))
            else:
                # Out-of-vocabulary token: fall back to a random 300-d vector
                embeddings.append(np.random.rand(300))
        # Average the vectors of the individual words to get the document vector
        return np.mean(embeddings, axis=0)
# Getting Word2Vec vectors for the testing corpus and queries
testing_corpus['vector'] = testing_corpus['lemmatized'].apply(lambda x: get_embedding_w2v(x.split()))
testing_queries['vector'] = testing_queries['cleaned'].apply(lambda x: get_embedding_w2v(x.split()))
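# A minimal sketch of how the vectors computed above could be used for retrieval:
# rank documents by cosine similarity to a query vector. The 'vector' column names
# match the code above; `rank_documents`, `query_vector`, and the use of sklearn's
# cosine_similarity are illustrative assumptions, not part of the original gist.
from sklearn.metrics.pairwise import cosine_similarity

def rank_documents(query_vector, corpus_df, top_n=10):
    # Stack the per-document vectors into an (n_docs, 300) matrix
    doc_matrix = np.vstack(corpus_df['vector'].values)
    # Cosine similarity between the query and every document
    scores = cosine_similarity(query_vector.reshape(1, -1), doc_matrix).flatten()
    # Return the index labels of the top_n most similar documents, best first
    return corpus_df.index[np.argsort(scores)[::-1][:top_n]]

# Example usage: rank the testing corpus against the first query
# top_docs = rank_documents(testing_queries['vector'].iloc[0], testing_corpus)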