Skip to content

Instantly share code, notes, and snippets.

@rlangone rlangone/word2vec_example.py Secret
Last active May 14, 2019

Embed
What would you like to do?
Code for post "Sentiment analysis using word, sub-word and character embedding" on https://amethix.com/blog/
# load libraries
from gensim.models import KeyedVectors
import os
import requests
import gzip
import shutil
# Settings for fetching the word2vec embedding matrix built by Google:
# the archive is identified by `file_id` and is stored under
# `file_name_compressed` inside the current working directory.
file_name_compressed = "GoogleNews-vectors-negative300.bin.gz"
file_id = "0B7XkCwpI5KDYNlNUTTlSS21pQmM"
cwd = os.getcwd()
destination = os.path.join(cwd, file_name_compressed)
# function for downloading file
def download_file_from_google_drive(id, destination):
    """Download a file served by Google Drive and store it locally.

    Code adapted from https://stackoverflow.com/a/39225039

    The file is requested once; if the response carries a
    ``download_warning*`` cookie, the request is repeated with that cookie's
    value passed back as the ``confirm`` parameter. The body of the final
    response is then streamed to ``destination``.

    Args:
        id: File identifier sent as the ``id`` query parameter. The name
            shadows the builtin ``id``; it is kept so existing positional
            and keyword callers keep working.
        destination: Path of the local file to write.

    Raises:
        requests.HTTPError: If the final response has a 4xx/5xx status, so
            an error page is never saved in place of the requested file.
    """
    URL = "https://docs.google.com/uc?export=download"

    # The context manager closes the session (and its pooled connections)
    # even when the download fails part-way through.
    with requests.Session() as session:
        response = session.get(URL, params={'id': id}, stream=True)
        token = get_confirm_token(response)

        if token:
            # A download-warning cookie was set: repeat the request and echo
            # the cookie value back as the confirmation token.
            params = {'id': id, 'confirm': token}
            response = session.get(URL, params=params, stream=True)

        response.raise_for_status()
        # Must run while the session is still open: the body is streamed.
        save_response_content(response, destination)
def get_confirm_token(response):
    """Return the value of the first ``download_warning*`` cookie, if any.

    Args:
        response: Object exposing a ``cookies`` attribute with an
            ``items()`` method yielding ``(name, value)`` pairs.

    Returns:
        The value of the first cookie whose name starts with
        ``download_warning``, or ``None`` when no such cookie is present.
    """
    for key, value in response.cookies.items():
        if key.startswith('download_warning'):
            return value
    return None
def save_response_content(response, destination):
    """Write the body of ``response`` to ``destination`` chunk by chunk.

    Args:
        response: Object whose ``iter_content(chunk_size)`` method yields
            the body as a sequence of ``bytes`` chunks.
        destination: Path of the file to create; an existing file is
            overwritten because it is opened in ``"wb"`` mode.
    """
    CHUNK_SIZE = 32768  # chunk size passed to iter_content()

    with open(destination, "wb") as f:
        for chunk in response.iter_content(CHUNK_SIZE):
            if chunk:  # filter out keep-alive new chunks
                f.write(chunk)
# download file
download_file_from_google_drive(file_id, destination)

# unzip file
file_name = 'GoogleNews-vectors-negative300.bin'
# Read from `destination`, the exact path the archive was just written to,
# instead of a second, relative spelling of the same file name.
with gzip.open(destination, 'rb') as f_in, open(file_name, 'wb') as f_out:
    # copyfileobj copies in chunks, so the file is never held fully in memory
    shutil.copyfileobj(f_in, f_out)

# load the embedding matrix
model = KeyedVectors.load_word2vec_format(file_name, binary=True)

# example 1: get the word vector representation of the word apple
apple_embedding = model['apple']

# example 2: compute cosine similarity between words king and queen
print(model.similarity('king', 'queen'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.