# rlangone/word2vec_example.py Secret

## word2vec_example.py
# load libraries
from gensim.models import KeyedVectors
import os
import requests
import gzip
import shutil

# Source and target of the embedding matrix built by Google: the Google Drive
# file id to fetch, and the path of the compressed archive, which is placed in
# the current working directory.
file_id = '0B7XkCwpI5KDYNlNUTTlSS21pQmM'
file_name_compressed = 'GoogleNews-vectors-negative300.bin.gz'
cwd = os.getcwd()
destination = os.path.join(cwd, file_name_compressed)

# function for downloading file
def download_file_from_google_drive(id, destination):
    """Download a Google Drive file and write it to a local path.

    Adapted from https://stackoverflow.com/a/39225039.

    Args:
        id: Google Drive file id, sent as the ``id`` query parameter.
        destination: Local path the response body is written to.

    Raises:
        requests.HTTPError: If the final response carries an error status,
            so that an error page is never saved in place of the file.
    """
    URL = "https://docs.google.com/uc?export=download"

    # Using the session as a context manager closes it (and its pooled
    # connections) even when the download fails part-way.
    with requests.Session() as session:
        response = session.get(URL, params={'id': id}, stream=True)
        token = get_confirm_token(response)

        if token:
            # A confirmation token was returned in the cookies: release the
            # first (unread) response and repeat the request with the token.
            response.close()
            params = {'id': id, 'confirm': token}
            response = session.get(URL, params=params, stream=True)

        # Fail here rather than writing an error body to `destination`.
        response.raise_for_status()
        save_response_content(response, destination)

def get_confirm_token(response):
    """Return the confirmation token stored in the response cookies.

    Args:
        response: Object whose ``cookies.items()`` yields (name, value) pairs.

    Returns:
        The value of the first cookie whose name starts with
        ``download_warning``, or ``None`` when no such cookie exists.
    """
    warning_values = (
        cookie_value
        for cookie_name, cookie_value in response.cookies.items()
        if cookie_name.startswith('download_warning')
    )
    return next(warning_values, None)

def save_response_content(response, destination):
    """Write the body of a streamed response to a file.

    Args:
        response: Object exposing ``iter_content(chunk_size)`` that yields
            byte chunks.
        destination: Path of the file to create or overwrite (binary mode).
    """
    chunk_bytes = 32768

    with open(destination, "wb") as out_file:
        # filter(None, ...) drops empty chunks (keep-alive chunks) before writing
        out_file.writelines(filter(None, response.iter_content(chunk_bytes)))

# fetch the compressed embeddings from Google Drive
download_file_from_google_drive(file_id, destination)

# decompress the archive into the plain binary file
file_name = 'GoogleNews-vectors-negative300.bin'
with gzip.open(file_name_compressed, 'r') as compressed, \
        open(file_name, 'wb') as extracted:
    shutil.copyfileobj(compressed, extracted)

# load the embedding matrix from the binary word2vec file
model = KeyedVectors.load_word2vec_format(file_name, binary=True)

# example 1: look up the vector representation of the word apple
apple_embedding = model['apple']

# example 2: print the cosine similarity between the words king and queen
print(model.similarity('king', 'queen'))
	# load libraries
	from gensim.models import KeyedVectors
	import os
	import requests
	import gzip
	import shutil

	# Source and target of the embedding matrix built by Google: the Google Drive
	# file id to fetch, and the path of the compressed archive, which is placed in
	# the current working directory.
	file_id = '0B7XkCwpI5KDYNlNUTTlSS21pQmM'
	file_name_compressed = 'GoogleNews-vectors-negative300.bin.gz'
	cwd = os.getcwd()
	destination = os.path.join(cwd, file_name_compressed)

	# function for downloading file
	def download_file_from_google_drive(id, destination):
	    """Download a Google Drive file and write it to a local path.

	    Adapted from https://stackoverflow.com/a/39225039.

	    Args:
	        id: Google Drive file id, sent as the ``id`` query parameter.
	        destination: Local path the response body is written to.

	    Raises:
	        requests.HTTPError: If the final response carries an error status,
	            so that an error page is never saved in place of the file.
	    """
	    URL = "https://docs.google.com/uc?export=download"

	    # Using the session as a context manager closes it (and its pooled
	    # connections) even when the download fails part-way.
	    with requests.Session() as session:
	        response = session.get(URL, params={'id': id}, stream=True)
	        token = get_confirm_token(response)

	        if token:
	            # A confirmation token was returned in the cookies: release the
	            # first (unread) response and repeat the request with the token.
	            response.close()
	            params = {'id': id, 'confirm': token}
	            response = session.get(URL, params=params, stream=True)

	        # Fail here rather than writing an error body to `destination`.
	        response.raise_for_status()
	        save_response_content(response, destination)

	def get_confirm_token(response):
	    """Return the confirmation token stored in the response cookies.

	    Args:
	        response: Object whose ``cookies.items()`` yields (name, value) pairs.

	    Returns:
	        The value of the first cookie whose name starts with
	        ``download_warning``, or ``None`` when no such cookie exists.
	    """
	    for key, value in response.cookies.items():
	        if key.startswith('download_warning'):
	            return value

	    return None

	def save_response_content(response, destination):
	    """Write the body of a streamed response to a file.

	    Args:
	        response: Object exposing ``iter_content(chunk_size)`` that yields
	            byte chunks.
	        destination: Path of the file to create or overwrite (binary mode).
	    """
	    CHUNK_SIZE = 32768

	    with open(destination, "wb") as f:
	        for chunk in response.iter_content(CHUNK_SIZE):
	            if chunk:  # filter out keep-alive new chunks
	                f.write(chunk)

	# download file
	download_file_from_google_drive(file_id, destination)

	# unzip file: decompress the archive into the plain binary file
	file_name = 'GoogleNews-vectors-negative300.bin'
	with gzip.open(file_name_compressed, 'r') as f_in, open(file_name, 'wb') as f_out:
	    shutil.copyfileobj(f_in, f_out)

	# load the embedding matrix from the binary word2vec file
	model = KeyedVectors.load_word2vec_format(file_name, binary=True)

	# example 1: get the word vector representation of the word apple
	apple_embedding = model['apple']

	# example 2: compute cosine similarity between words king and queen
	print(model.similarity('king', 'queen'))