Last active
July 20, 2018 18:51
-
-
Save ThomasDelteil/9f9a68d84d05cf3fdb0f1947e89110f4 to your computer and use it in GitHub Desktop.
gluon_nlp_blog
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import mxnet as mx
import gluonnlp as nlp

# Load GloVe word embeddings (6B-token corpus, 50-dimensional vectors).
glove = nlp.embedding.create('glove', source='glove.6B.50d')

# Compute 'baby' and 'infant' word embeddings from the GloVe table.
baby_glove, infant_glove = glove['baby'], glove['infant']

# Load pre-trained AWD LSTM language model and its matching vocabulary.
# NOTE(review): downloads weights on first call — requires network access.
lm_model, lm_vocab = nlp.model.get_model(name='awd_lstm_lm_1150',
                                         dataset_name='wikitext-2',
                                         pretrained=True)

# Map both words to their integer ids in the LM vocabulary
# (Vocab supports multi-word lookup, returning a list of ids).
baby_idx, infant_idx = lm_vocab['baby', 'infant']

# The first child of the model's embedding block is the embedding layer itself.
lm_embedding = lm_model.embedding[0]

# Get the word embeddings of 'baby' and 'infant' from the language model.
baby_lm, infant_lm = lm_embedding(mx.nd.array([baby_idx, infant_idx]))
def cos_similarity(vec1, vec2): | |
return mx.nd.dot(vec1, vec2) / (vec1.norm() * vec2.norm()) | |
# Compare how close 'baby' and 'infant' are under each representation.
# Expected values (from the original run) are noted inline: GloVe rates the
# pair as more similar than the LM's input embedding does.
print(cos_similarity(baby_glove, infant_glove))  # 0.74056691
print(cos_similarity(baby_lm, infant_lm))  # 0.3729561
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment