Skip to content

Instantly share code, notes, and snippets.

@eyyub
Last active August 28, 2017 18:15
Show Gist options
  • Save eyyub/4fefe3550eda12bde77b2eddc4efc299 to your computer and use it in GitHub Desktop.
Save eyyub/4fefe3550eda12bde77b2eddc4efc299 to your computer and use it in GitHub Desktop.
import sys
import gensim
import numpy as np
# Path to the word2vec model file, taken from the first command-line argument.
W2V_PATH = sys.argv[1]
def avg_sentence(sentence, wv, dim=None):
    """Return the mean word vector of a tokenised sentence.

    Tokens that are not present in ``wv`` contribute nothing to the sum, but
    the sum is still divided by the total number of tokens in ``sentence``.

    Args:
        sentence: Sized sequence of word tokens.
        wv: Object supporting ``word in wv`` and ``wv[word]`` (for example a
            gensim ``KeyedVectors`` or a plain ``dict`` of vectors).
        dim: Length of the word vectors. When omitted, ``wv.vector_size`` is
            used if that attribute exists; otherwise it falls back to 300,
            the size that used to be hard-coded here.

    Returns:
        A 1-D float ``numpy`` array of length ``dim``. An empty sentence
        yields the all-zeros vector.
    """
    if dim is None:
        dim = getattr(wv, "vector_size", 300)
    total = np.zeros(dim)
    n_tokens = len(sentence)
    if n_tokens == 0:
        # Dividing by zero here would produce a vector full of NaNs.
        return total
    for word in sentence:
        if word in wv:
            total += wv[word]
    return total / n_tokens
def cosine_sim(a, b):
    """Return the cosine similarity between vectors ``a`` and ``b``.

    Args:
        a: 1-D array-like of numbers.
        b: 1-D array-like of numbers, same length as ``a``.

    Returns:
        ``dot(a, b) / (|a| * |b|)``, or ``0.0`` when either vector has zero
        norm, since the similarity is undefined there and the plain formula
        would evaluate 0/0 to NaN.
    """
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0:
        # A NaN score would make any later ranking by similarity unreliable.
        return 0.0
    return np.dot(a, b) / denom
# (short URL, description) pairs; each description is embedded and compared
# against the user's input sentence.
url_descr = [
    ("https://tny.im/a13", "we don't care"),
    ("https://tny.im/a0-", "i love you"),
    ("https://tny.im/a12", "relax take it easy"),
    ("https://tny.im/a16", "that is embarrassing"),
    ("https://tny.im/a10", "screw you guys"),
]
# Load the pretrained vectors from W2V_PATH (binary word2vec format).
model = gensim.models.KeyedVectors.load_word2vec_format(W2V_PATH, binary=True)

# `load_word2vec_format` already returns a KeyedVectors object, so it is
# queried directly. The old `model.wv` alias on KeyedVectors is not available
# in newer gensim releases, whereas `word in model` / `model[word]` work in
# both old and new versions.
inputv = avg_sentence(input().split(), model)

# Attach the averaged vector to every (url, description) pair ...
avgs = [
    (url, descr, avg_sentence(descr.split(), model))
    for url, descr in url_descr
]
# ... then swap the vector for its cosine similarity with the input sentence.
sims = [(url, descr, cosine_sim(inputv, vec)) for url, descr, vec in avgs]

# Rank best match first and print the whole ranking.
most_similar_meme = sorted(sims, key=lambda p: p[2], reverse=True)
print(most_similar_meme)
Input: i like you
Output: [('https://tny.im/a0-', 'i love you', 0.87972317862080773), ('https://tny.im/a10', 'screw you guys', 0.66992365073780347), ('https://tny.im/a13', "we don't care", 0.56639891559620026), ('https://tny.im/a12', 'relax take it easy', 0.40517121688823338), ('https://tny.im/a16', 'that is embarrassing', 0.2843743794717129)]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment