Skip to content

Instantly share code, notes, and snippets.

@eyyub
Last active January 12, 2018 09:09
Show Gist options
  • Save eyyub/748ce02e0cc1c6186ca2304641fefcd3 to your computer and use it in GitHub Desktop.
Save eyyub/748ce02e0cc1c6186ca2304641fefcd3 to your computer and use it in GitHub Desktop.
import sys
import gensim
import numpy as np
# Path to the binary word2vec-format file, taken from the first
# command-line argument; it is loaded into `model` further down.
W2V_PATH = sys.argv[1]
def avg_sentence(sentence, wv):
    """Return the average word vector of a tokenized sentence.

    Args:
        sentence: Sequence of word tokens.
        wv: Word-vector lookup supporting ``word in wv`` and ``wv[word]``.
            If it exposes a ``vector_size`` attribute, that dimensionality
            is used; otherwise 300 is assumed.

    Returns:
        A 1-D numpy array: the sum of the vectors of all known words,
        divided by the total number of tokens. Out-of-vocabulary words add
        nothing to the sum but still count toward the divisor. An empty
        sentence yields the zero vector.
    """
    dim = getattr(wv, "vector_size", 300)
    total = np.zeros(dim)
    # Guard the division below: an empty sentence would otherwise divide
    # by zero and produce an all-NaN vector.
    if not sentence:
        return total
    for word in sentence:
        if word in wv:
            total += wv[word]
    return total / len(sentence)
def cosine_sim(a, b):
    """Return the cosine similarity between vectors ``a`` and ``b``.

    Args:
        a: 1-D array-like of numbers.
        b: 1-D array-like of numbers with the same length as ``a``.

    Returns:
        ``dot(a, b) / (||a|| * ||b||)``, or ``0.0`` when either vector has
        zero norm.
    """
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    # A zero-norm vector would give 0/0 = NaN, and NaN scores cannot be
    # ordered reliably by callers that sort on the similarity.
    if denom == 0:
        return 0.0
    return np.dot(a, b) / denom
# Candidate memes: (short URL, textual description) pairs.
url_descr = [
    ('http://tny.im/a13', 'we don\'t care'),
    ('https://tny.im/a0-', 'i love you'),
    ('https://tny.im/a12', 'relax take it easy'),
    ('https://tny.im/a16', 'that is embarrassing'),
    ('https://tny.im/a10', 'screw you guys'),
]

# load_word2vec_format already returns the KeyedVectors object, so it is
# passed to avg_sentence directly. The former `model.wv` indirection is a
# deprecated alias in gensim 3.x and is not available on KeyedVectors in
# gensim 4.
model = gensim.models.KeyedVectors.load_word2vec_format(W2V_PATH, binary=True)

# Average vector of the whitespace-tokenized sentence read from stdin.
inputv = avg_sentence(input().split(), model)

# (url, description, average description vector) for every candidate.
avgs = [
    (url, descr, avg_sentence(descr.split(), model))
    for url, descr in url_descr
]

# (url, description, cosine similarity to the input sentence).
sims = [
    (url, descr, cosine_sim(inputv, vec))
    for url, descr, vec in avgs
]

# Rank candidates from most to least similar and print the full ranking.
most_similar_meme = sorted(sims, key=lambda p: p[2], reverse=True)
print(most_similar_meme)
Input: i like you
Output: [
('https://tny.im/a0-', 'i love you', 0.87972317862080773),
('https://tny.im/a10', 'screw you guys', 0.66992365073780347),
('http://tny.im/a13', "we don't care", 0.56639891559620026),
('https://tny.im/a12', 'relax take it easy', 0.40517121688823338),
('https://tny.im/a16', 'that is embarrassing', 0.2843743794717129)
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment