Skip to content

Instantly share code, notes, and snippets.

@amn41
Last active December 30, 2016 02:04
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save amn41/bf868f1cef01051dabc1f18bc4cf8fcf to your computer and use it in GitHub Desktop.
Save amn41/bf868f1cef01051dabc1f18bc4cf8fcf to your computer and use it in GitHub Desktop.
def find_similar_words(embed, text, refs, thresh):
    """Return the tokens of *text* whose mean similarity to *refs* exceeds *thresh*.

    Each token's score is the mean of the dot products between its embedding
    row and the embedding rows of every reference term. The per-token scores
    are printed to stdout before returning.

    Args:
        embed: Object exposing ``W`` (a 2-D array with one embedding per row)
            and ``vocab`` (a mapping from term to row index in ``W``).
        text: String to scan; it is split on single spaces.
        refs: Sequence of reference terms.
        thresh: Score a token must strictly exceed to be returned.

    Returns:
        List of tokens (in order of appearance, duplicates kept) whose score
        is greater than ``thresh``.

    Note:
        The scores are raw dot products; they are cosine similarities only if
        the rows of ``embed.W`` are unit-normalised (assumption, not checked
        here).
    """
    # One row per reference term. Terms missing from the vocabulary keep an
    # all-zero row, so they contribute 0 to the mean and pull scores down.
    ref_vectors = np.zeros((len(refs), embed.W.shape[1]))
    for idx, term in enumerate(refs):
        if term in embed.vocab:
            ref_vectors[idx, :] = embed.W[embed.vocab[term], :]

    tokens = text.split(' ')
    # Out-of-vocabulary tokens keep the default score of 0.
    scores = [0.] * len(tokens)
    found = []
    for idx, term in enumerate(tokens):
        if term not in embed.vocab:
            continue
        vec = embed.W[embed.vocab[term], :]
        # ``.T`` is a no-op on a 1-D row; it is kept in case a row of ``W``
        # comes back 2-D (e.g. a matrix type) -- TODO confirm W's type.
        score = np.mean(np.dot(ref_vectors, vec.T))
        scores[idx] = score
        if score > thresh:
            found.append(term)

    # Function-call form works on both Python 2 and Python 3; the original
    # ``print scores`` statement is a SyntaxError on Python 3.
    print(scores)
    return found
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment