Skip to content

Instantly share code, notes, and snippets.

@kudkudak
Created April 11, 2016 06:20
Show Gist options
  • Save kudkudak/a78e8d0d06c56da59dc04be092a22a0f to your computer and use it in GitHub Desktop.
Save kudkudak/a78e8d0d06c56da59dc04be092a22a0f to your computer and use it in GitHub Desktop.
import cPickle

import numpy as np
import pandas as pd
from scipy.stats.stats import spearmanr
# Evaluate a set of word embeddings against the SimLex-999 word pairs: compute
# the cosine similarity of each pair present in the embedding and report the
# Spearman rank correlation with the ratings in the 'SimLex999' column.
# Same score for the second included embedding
#
# NOTE(security): unpickling can execute arbitrary code; only load embedding
# files obtained from a trusted source.
# Binary mode is what pickle expects, and the `with` block closes the handle.
with open("Trans_embds/D_RNN_500k_144h.pkl", "rb") as f:
    # Presumably a dict-like mapping word -> numpy vector (it is used below
    # via `in`, indexing and `.dot`) -- confirm against the pickle contents.
    E = cPickle.load(f)
data = pd.read_csv("SimLex-999/SimLex-999.txt", sep="\t")

scores, golden_ratings = [], []
for _, row in data.iterrows():
    word1, word2 = row['word1'], row['word2']
    if word1 in E and word2 in E:
        # Look the vectors up in E, the mapping that was just tested for
        # membership (the original indexed an undefined name here).
        v1, v2 = E[word1], E[word2]
        # Cosine similarity: dot product divided by the product of the norms.
        cosine = v1.dot(v2.T) / (np.linalg.norm(v1) * np.linalg.norm(v2))
        scores.append(cosine)
        golden_ratings.append(row['SimLex999'])
    else:
        # Pairs with a word missing from the embedding are left out of the
        # correlation and reported. The single-argument print(...) form emits
        # the same text as the Python 2 statement `print "Filtered ", a, b`
        # and is also valid Python 3 syntax.
        print("%s %s %s" % ("Filtered ", word1, word2))
# spearmanr returns (correlation, p-value); only the correlation is printed.
print(spearmanr(scores, golden_ratings)[0])
"""
Output:
Filtered disorganize organize
0.459089707916
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment