kudkudak/evaluate_nmt.py

## evaluate_nmt.py
import cPickle

import numpy as np
import pandas as pd
from scipy.stats.stats import spearmanr

# Score an embedding against SimLex-999: for every word pair whose two words
# are both present in the embedding, compute the cosine similarity of their
# vectors, then print the Spearman correlation between those similarities and
# the 'SimLex999' ratings column.
#
# Author's note: same score for the second included embedding.

# NOTE(review): unpickling can execute arbitrary code -- only load embedding
# files that come from a trusted source.
# The file is opened in binary mode inside a `with` block so the pickle bytes
# are read untranslated and the handle is always closed.
# E is presumably a mapping word -> vector (it is only used via `in` and
# indexing below) -- confirm against the pickle's producer.
with open("Trans_embds/D_RNN_500k_144h.pkl", "rb") as embedding_file:
    E = cPickle.load(embedding_file)
data = pd.read_csv("SimLex-999/SimLex-999.txt", sep="\t")

scores, golden_ratings = [], []
for _, row in data.iterrows():
    if row['word1'] in E and row['word2'] in E:
        # Vectors are looked up in E, the object that was loaded and tested
        # for membership above (the previous `d` was never defined).
        v1, v2 = E[row['word1']], E[row['word2']]
        # Cosine similarity: dot product divided by the product of the norms.
        scores.append(v1.dot(v2.T)/(np.linalg.norm(v1) * np.linalg.norm(v2)))
        golden_ratings.append(row['SimLex999'])
    else:
        # Pairs with a word missing from E are reported and left out of the
        # correlation. The two spaces after "Filtered" reproduce the output of
        # the original print statement exactly.
        print("Filtered  %s %s" % (row['word1'], row['word2']))
# spearmanr returns (correlation, p-value); only the correlation is printed.
print(spearmanr(scores, golden_ratings)[0])

"""
Output:
Filtered  disorganize organize
0.459089707916
"""
	import cPickle

	import numpy as np
	import pandas as pd
	from scipy.stats.stats import spearmanr

	# Score an embedding against SimLex-999: for every word pair whose two
	# words are both present in the embedding, compute the cosine similarity
	# of their vectors, then print the Spearman correlation between those
	# similarities and the 'SimLex999' ratings column.
	#
	# Author's note: same score for the second included embedding.

	# NOTE(review): unpickling can execute arbitrary code -- only load
	# embedding files that come from a trusted source.
	# The file is opened in binary mode inside a `with` block so the pickle
	# bytes are read untranslated and the handle is always closed.
	# E is presumably a mapping word -> vector (it is only used via `in` and
	# indexing below) -- confirm against the pickle's producer.
	with open("Trans_embds/D_RNN_500k_144h.pkl", "rb") as embedding_file:
	    E = cPickle.load(embedding_file)
	data = pd.read_csv("SimLex-999/SimLex-999.txt", sep="\t")

	scores, golden_ratings = [], []
	for _, row in data.iterrows():
	    if row['word1'] in E and row['word2'] in E:
	        # Vectors are looked up in E, the object that was loaded and
	        # tested for membership above (the previous `d` was never defined).
	        v1, v2 = E[row['word1']], E[row['word2']]
	        # Cosine similarity: dot product over the product of the norms.
	        scores.append(v1.dot(v2.T)/(np.linalg.norm(v1) * np.linalg.norm(v2)))
	        golden_ratings.append(row['SimLex999'])
	    else:
	        # Pairs with a word missing from E are reported and left out of
	        # the correlation. The two spaces after "Filtered" reproduce the
	        # output of the original print statement exactly.
	        print("Filtered  %s %s" % (row['word1'], row['word2']))
	# spearmanr returns (correlation, p-value); only the correlation is printed.
	print(spearmanr(scores, golden_ratings)[0])

	"""
	Output:
	Filtered  disorganize organize
	0.459089707916
	"""