Skip to content

Instantly share code, notes, and snippets.

@heartonbit
Last active May 2, 2018 01:13
Show Gist options
  • Save heartonbit/e751d767595bc82e938bc4e9888ea1f6 to your computer and use it in GitHub Desktop.
Save heartonbit/e751d767595bc82e938bc4e9888ea1f6 to your computer and use it in GitHub Desktop.
def cosine_distances(X, Y):
    """Compute pairwise cosine distances between the rows of X and Y.

    The first column of each frame is treated as the row identifier
    (``inst_id``); every remaining column is passed to scikit-learn as a
    component of that row's score vector.

    Args:
        X: Target example score vectors; DataFrame with inst_id as the
            first column.
        Y: All example score vectors; DataFrame with inst_id as the
            first column.

    Returns:
        DataFrame of pairwise cosine distances whose index holds the ids
        taken from X and whose columns hold the ids taken from Y.
    """
    # Function-scope imports keep the block self-contained: ``pd`` was
    # previously referenced without being brought into scope here.
    import pandas as pd
    from sklearn.metrics import pairwise

    # Split the identifier column from the numeric score columns.
    x_ids = X.iloc[:, 0].values
    y_ids = Y.iloc[:, 0].values
    d_mat = pairwise.cosine_distances(X.iloc[:, 1:], Y.iloc[:, 1:])

    # Label rows with X's ids and columns with Y's ids in a single step.
    return pd.DataFrame(d_mat, index=x_ids, columns=y_ids)
def validate(X, Y, d_mat_df, tol=0.00001):
    """Validate the result of the cosine_distances function pair by pair.

    Every (row of X, row of Y) pair is passed to ``cosine_distances`` on
    its own, and the single resulting distance is compared with the entry
    stored in ``d_mat_df`` under the same pair of ids.

    Args:
        X: Bait DataFrame with inst_id as the first column.
        Y: All DataFrame with inst_id as the first column.
        d_mat_df: Result DataFrame from the cosine_distances function
            (index labelled with X's ids, columns with Y's ids).
        tol: Largest absolute difference that still counts as equal.

    Raises:
        AssertionError: If any recomputed distance differs from the stored
            one by ``tol`` or more (or the comparison involves NaN).
    """
    # NOTE(review): the label lookup below assumes ids are unique within
    # X and within Y -- confirm against the callers.
    y_ids = list(Y.iloc[:, 0])
    for i, x_inst in enumerate(X.iloc[:, 0]):
        # Keep the id column: cosine_distances strips the first column
        # itself, so removing it here would drop a real score component.
        x_row = X.iloc[[i]]
        for j, y_inst in enumerate(y_ids):
            y_row = Y.iloc[[j]]
            # Positional access: the 1x1 result is labelled with the ids,
            # so ``[0][0]`` would be a label lookup, not "first cell".
            dist1 = cosine_distances(x_row, y_row).iloc[0, 0]
            dist2 = d_mat_df.loc[x_inst, y_inst]
            # Explicit raise instead of ``assert`` so the check still runs
            # under ``python -O``; ``not ... <`` also rejects NaN.
            if not abs(dist1 - dist2) < tol:
                raise AssertionError(
                    "Something is wrong. {0} is NOT equal with {1}".format(dist1, dist2)
                )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment