-
-
Save barnden/31ebac6b9641b6a0e4589565b20921e6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def distance(pid, yr):
    """Print the player season closest to a given one by Mahalanobis distance.

    Looks up the season of player ``pid`` in year ``yr`` in the module-level
    ``orig_data`` frame, compares its ``cols`` columns against every row of
    the module-level ``data`` frame using the module-level ``invcov`` matrix,
    and prints the ``batting_data`` row found at the position of the smallest
    distance.

    Args:
        pid: Value matched against ``orig_data.playerID``.
        yr: Value matched against ``orig_data.yearID``.

    Returns:
        None. Both the reference season and the best match are reported
        with ``print``.

    Raises:
        IndexError: If no row of ``orig_data`` matches ``pid`` and ``yr``.

    NOTE(review): this relies on ``data`` and ``batting_data`` being
    row-aligned (same positional order) and on ``invcov`` being the inverse
    covariance of the ``data`` columns -- confirm where they are built.
    """
    # Get player data
    player = orig_data[(orig_data.playerID == pid) & (orig_data.yearID == yr)][cols]
    if len(player) == 0:
        # Same exception type that indexing the empty index would raise,
        # but with a message that names the missing season.
        raise IndexError('no season found for player {} in year {}'.format(pid, yr))

    sid = player.index.astype(int)[0]
    print('Comparing: {} (id: {})'.format(pid, sid))

    # Mask invalid values in the player vector
    pvec = np.ma.masked_invalid(np.array(player))

    min_player = None
    min_val = None

    for pos, label in enumerate(data.index):
        # Ignore the current player season
        if label == sid:
            continue

        # Mask invalid values in the ith player season
        cvec = np.ma.masked_invalid(np.array(data.iloc[pos]))

        # Find difference between x and y
        delta = pvec - cvec

        # Find Mahalanobis distance: sqrt(delta . invcov . delta^T)
        # NOTE(review): np.einsum appears to work on the raw data of masked
        # arrays, so the masks may not influence the result -- confirm.
        dist = np.sqrt(np.einsum('nj,jk,nk->n', delta, invcov, delta))[0]

        # A NaN distance can never be a meaningful minimum, and once stored
        # it would block every later comparison, so skip it.
        if np.isnan(dist):
            continue

        # Check to see if current distance is smallest, if so, keep it.
        if min_val is None or dist < min_val:
            min_player = batting_data.iloc[pos]
            min_val = dist

    # Print out the most similar season
    print('Most similar: dist: {}\n{}'.format(min_val, min_player))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment