Created
November 22, 2015 20:10
-
-
Save NateMeyvis/76802c7f0658fd42943f to your computer and use it in GitHub Desktop.
Response to Tango's question 11/22/2015
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This is a quick and dirty tool for answering questions such as Tango asked on his blog Nov. 22, 2015. | |
# I make _no claims_ that this code is elegant or Pythonic. I optimized for development speed. | |
#***WARNING***: if you got your tab-separated file by copy/pasting from the BBWAA's Google Sheet,
#there will be extraneous tabs in two locations. You will need to remove these. | |
#The offending cells occur in the rows for Jeff Wilson's and Alex Pavlovic's ballots. | |
#Ballots are available here [https://goo.gl/US8Xp6] (AL) and here [https://goo.gl/p9yH73] (NL) | |
#Once you have this, you can answer Tango's question with, for example, "give_voting_df('al.txt').sort_values('Score')['Score']"
#You can also answer any number of other questions about this year's MVP voting. | |
import pandas as pd | |
# This is the correspondence between place and points in MVP voting:
# the key is the ballot place (1 through 10), the value is the points awarded.
weights = {1: 14, 2: 9, 3: 8, 4: 7, 5: 6, 6: 5, 7: 4, 8: 3, 9: 2, 10: 1}
def in_top(players, voter, n, votes):
    '''
    Is every player in <players> in the top <n> spots for <voter>?

    <players> is an iterable of player names.
    <votes> is a dictionary whose keys are (voter, player) pairs and whose
    values are the places where <voter> put <player>; see function
    <give_voting_df> for how it is built.

    Returns 1 if true, 0 if false. A player the voter did not list at all
    counts as "not in the top <n>". An empty <players> yields 1.
    '''
    for player in players:
        # A missing key means the voter left this player off the ballot.
        # Using .get() instead of a bare "except:" keeps unrelated errors
        # (and KeyboardInterrupt) from being silently turned into a 0.
        place = votes.get((voter, player))
        if place is None or place > n:
            return 0
    return 1
def give_voting_df(path):
    '''
    Given a path to a tab-separated file of a league's votes, generates a
    dataframe with voters' ballots and their scores on the eight Tango criteria.

    The first column of the file is used as the index (one row per voter).
    The ten ballot places are read from positional columns 2 through 11 of
    the resulting dataframe, and the columns named '1st', '2nd', '3rd' and
    '4th' must exist.

    Returns the ballot dataframe with these columns added:
      tango1..tango4 -- 1 if the top 1/2/3/4 players in the overall points
                        results all appear within the voter's first
                        2/3/6/10 places, else 0.
      tango5..tango8 -- 1 if the voter's '1st'/'2nd'/'3rd'/'4th' pick is
                        among the top 2/3/6/10 players in the overall
                        points results, else 0.
      Score          -- the sum of the eight tango columns.
    '''
    # Import the tab-separated file into Pandas. DataFrame.from_csv no longer
    # exists in current pandas; read_csv with index_col=0 is the replacement.
    df = pd.read_csv(path, sep='\t', index_col=0)

    # Whip up a dictionary of votes. The keys are (voter, player) pairs;
    # the values are the places where <voter> put <player>.
    votes = {}
    for row, voter in enumerate(df.index):
        for place in range(1, 11):
            # Place k sits in positional column k + 1 of the dataframe.
            votes[(voter, df.iloc[row, place + 1])] = place

    # Generate a dataframe describing the votes, one row per (voter, player).
    df_votes = pd.DataFrame({
        'voter': [voter for (voter, _) in votes],
        'points': [weights[place] for place in votes.values()],
        'player': [player for (_, player) in votes],
    })

    # Use that dataframe to get the results of the vote: total points per
    # player, best first. Only the ordering of players is needed below.
    totals = df_votes.groupby('player')['points'].sum()
    ranking = totals.sort_values(ascending=False).index

    # Criteria 1-4: (criterion number, how many top finishers, within how
    # many ballot places they must all appear).
    for number, top_k, within in ((1, 1, 2), (2, 2, 3), (3, 3, 6), (4, 4, 10)):
        leaders = ranking[:top_k]
        # Build a real list: on Python 3, map() would hand pandas a lazy
        # iterator instead of the column values.
        df['tango{}'.format(number)] = [
            in_top(leaders, voter, within, votes) for voter in df.index
        ]

    # Criteria 5-8: (criterion number, ballot column, how many top finishers
    # the voter's pick in that column must be among).
    for number, column, top_k in ((5, '1st', 2), (6, '2nd', 3),
                                  (7, '3rd', 6), (8, '4th', 10)):
        df['tango{}'.format(number)] = df[column].isin(ranking[:top_k]).astype(int)

    # Add a column adding up the points for the eight criteria columns.
    criteria_columns = ['tango{}'.format(number) for number in range(1, 9)]
    df['Score'] = df[criteria_columns].sum(axis=1)

    # Return the dataframe
    return df
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment