# NateMeyvis/mvp_ballots.py

## mvp_ballots.py
# This is a quick and dirty tool for answering questions such as Tango asked on his blog Nov. 22, 2015.
# I make _no claims_ that this code is elegant or Pythonic. I optimized for development speed.

#***WARNING***: if you got your tab-separated file by copy/pasting from BBWAA's Google Sheet,
#there will be extraneous tabs in two locations. You will need to remove these.
#The offending cells occur in the rows for Jeff Wilson's and Alex Pavlovic's ballots.

#Ballots are available here [https://goo.gl/US8Xp6] (AL) and here [https://goo.gl/p9yH73] (NL)

#Once you have this, you can answer Tango's question with, for example, "give_voting_df('al.txt').sort('Score')['Score']"
#You can also answer any number of other questions about this year's MVP voting.


import pandas as pd

#This is the correspondence between place and points in MVP voting:
#a 1st-place vote is worth 14 points, 2nd place 9, 3rd place 8, and so on
#down to 1 point for 10th place. Keys are places, values are points.
weights = {1:14, 2:9, 3:8, 4:7, 5:6, 6:5, 7:4, 8:3, 9:2, 10:1}

def in_top(players, voter, n, votes):
    '''
    Did <voter> put every player in <players> within the top <n> spots?
    <votes> maps (voter, player) pairs to the place <voter> gave <player>.
    Returns 1 if every player was placed at <n> or better, 0 if any player
    is missing from <voter>'s ballot or was placed lower than <n>.
    An empty <players> returns 1.
    '''
    for player in players:
        try:
            place = votes[(voter, player)]
        except KeyError:
            #<voter> left <player> off the ballot entirely.
            #Only the missing-key case is handled; other errors propagate.
            return 0
        if place > n:
            return 0
    return 1

def give_voting_df(path):
    '''
    Given a path to a tab-separated file of a league's votes, generates a
    dataframe with voters' ballots and their scores on the eight Tango criteria.

    The first column of the file becomes the index and is used as the voter.
    The ten ballot places are read by position, from the columns at offsets
    2 through 11 after the index; the columns named '1st', '2nd', '3rd' and
    '4th' are read by name.
    NOTE(review): this presumes '1st' sits at column offset 2 -- confirm
    against the ballot file.

    Returns the ballot dataframe with columns 'tango1' through 'tango8'
    (each 0 or 1) and 'Score' (their sum) added.
    '''
    #Import the tab-separated file into Pandas. DataFrame.from_csv has been
    #removed from pandas; read_csv with index_col=0 is its replacement.
    df = pd.read_csv(path, sep='\t', index_col=0)

    #Whip up a dictionary of votes. The keys are (voter, player) pairs;
    #The values are the places where <voter> put <player>.
    votes = {}
    for row, voter in enumerate(df.index):
        for place in range(1, 11):
            votes[(voter, df.iloc[row, place + 1])] = place

    #Generate a dataframe with one row per vote and the points it is worth
    vote_rows = [(player, weights[place]) for (_, player), place in votes.items()]
    df_votes = pd.DataFrame(vote_rows, columns=['player', 'points'])

    #Use that dataframe to get the results of the vote: players ordered by
    #total points, best first. Only the numeric column is summed, and
    #sort_values replaces the removed DataFrame.sort.
    totals = df_votes.groupby('player')['points'].sum()
    results = totals.sort_values(ascending=False)
    ranking = list(results.index)

    #Criteria 1-4: did the voter put the top <leaders> finishers within
    #his or her own top <cutoff>? Lists are built explicitly because map()
    #is lazy on Python 3 and cannot be assigned as a column.
    placement_rules = [(1, 2), (2, 3), (3, 6), (4, 10)]
    for number, (leaders, cutoff) in enumerate(placement_rules, start=1):
        df['tango%d' % number] = [
            in_top(ranking[:leaders], voter, cutoff, votes) for voter in df.index
        ]

    #Criteria 5-8: did the voter's pick in the named column finish within
    #the overall top <cutoff>?
    pick_rules = [('1st', 2), ('2nd', 3), ('3rd', 6), ('4th', 10)]
    for number, (column, cutoff) in enumerate(pick_rules, start=5):
        allowed = ranking[:cutoff]
        df['tango%d' % number] = [1 if pick in allowed else 0 for pick in df[column]]

    #Add a column adding up the points for the eight criteria columns
    tango_columns = ['tango%d' % number for number in range(1, 9)]
    df['Score'] = df[tango_columns].sum(axis=1)

    #Return the dataframe
    return df
	# This is a quick and dirty tool for answering questions such as Tango asked on his blog Nov. 22, 2015.
	# I make _no claims_ that this code is elegant or Pythonic. I optimized for development speed.

	#*WARNING*: if you got your tab-separated file by copy/pasting from BBWAA's Google Sheet,
	#there will be extraneous tabs in two locations. You will need to remove these.
	#The offending cells occur in the rows for Jeff Wilson's and Alex Pavlovic's ballots.

	#Ballots are available here [https://goo.gl/US8Xp6] (AL) and here [https://goo.gl/p9yH73] (NL)

	#Once you have this, you can answer Tango's question with, for example, "give_voting_df('al.txt').sort('Score')['Score']"
	#You can also answer any number of other questions about this year's MVP voting.



	import pandas as pd

	#This is the correspondence between place and points in MVP voting:
	#a 1st-place vote is worth 14 points, 2nd place 9, 3rd place 8, and so on
	#down to 1 point for 10th place. Keys are places, values are points.
	weights = {1:14, 2:9, 3:8, 4:7, 5:6, 6:5, 7:4, 8:3, 9:2, 10:1}

	def in_top(players, voter, n, votes):
		'''
		Did <voter> put every player in <players> within the top <n> spots?
		<votes> maps (voter, player) pairs to the place <voter> gave <player>.
		Returns 1 if every player was placed at <n> or better, 0 if any player
		is missing from <voter>'s ballot or was placed lower than <n>.
		An empty <players> returns 1.
		'''
		for player in players:
			try:
				place = votes[(voter, player)]
			except KeyError:
				#<voter> left <player> off the ballot entirely.
				#Only the missing-key case is handled; other errors propagate.
				return 0
			if place > n:
				return 0
		return 1

	def give_voting_df(path):
		'''
		Given a path to a tab-separated file of a league's votes, generates a
		dataframe with voters' ballots and their scores on the eight Tango criteria.

		The first column of the file becomes the index and is used as the voter.
		The ten ballot places are read by position, from the columns at offsets
		2 through 11 after the index; the columns named '1st', '2nd', '3rd' and
		'4th' are read by name.
		NOTE(review): this presumes '1st' sits at column offset 2 -- confirm
		against the ballot file.

		Returns the ballot dataframe with columns 'tango1' through 'tango8'
		(each 0 or 1) and 'Score' (their sum) added.
		'''
		#Import the tab-separated file into Pandas. DataFrame.from_csv has been
		#removed from pandas; read_csv with index_col=0 is its replacement.
		df = pd.read_csv(path, sep='\t', index_col=0)

		#Whip up a dictionary of votes. The keys are (voter, player) pairs;
		#The values are the places where <voter> put <player>.
		votes = {}
		for row, voter in enumerate(df.index):
			for place in range(1, 11):
				votes[(voter, df.iloc[row, place + 1])] = place

		#Generate a dataframe with one row per vote and the points it is worth
		vote_rows = [(player, weights[place]) for (_, player), place in votes.items()]
		df_votes = pd.DataFrame(vote_rows, columns=['player', 'points'])

		#Use that dataframe to get the results of the vote: players ordered by
		#total points, best first. Only the numeric column is summed, and
		#sort_values replaces the removed DataFrame.sort.
		totals = df_votes.groupby('player')['points'].sum()
		results = totals.sort_values(ascending=False)
		ranking = list(results.index)

		#Criteria 1-4: did the voter put the top <leaders> finishers within
		#his or her own top <cutoff>? Lists are built explicitly because map()
		#is lazy on Python 3 and cannot be assigned as a column.
		placement_rules = [(1, 2), (2, 3), (3, 6), (4, 10)]
		for number, (leaders, cutoff) in enumerate(placement_rules, start=1):
			df['tango%d' % number] = [
				in_top(ranking[:leaders], voter, cutoff, votes) for voter in df.index
			]

		#Criteria 5-8: did the voter's pick in the named column finish within
		#the overall top <cutoff>?
		pick_rules = [('1st', 2), ('2nd', 3), ('3rd', 6), ('4th', 10)]
		for number, (column, cutoff) in enumerate(pick_rules, start=5):
			allowed = ranking[:cutoff]
			df['tango%d' % number] = [1 if pick in allowed else 0 for pick in df[column]]

		#Add a column adding up the points for the eight criteria columns
		tango_columns = ['tango%d' % number for number in range(1, 9)]
		df['Score'] = df[tango_columns].sum(axis=1)

		#Return the dataframe
		return df