lukebyrne/true-skill.ipynb

## true-skill.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              true-skill.ipynb
            
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## true-skill.py
# Build a per-game TrueSkill history for every player in the results file.
#
# Games are processed in ascending game_id order (DataFrame.groupby sorts its
# keys by default). Each player enters a game with the rating they left their
# previous game with, or the default rating on a first appearance, and one
# output row is recorded per player per game.
#
# NOTE(review): `pd`, `Rating` and `rate` are not imported in this part of the
# file; presumably pandas and the trueskill package are imported elsewhere
# -- confirm.

# Fetch the data
df = pd.read_csv('http://horse-data-abcd.s3.amazonaws.com/game_results.csv')

# Create a holding DataFrame for our TrueRank: one row per player per game with
# the pre-game (mu, sigma) and post-game (post_mu, post_sigma) ratings
df_truerank_columns = ['game_id', 'player_id', 'position', 'mu', 'sigma', 'post_mu', 'post_sigma']
df_truerank = pd.DataFrame(columns=df_truerank_columns)

# Use a sample of 1000
#df = df.head(1000)

# Group by the game_id
games = df.groupby('game_id')

# Rating given to a player with no earlier game (the TrueSkill defaults)
default_rating = (25, 8.333)

# Latest recorded (post_mu, post_sigma) per player_id. Because games are
# visited in ascending game_id order, this is exactly "the player's last row
# from an earlier game", without rescanning the whole table for every player.
latest_ratings = {}

# One DataFrame per game; they are concatenated once after the loop because
# DataFrame.append was removed in pandas 2.0 and copied the table on each call
game_frames = []

# Now iterate the games
for game_id, game in games:
    player_ids = [int(player_id) for player_id in game['player_id']]

    # Pre-game rating of each player, in the same row order as the game
    pre_ratings = [latest_ratings.get(player_id, default_rating) for player_id in player_ids]
    mus = [mu for mu, _ in pre_ratings]
    sigmas = [sigma for _, sigma in pre_ratings]

    # rate() is given a list of rating groups; each player is passed as a
    # one-member group
    rating_groups = [(Rating(mu=mu, sigma=sigma),) for mu, sigma in pre_ratings]

    # Use the positions as ranks; ranks are passed 0-based, so -1 from all of them
    positions = list(game['position'])
    ranks = [position - 1 for position in positions]

    # Get the results from the TrueSkill rate method
    # NOTE(review): rate() is expected to need at least two entrants -- confirm
    # how a game with a single player should be handled.
    results = rate(rating_groups, ranks=ranks)

    # New mu and sigma for each player, rounded to 2 decimals as stored
    post_mus = [round(group[0].mu, 2) for group in results]
    post_sigmas = [round(group[0].sigma, 2) for group in results]

    # Only update the lookup once the whole game has been rated, so every
    # player in this game was seeded from earlier games only
    for player_id, post_mu, post_sigma in zip(player_ids, post_mus, post_sigmas):
        latest_ratings[player_id] = (post_mu, post_sigma)

    # Now zip together all our lists into rows matching df_truerank_columns
    game_ids = [game_id] * len(player_ids)
    data = list(zip(game_ids, player_ids, positions, mus, sigmas, post_mus, post_sigmas))
    game_frames.append(pd.DataFrame(data, columns=df_truerank_columns))

# Add every game's rows to df_truerank; with no games the empty frame is kept.
# The per-game index (0..k-1) is preserved, as it was with append().
if game_frames:
    df_truerank = pd.concat(game_frames)
	# Build a per-game TrueSkill history for every player in the results file.
#
# NOTE(review): this section repeats the script that precedes it in this file;
# in the original text its indentation had been flattened to a single tab per
# line, so the loop and if/else bodies were not nested. The nesting is restored
# here. Consider keeping only one copy.
#
# Games are processed in ascending game_id order (DataFrame.groupby sorts its
# keys by default). Each player enters a game with the rating they left their
# previous game with, or the default rating on a first appearance, and one
# output row is recorded per player per game.
#
# NOTE(review): `pd`, `Rating` and `rate` are not imported in this part of the
# file; presumably pandas and the trueskill package are imported elsewhere
# -- confirm.

# Fetch the data
df = pd.read_csv('http://horse-data-abcd.s3.amazonaws.com/game_results.csv')

# Create a holding DataFrame for our TrueRank: one row per player per game with
# the pre-game (mu, sigma) and post-game (post_mu, post_sigma) ratings
df_truerank_columns = ['game_id', 'player_id', 'position', 'mu', 'sigma', 'post_mu', 'post_sigma']
df_truerank = pd.DataFrame(columns=df_truerank_columns)

# Use a sample of 1000
#df = df.head(1000)

# Group by the game_id
games = df.groupby('game_id')

# Rating given to a player with no earlier game (the TrueSkill defaults)
default_rating = (25, 8.333)

# Latest recorded (post_mu, post_sigma) per player_id. Because games are
# visited in ascending game_id order, this is exactly "the player's last row
# from an earlier game", without rescanning the whole table for every player.
latest_ratings = {}

# One DataFrame per game; they are concatenated once after the loop because
# DataFrame.append was removed in pandas 2.0 and copied the table on each call
game_frames = []

# Now iterate the games
for game_id, game in games:
    player_ids = [int(player_id) for player_id in game['player_id']]

    # Pre-game rating of each player, in the same row order as the game
    pre_ratings = [latest_ratings.get(player_id, default_rating) for player_id in player_ids]
    mus = [mu for mu, _ in pre_ratings]
    sigmas = [sigma for _, sigma in pre_ratings]

    # rate() is given a list of rating groups; each player is passed as a
    # one-member group
    rating_groups = [(Rating(mu=mu, sigma=sigma),) for mu, sigma in pre_ratings]

    # Use the positions as ranks; ranks are passed 0-based, so -1 from all of them
    positions = list(game['position'])
    ranks = [position - 1 for position in positions]

    # Get the results from the TrueSkill rate method
    # NOTE(review): rate() is expected to need at least two entrants -- confirm
    # how a game with a single player should be handled.
    results = rate(rating_groups, ranks=ranks)

    # New mu and sigma for each player, rounded to 2 decimals as stored
    post_mus = [round(group[0].mu, 2) for group in results]
    post_sigmas = [round(group[0].sigma, 2) for group in results]

    # Only update the lookup once the whole game has been rated, so every
    # player in this game was seeded from earlier games only
    for player_id, post_mu, post_sigma in zip(player_ids, post_mus, post_sigmas):
        latest_ratings[player_id] = (post_mu, post_sigma)

    # Now zip together all our lists into rows matching df_truerank_columns
    game_ids = [game_id] * len(player_ids)
    data = list(zip(game_ids, player_ids, positions, mus, sigmas, post_mus, post_sigmas))
    game_frames.append(pd.DataFrame(data, columns=df_truerank_columns))

# Add every game's rows to df_truerank; with no games the empty frame is kept.
# The per-game index (0..k-1) is preserved, as it was with append().
if game_frames:
    df_truerank = pd.concat(game_frames)