Skip to content

Instantly share code, notes, and snippets.

@lukebyrne
Last active January 18, 2023 03:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save lukebyrne/97e77cf57374f49a75d2914532b2adde to your computer and use it in GitHub Desktop.
Save lukebyrne/97e77cf57374f49a75d2914532b2adde to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# Build a per-game TrueSkill rating history for every player.
#
# For each game (in ascending game_id order) every player's rating going into
# the game is looked up, the game is rated from the finishing positions, and
# one row per player is recorded with the pre-game (mu, sigma) and the
# post-game (post_mu, post_sigma).
#
# NOTE(review): `pd` (pandas) and `Rating` / `rate` (presumably from the
# `trueskill` package) are not imported in this snippet -- confirm they are
# brought into scope earlier in the file/notebook.

# Rating given to a player who has no earlier game (values kept exactly as in
# the original script).
DEFAULT_MU = 25
DEFAULT_SIGMA = 8.333

# Fetch the data
df = pd.read_csv('http://horse-data-abcd.s3.amazonaws.com/game_results.csv')

# Column layout of the resulting TrueRank DataFrame
df_truerank_columns = ['game_id', 'player_id', 'position', 'mu', 'sigma', 'post_mu', 'post_sigma']

# Use a sample of 1000
#df = df.head(1000)

# Group by the game_id. sort=True is the pandas default; it is spelled out
# because the rating carry-over below relies on games being visited in
# ascending game_id order.
games = df.groupby('game_id', sort=True)

# Most recent (post_mu, post_sigma) per player_id. Because games are visited
# in ascending game_id order, this is the same rating the original obtained by
# filtering the accumulated DataFrame for the player's last earlier game, but
# without rescanning every stored row for every player.
latest_ratings = {}

# One DataFrame per processed game, concatenated once after the loop.
# DataFrame.append was removed in pandas 2.0, and growing a frame inside the
# loop copies all accumulated rows on every iteration.
game_frames = []

# Now iterate the games
for game_id, game in games:
    player_ids = [int(player_id) for player_id in game['player_id']]

    # Rating of each player going into this game. All lookups happen before
    # any update, so a player listed twice in one game gets the same pre-game
    # rating for both rows.
    mus = []
    sigmas = []
    for player_id in player_ids:
        mu, sigma = latest_ratings.get(player_id, (DEFAULT_MU, DEFAULT_SIGMA))
        mus.append(mu)
        sigmas.append(sigma)

    # rate() is given one single-member group (a 1-tuple) per player
    trueskills_tuples = [(Rating(mu=mu, sigma=sigma),) for mu, sigma in zip(mus, sigmas)]

    # Use the positions as ranks, shifted by -1 so they are 0 based
    positions = list(game['position'])
    ranks = [position - 1 for position in positions]

    # Get the results from the TrueSkill rate method
    results = rate(trueskills_tuples, ranks=ranks)

    # New mu and sigma for each player, rounded to 2 decimals. The rounded
    # values are what the player's next game starts from.
    post_mus = [round(result[0].mu, 2) for result in results]
    post_sigmas = [round(result[0].sigma, 2) for result in results]

    # Remember the post-game rating for each player's next game
    for player_id, post_mu, post_sigma in zip(player_ids, post_mus, post_sigmas):
        latest_ratings[player_id] = (post_mu, post_sigma)

    # Now zip together all our lists into one row per player
    game_ids = [game_id] * len(player_ids)
    data = list(zip(game_ids, player_ids, positions, mus, sigmas, post_mus, post_sigmas))
    game_frames.append(pd.DataFrame(data, columns=df_truerank_columns))

# Combine all games into the final TrueRank DataFrame. The index is left as
# produced per game (each game's rows restart at 0), matching what repeated
# DataFrame.append calls produced.
if game_frames:
    df_truerank = pd.concat(game_frames)
else:
    # No games at all: keep an empty frame with the expected columns
    df_truerank = pd.DataFrame(columns=df_truerank_columns)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment