Skip to content

Instantly share code, notes, and snippets.

@lukebyrne
Last active January 18, 2023 03:01
Show Gist options
  • Save lukebyrne/97e77cf57374f49a75d2914532b2adde to your computer and use it in GitHub Desktop.
Save lukebyrne/97e77cf57374f49a75d2914532b2adde to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: trueskill in /Users/lukebyrne/anaconda3/lib/python3.7/site-packages (0.4.5)\n",
"Requirement already satisfied: six in /Users/lukebyrne/anaconda3/lib/python3.7/site-packages (from trueskill) (1.12.0)\n"
]
}
],
"source": [
"!pip install trueskill"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from trueskill import Rating, rate, TrueSkill"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv('http://horse-data-abcd.s3.amazonaws.com/game_results.csv')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"df['mu'] = 25.000\n",
"df['sigma'] = 8.333"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Let's set up one game and go through it step by step"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# 1610085\n",
"# 1435686\n",
"first_game = df[df['game_id'] == 1435686]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>date</th>\n",
" <th>player_id</th>\n",
" <th>game_id</th>\n",
" <th>position</th>\n",
" <th>mu</th>\n",
" <th>sigma</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>456</td>\n",
" <td>2015-01-01</td>\n",
" <td>14392.0</td>\n",
" <td>1435686</td>\n",
" <td>5</td>\n",
" <td>25.0</td>\n",
" <td>8.333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>457</td>\n",
" <td>2015-01-01</td>\n",
" <td>13350.0</td>\n",
" <td>1435686</td>\n",
" <td>4</td>\n",
" <td>25.0</td>\n",
" <td>8.333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>458</td>\n",
" <td>2015-01-01</td>\n",
" <td>14889.0</td>\n",
" <td>1435686</td>\n",
" <td>1</td>\n",
" <td>25.0</td>\n",
" <td>8.333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>459</td>\n",
" <td>2015-01-01</td>\n",
" <td>12253.0</td>\n",
" <td>1435686</td>\n",
" <td>3</td>\n",
" <td>25.0</td>\n",
" <td>8.333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>460</td>\n",
" <td>2015-01-01</td>\n",
" <td>4312.0</td>\n",
" <td>1435686</td>\n",
" <td>2</td>\n",
" <td>25.0</td>\n",
" <td>8.333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>454</td>\n",
" <td>2015-01-01</td>\n",
" <td>447.0</td>\n",
" <td>1435686</td>\n",
" <td>6</td>\n",
" <td>25.0</td>\n",
" <td>8.333</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id date player_id game_id position mu sigma\n",
"0 456 2015-01-01 14392.0 1435686 5 25.0 8.333\n",
"1 457 2015-01-01 13350.0 1435686 4 25.0 8.333\n",
"2 458 2015-01-01 14889.0 1435686 1 25.0 8.333\n",
"3 459 2015-01-01 12253.0 1435686 3 25.0 8.333\n",
"4 460 2015-01-01 4312.0 1435686 2 25.0 8.333\n",
"5 454 2015-01-01 447.0 1435686 6 25.0 8.333"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"first_game"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"trueskills = {}\n",
"for index, row in first_game.iterrows():\n",
" trueskills[index] = Rating(mu=25, sigma=8.333)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[(trueskill.Rating(mu=25.000, sigma=8.333),),\n",
" (trueskill.Rating(mu=25.000, sigma=8.333),),\n",
" (trueskill.Rating(mu=25.000, sigma=8.333),),\n",
" (trueskill.Rating(mu=25.000, sigma=8.333),),\n",
" (trueskill.Rating(mu=25.000, sigma=8.333),),\n",
" (trueskill.Rating(mu=25.000, sigma=8.333),)]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"trueskills_tuples = [(trueskills[x],) for x in list(trueskills.keys())]\n",
"trueskills_tuples"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[4, 3, 0, 2, 1, 5]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ranks_list = [x - 1 for x in list(first_game['position'])]\n",
"ranks_list"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[(trueskill.Rating(mu=19.671, sigma=5.363),),\n",
" (trueskill.Rating(mu=23.312, sigma=5.203),),\n",
" (trueskill.Rating(mu=35.297, sigma=5.977),),\n",
" (trueskill.Rating(mu=26.688, sigma=5.203),),\n",
" (trueskill.Rating(mu=30.329, sigma=5.363),),\n",
" (trueskill.Rating(mu=14.703, sigma=5.977),)]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results = rate(trueskills_tuples, ranks=ranks_list)\n",
"results"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[0, 1, 2, 3, 4, 5]"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"indexes = list(first_game.index)\n",
"indexes"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"for i, index in enumerate(indexes, start=0):\n",
" result = results[i][0]\n",
" mu = round(result.mu, 2)\n",
" sigma = round(result.sigma, 2)\n",
" df.loc[index, 'mu'] = mu\n",
" df.loc[index, 'sigma'] = sigma"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>date</th>\n",
" <th>player_id</th>\n",
" <th>game_id</th>\n",
" <th>position</th>\n",
" <th>mu</th>\n",
" <th>sigma</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>456</td>\n",
" <td>2015-01-01</td>\n",
" <td>14392.0</td>\n",
" <td>1435686</td>\n",
" <td>5</td>\n",
" <td>19.67</td>\n",
" <td>5.36</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>457</td>\n",
" <td>2015-01-01</td>\n",
" <td>13350.0</td>\n",
" <td>1435686</td>\n",
" <td>4</td>\n",
" <td>23.31</td>\n",
" <td>5.20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>458</td>\n",
" <td>2015-01-01</td>\n",
" <td>14889.0</td>\n",
" <td>1435686</td>\n",
" <td>1</td>\n",
" <td>35.30</td>\n",
" <td>5.98</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>459</td>\n",
" <td>2015-01-01</td>\n",
" <td>12253.0</td>\n",
" <td>1435686</td>\n",
" <td>3</td>\n",
" <td>26.69</td>\n",
" <td>5.20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>460</td>\n",
" <td>2015-01-01</td>\n",
" <td>4312.0</td>\n",
" <td>1435686</td>\n",
" <td>2</td>\n",
" <td>30.33</td>\n",
" <td>5.36</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>454</td>\n",
" <td>2015-01-01</td>\n",
" <td>447.0</td>\n",
" <td>1435686</td>\n",
" <td>6</td>\n",
" <td>14.70</td>\n",
" <td>5.98</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id date player_id game_id position mu sigma\n",
"0 456 2015-01-01 14392.0 1435686 5 19.67 5.36\n",
"1 457 2015-01-01 13350.0 1435686 4 23.31 5.20\n",
"2 458 2015-01-01 14889.0 1435686 1 35.30 5.98\n",
"3 459 2015-01-01 12253.0 1435686 3 26.69 5.20\n",
"4 460 2015-01-01 4312.0 1435686 2 30.33 5.36\n",
"5 454 2015-01-01 447.0 1435686 6 14.70 5.98"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['game_id'] == 1435686]"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"#df[df['player_id'] == 16395]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## KAMIL - THIS NEEDS TO BE CHECKED AND OPTIMISED"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Now set up a sample so that we can go through this for every game in it"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"sample_df = df[df['game_id'].isin([1592008, 1592012, 1592238, 1610085])]\n",
"#sample_df = df.head(1000)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/lukebyrne/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
" self.obj[item] = s\n",
"/Users/lukebyrne/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
" self.obj[item] = s\n",
"/Users/lukebyrne/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
" self.obj[item] = s\n",
"/Users/lukebyrne/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
" self.obj[item] = s\n"
]
}
],
"source": [
"# Group by the game_id\n",
"games = sample_df.groupby('game_id')\n",
"\n",
"# Now iterate the games\n",
"for game_id, game in games:\n",
" # Set up a dictionary so we can hold the Trueskill object\n",
" trueskills = {}\n",
" # Now iterate over each player in a game\n",
" for index, row in game.iterrows():\n",
" # Create a filter to get all rows for a player prior to this one, based on index\n",
" filter = (sample_df.index < index) & (sample_df['player_id'] == row['player_id'])\n",
" # Use the filter, we only want their last race\n",
" df_player = sample_df[filter].tail(1)\n",
"\n",
" # If there isnt a race then just use the TrueSkill defaults\n",
" if (len(df_player) == 0):\n",
" mu=25\n",
" sigma=8.333\n",
" else:\n",
" # Otherwise get the mu and sigma from the players last game\n",
" row = df_player.iloc[0]\n",
" mu = row['mu']\n",
" sigma = row['sigma']\n",
" \n",
" # Now create a TrueSkull Rating() class and pass it into the trueskills dictionary\n",
" trueskills[index] = Rating(mu=mu, sigma=sigma)\n",
" \n",
" # Create tuples out of the trueskills dictionary\n",
" trueskills_tuples = [(trueskills[x],) for x in list(trueskills.keys())]\n",
" \n",
" # Use the positions as ranks, they are 0 based so -1 from all of them\n",
" ranks_list = [x - 1 for x in list(game['position'])]\n",
" \n",
" # Get the results from the rate method\n",
" results = rate(trueskills_tuples, ranks=ranks_list)\n",
" \n",
" # Now get the indexes from the game so that we can update the players mu and sigma\n",
" df_indexes = list(game.index)\n",
" \n",
" # Loop through the df_indexes, pull the result from the TrueSkills result\n",
" for i, df_index in enumerate(df_indexes, start=0):\n",
" result = results[i][0]\n",
" mu = round(result.mu, 2)\n",
" sigma = round(result.sigma, 2)\n",
" # Add the updated mu and sigma back onto the sample_df for the game/player\n",
" sample_df.loc[df_index, 'mu'] = mu\n",
" sample_df.loc[df_index, 'sigma'] = sigma"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"#sample_df"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>date</th>\n",
" <th>player_id</th>\n",
" <th>game_id</th>\n",
" <th>position</th>\n",
" <th>mu</th>\n",
" <th>sigma</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>816436</th>\n",
" <td>1397496</td>\n",
" <td>2019-06-19</td>\n",
" <td>16395.0</td>\n",
" <td>1592008</td>\n",
" <td>8</td>\n",
" <td>20.82</td>\n",
" <td>4.64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>816477</th>\n",
" <td>1397459</td>\n",
" <td>2019-06-19</td>\n",
" <td>16395.0</td>\n",
" <td>1592012</td>\n",
" <td>2</td>\n",
" <td>28.02</td>\n",
" <td>3.43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>818799</th>\n",
" <td>1401821</td>\n",
" <td>2019-06-23</td>\n",
" <td>16395.0</td>\n",
" <td>1592238</td>\n",
" <td>2</td>\n",
" <td>28.79</td>\n",
" <td>2.84</td>\n",
" </tr>\n",
" <tr>\n",
" <th>912498</th>\n",
" <td>1566778</td>\n",
" <td>2019-12-31</td>\n",
" <td>16395.0</td>\n",
" <td>1610085</td>\n",
" <td>1</td>\n",
" <td>30.80</td>\n",
" <td>2.57</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id date player_id game_id position mu sigma\n",
"816436 1397496 2019-06-19 16395.0 1592008 8 20.82 4.64\n",
"816477 1397459 2019-06-19 16395.0 1592012 2 28.02 3.43\n",
"818799 1401821 2019-06-23 16395.0 1592238 2 28.79 2.84\n",
"912498 1566778 2019-12-31 16395.0 1610085 1 30.80 2.57"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sample_df[sample_df['player_id'] == 16395]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
# Build a per-game TrueSkill rating history for every player.
#
# For each game (in ascending game_id order) every player enters with the
# rating they left their previous game with (or the TrueSkill defaults if they
# have no previous game), the game is rated from the finishing positions, and
# one output row per player records the pre-game and post-game mu/sigma.
import pandas as pd
from trueskill import Rating, rate

# Location of the game results CSV
DATA_URL = 'http://horse-data-abcd.s3.amazonaws.com/game_results.csv'
# TrueSkill defaults used for a player with no earlier game
DEFAULT_MU = 25
DEFAULT_SIGMA = 8.333
# Set to an integer (e.g. 1000) to only process the first rows of the data
SAMPLE_SIZE = None

# Fetch the data
df = pd.read_csv(DATA_URL)
if SAMPLE_SIZE:
    df = df.head(SAMPLE_SIZE)

# Column layout of the TrueRank DataFrame
df_truerank_columns = ['game_id', 'player_id', 'position', 'mu', 'sigma', 'post_mu', 'post_sigma']

# Latest (post_mu, post_sigma) per player_id. A dict lookup replaces scanning
# the whole result frame once per player row.
latest_ratings = {}
# One tuple per (game, player); turned into a DataFrame once at the end instead
# of appending a temporary frame per game (DataFrame.append was removed in
# pandas 2.0 and copies the whole frame on every call).
records = []

# Group by the game_id; groupby sorts the keys, so games are visited in
# ascending game_id order and "latest rating" always means an earlier game_id.
games = df.groupby('game_id')

# Now iterate the games
for game_id, game in games:
    # player_id is stored as an int in the output (it is read as a float from the CSV)
    player_ids = [int(player_id) for player_id in game['player_id']]
    positions = game['position'].tolist()

    # Pre-game rating of every player: the result of their last game, or the defaults
    priors = [latest_ratings.get(player_id, (DEFAULT_MU, DEFAULT_SIGMA)) for player_id in player_ids]

    # rate() takes a list of teams; every player is a one-member team
    rating_groups = [(Rating(mu=mu, sigma=sigma),) for mu, sigma in priors]
    # Use the positions as ranks, they are 0 based so -1 from all of them
    ranks = [position - 1 for position in positions]

    # Get the results from the TrueSkill rate method
    # NOTE(review): rate() presumably needs at least two entrants per game -
    # confirm the data contains no single-runner games.
    results = rate(rating_groups, ranks=ranks)

    # Record the new mu and sigma for each player. All priors for this game were
    # looked up above, so updating latest_ratings here cannot affect this game.
    for player_id, position, (mu, sigma), result in zip(player_ids, positions, priors, results):
        post_mu = round(result[0].mu, 2)
        post_sigma = round(result[0].sigma, 2)
        records.append((game_id, player_id, position, mu, sigma, post_mu, post_sigma))
        # The rounded values are what the player's next game starts from
        latest_ratings[player_id] = (post_mu, post_sigma)

# Create the TrueRank DataFrame in one go
df_truerank = pd.DataFrame(records, columns=df_truerank_columns)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment