lukebyrne/true-skill.ipynb

## true-skill.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: trueskill in /Users/lukebyrne/anaconda3/lib/python3.7/site-packages (0.4.5)\n",
      "Requirement already satisfied: six in /Users/lukebyrne/anaconda3/lib/python3.7/site-packages (from trueskill) (1.12.0)\n"
     ]
    }
   ],
   "source": [
    "%pip install trueskill==0.4.5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from trueskill import Rating, rate, TrueSkill"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv('http://horse-data-abcd.s3.amazonaws.com/game_results.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "df['mu'] = 25.000\n",
    "df['sigma'] = 8.333"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Let's set up one game and go through this step by step"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1610085\n",
    "# 1435686\n",
    "first_game = df[df['game_id'] == 1435686]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>date</th>\n",
       "      <th>player_id</th>\n",
       "      <th>game_id</th>\n",
       "      <th>position</th>\n",
       "      <th>mu</th>\n",
       "      <th>sigma</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>456</td>\n",
       "      <td>2015-01-01</td>\n",
       "      <td>14392.0</td>\n",
       "      <td>1435686</td>\n",
       "      <td>5</td>\n",
       "      <td>25.0</td>\n",
       "      <td>8.333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>457</td>\n",
       "      <td>2015-01-01</td>\n",
       "      <td>13350.0</td>\n",
       "      <td>1435686</td>\n",
       "      <td>4</td>\n",
       "      <td>25.0</td>\n",
       "      <td>8.333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>458</td>\n",
       "      <td>2015-01-01</td>\n",
       "      <td>14889.0</td>\n",
       "      <td>1435686</td>\n",
       "      <td>1</td>\n",
       "      <td>25.0</td>\n",
       "      <td>8.333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>459</td>\n",
       "      <td>2015-01-01</td>\n",
       "      <td>12253.0</td>\n",
       "      <td>1435686</td>\n",
       "      <td>3</td>\n",
       "      <td>25.0</td>\n",
       "      <td>8.333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>460</td>\n",
       "      <td>2015-01-01</td>\n",
       "      <td>4312.0</td>\n",
       "      <td>1435686</td>\n",
       "      <td>2</td>\n",
       "      <td>25.0</td>\n",
       "      <td>8.333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>454</td>\n",
       "      <td>2015-01-01</td>\n",
       "      <td>447.0</td>\n",
       "      <td>1435686</td>\n",
       "      <td>6</td>\n",
       "      <td>25.0</td>\n",
       "      <td>8.333</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    id        date  player_id  game_id  position    mu  sigma\n",
       "0  456  2015-01-01    14392.0  1435686         5  25.0  8.333\n",
       "1  457  2015-01-01    13350.0  1435686         4  25.0  8.333\n",
       "2  458  2015-01-01    14889.0  1435686         1  25.0  8.333\n",
       "3  459  2015-01-01    12253.0  1435686         3  25.0  8.333\n",
       "4  460  2015-01-01     4312.0  1435686         2  25.0  8.333\n",
       "5  454  2015-01-01      447.0  1435686         6  25.0  8.333"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "first_game"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Give every row of the game the same starting Rating, keyed by its DataFrame row label.\n",
    "# Only the labels are needed, so iterate the index instead of materialising each row.\n",
    "trueskills = {row_label: Rating(mu=25, sigma=8.333) for row_label in first_game.index}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(trueskill.Rating(mu=25.000, sigma=8.333),),\n",
       " (trueskill.Rating(mu=25.000, sigma=8.333),),\n",
       " (trueskill.Rating(mu=25.000, sigma=8.333),),\n",
       " (trueskill.Rating(mu=25.000, sigma=8.333),),\n",
       " (trueskill.Rating(mu=25.000, sigma=8.333),),\n",
       " (trueskill.Rating(mu=25.000, sigma=8.333),)]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Wrap each Rating in a 1-tuple (one group per player), keeping the dictionary's order\n",
    "trueskills_tuples = [(rating,) for rating in trueskills.values()]\n",
    "trueskills_tuples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[4, 3, 0, 2, 1, 5]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Positions are 1-based; subtract 1 from the whole column to get 0-based ranks\n",
    "ranks_list = (first_game['position'] - 1).tolist()\n",
    "ranks_list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(trueskill.Rating(mu=19.671, sigma=5.363),),\n",
       " (trueskill.Rating(mu=23.312, sigma=5.203),),\n",
       " (trueskill.Rating(mu=35.297, sigma=5.977),),\n",
       " (trueskill.Rating(mu=26.688, sigma=5.203),),\n",
       " (trueskill.Rating(mu=30.329, sigma=5.363),),\n",
       " (trueskill.Rating(mu=14.703, sigma=5.977),)]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results = rate(trueskills_tuples, ranks=ranks_list)\n",
    "results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[0, 1, 2, 3, 4, 5]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "indexes = list(first_game.index)\n",
    "indexes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pair each row label of the game with its rated group (a 1-tuple holding the new Rating)\n",
    "for index, rated_group in zip(indexes, results):\n",
    "    result = rated_group[0]\n",
    "    mu, sigma = round(result.mu, 2), round(result.sigma, 2)\n",
    "    # Store the rounded post-game values on the matching row of df\n",
    "    df.loc[index, 'mu'] = mu\n",
    "    df.loc[index, 'sigma'] = sigma"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>date</th>\n",
       "      <th>player_id</th>\n",
       "      <th>game_id</th>\n",
       "      <th>position</th>\n",
       "      <th>mu</th>\n",
       "      <th>sigma</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>456</td>\n",
       "      <td>2015-01-01</td>\n",
       "      <td>14392.0</td>\n",
       "      <td>1435686</td>\n",
       "      <td>5</td>\n",
       "      <td>19.67</td>\n",
       "      <td>5.36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>457</td>\n",
       "      <td>2015-01-01</td>\n",
       "      <td>13350.0</td>\n",
       "      <td>1435686</td>\n",
       "      <td>4</td>\n",
       "      <td>23.31</td>\n",
       "      <td>5.20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>458</td>\n",
       "      <td>2015-01-01</td>\n",
       "      <td>14889.0</td>\n",
       "      <td>1435686</td>\n",
       "      <td>1</td>\n",
       "      <td>35.30</td>\n",
       "      <td>5.98</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>459</td>\n",
       "      <td>2015-01-01</td>\n",
       "      <td>12253.0</td>\n",
       "      <td>1435686</td>\n",
       "      <td>3</td>\n",
       "      <td>26.69</td>\n",
       "      <td>5.20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>460</td>\n",
       "      <td>2015-01-01</td>\n",
       "      <td>4312.0</td>\n",
       "      <td>1435686</td>\n",
       "      <td>2</td>\n",
       "      <td>30.33</td>\n",
       "      <td>5.36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>454</td>\n",
       "      <td>2015-01-01</td>\n",
       "      <td>447.0</td>\n",
       "      <td>1435686</td>\n",
       "      <td>6</td>\n",
       "      <td>14.70</td>\n",
       "      <td>5.98</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    id        date  player_id  game_id  position     mu  sigma\n",
       "0  456  2015-01-01    14392.0  1435686         5  19.67   5.36\n",
       "1  457  2015-01-01    13350.0  1435686         4  23.31   5.20\n",
       "2  458  2015-01-01    14889.0  1435686         1  35.30   5.98\n",
       "3  459  2015-01-01    12253.0  1435686         3  26.69   5.20\n",
       "4  460  2015-01-01     4312.0  1435686         2  30.33   5.36\n",
       "5  454  2015-01-01      447.0  1435686         6  14.70   5.98"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[df['game_id'] == 1435686]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "#df[df['player_id'] == 16395]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## KAMIL - THIS NEEDS TO BE CHECKED AND OPTIMISED"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Now set up a sample so that we can go through this for all of them"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Take an explicit copy of the selected games: mu/sigma are written into sample_df later,\n",
    "# and assigning into a filtered slice of df raises pandas' SettingWithCopyWarning\n",
    "sample_df = df[df['game_id'].isin([1592008, 1592012, 1592238, 1610085])].copy()\n",
    "#sample_df = df.head(1000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/lukebyrne/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  self.obj[item] = s\n",
      "/Users/lukebyrne/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  self.obj[item] = s\n",
      "/Users/lukebyrne/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  self.obj[item] = s\n",
      "/Users/lukebyrne/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  self.obj[item] = s\n"
     ]
    }
   ],
   "source": [
    "# Group by the game_id (groupby yields the games in ascending game_id order)\n",
    "games = sample_df.groupby('game_id')\n",
    "\n",
    "# Now iterate the games\n",
    "for game_id, game in games:\n",
    "    # Pre-game Rating for each player, in the same order as the rows of `game`\n",
    "    pre_game_ratings = []\n",
    "    for index, row in game.iterrows():\n",
    "        # All rows for this player that sit before this one in sample_df, based on index.\n",
    "        # Named `is_prior_row` so the builtin `filter` is not shadowed in the kernel.\n",
    "        is_prior_row = (sample_df.index < index) & (sample_df['player_id'] == row['player_id'])\n",
    "        prior_rows = sample_df[is_prior_row]\n",
    "\n",
    "        if prior_rows.empty:\n",
    "            # If there isn't an earlier race then just use the TrueSkill defaults\n",
    "            mu = 25\n",
    "            sigma = 8.333\n",
    "        else:\n",
    "            # Otherwise take the mu and sigma stored on the player's last earlier row.\n",
    "            # Held in its own name so the loop variable `row` is not overwritten.\n",
    "            last_row = prior_rows.iloc[-1]\n",
    "            mu = last_row['mu']\n",
    "            sigma = last_row['sigma']\n",
    "\n",
    "        # Create a TrueSkill Rating() for this player from the values chosen above\n",
    "        pre_game_ratings.append(Rating(mu=mu, sigma=sigma))\n",
    "\n",
    "    # Wrap each Rating in a 1-tuple: one group per player\n",
    "    trueskills_tuples = [(rating,) for rating in pre_game_ratings]\n",
    "\n",
    "    # Use the positions as ranks, they are 0 based so -1 from all of them\n",
    "    ranks_list = [x - 1 for x in list(game['position'])]\n",
    "\n",
    "    # Get the results from the rate method (same order as trueskills_tuples)\n",
    "    results = rate(trueskills_tuples, ranks=ranks_list)\n",
    "\n",
    "    # Pair each row label of the game with its rated group and write the rounded\n",
    "    # post-game mu and sigma back onto sample_df for that game/player\n",
    "    for df_index, rated_group in zip(game.index, results):\n",
    "        result = rated_group[0]\n",
    "        sample_df.loc[df_index, 'mu'] = round(result.mu, 2)\n",
    "        sample_df.loc[df_index, 'sigma'] = round(result.sigma, 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "#sample_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>date</th>\n",
       "      <th>player_id</th>\n",
       "      <th>game_id</th>\n",
       "      <th>position</th>\n",
       "      <th>mu</th>\n",
       "      <th>sigma</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>816436</th>\n",
       "      <td>1397496</td>\n",
       "      <td>2019-06-19</td>\n",
       "      <td>16395.0</td>\n",
       "      <td>1592008</td>\n",
       "      <td>8</td>\n",
       "      <td>20.82</td>\n",
       "      <td>4.64</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>816477</th>\n",
       "      <td>1397459</td>\n",
       "      <td>2019-06-19</td>\n",
       "      <td>16395.0</td>\n",
       "      <td>1592012</td>\n",
       "      <td>2</td>\n",
       "      <td>28.02</td>\n",
       "      <td>3.43</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>818799</th>\n",
       "      <td>1401821</td>\n",
       "      <td>2019-06-23</td>\n",
       "      <td>16395.0</td>\n",
       "      <td>1592238</td>\n",
       "      <td>2</td>\n",
       "      <td>28.79</td>\n",
       "      <td>2.84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>912498</th>\n",
       "      <td>1566778</td>\n",
       "      <td>2019-12-31</td>\n",
       "      <td>16395.0</td>\n",
       "      <td>1610085</td>\n",
       "      <td>1</td>\n",
       "      <td>30.80</td>\n",
       "      <td>2.57</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             id        date  player_id  game_id  position     mu  sigma\n",
       "816436  1397496  2019-06-19    16395.0  1592008         8  20.82   4.64\n",
       "816477  1397459  2019-06-19    16395.0  1592012         2  28.02   3.43\n",
       "818799  1401821  2019-06-23    16395.0  1592238         2  28.79   2.84\n",
       "912498  1566778  2019-12-31    16395.0  1610085         1  30.80   2.57"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sample_df[sample_df['player_id'] == 16395]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}

## true-skill.py
# Builds df_truerank: one row per (game, player) holding the player's TrueSkill mu/sigma
# before the game and the rounded mu/sigma after it.
#
# NOTE(review): this script has no import lines of its own. It uses `pd` (pandas) and
# `Rating` / `rate` (trueskill), which presumably are already in scope from the notebook's
# import cell - confirm before running it standalone.

# Starting rating for a player with no earlier game
DEFAULT_MU = 25
DEFAULT_SIGMA = 8.333

# Fetch the data
df = pd.read_csv('http://horse-data-abcd.s3.amazonaws.com/game_results.csv')

# Column layout of the TrueRank DataFrame built at the end
df_truerank_columns = ['game_id', 'player_id', 'position', 'mu', 'sigma', 'post_mu', 'post_sigma']

# Use a sample of 1000
#df = df.head(1000)

# Group by the game_id; groupby iterates the groups in ascending game_id order, so every
# game handled earlier in the loop has a smaller game_id than the current one
games = df.groupby('game_id')

# Most recent (post_mu, post_sigma) per player_id. This replaces re-filtering the whole
# result frame for every row: a dictionary lookup returns the same "last game" values.
latest_ratings = {}

# One tuple per (game, player) in df_truerank_columns order. Collected in a plain list and
# turned into a DataFrame once, because DataFrame.append was removed in pandas 2.0 and
# appending per game copies the whole frame each time.
truerank_rows = []

# Now iterate the games
for game_id, game in games:
    # player_id is read as a float column, so cast to int (raises ValueError on NaN)
    player_ids = [int(player_id) for player_id in game['player_id']]
    positions = list(game['position'])

    # Pre-game (mu, sigma) for each player: their last post-game values, else the defaults
    pre_ratings = [latest_ratings.get(player_id, (DEFAULT_MU, DEFAULT_SIGMA))
                   for player_id in player_ids]

    if len(player_ids) > 1:
        # rate() takes one group per competitor, so wrap each Rating in a 1-tuple
        trueskills_tuples = [(Rating(mu=mu, sigma=sigma),) for mu, sigma in pre_ratings]

        # Use the positions as ranks, they are 0 based so -1 from all of them
        ranks = [x - 1 for x in positions]

        # Get the results from the TrueSkill rate method (same order as the input groups)
        results = rate(trueskills_tuples, ranks=ranks)

        # New mu and sigma for each player, rounded to 2 decimals
        post_ratings = [(round(result[0].mu, 2), round(result[0].sigma, 2))
                        for result in results]
    else:
        # rate() raises ValueError when given fewer than two groups; with nobody to compare
        # against, a lone player's rating is left unchanged
        post_ratings = list(pre_ratings)

    # Record one row per player for this game
    for player_id, position, (mu, sigma), (post_mu, post_sigma) in zip(
            player_ids, positions, pre_ratings, post_ratings):
        truerank_rows.append((game_id, player_id, position, mu, sigma, post_mu, post_sigma))

    # Update only after the whole game is rated, so a player_id listed twice in one game
    # starts both entries from the same pre-game rating; the later entry wins
    latest_ratings.update(zip(player_ids, post_ratings))

# Build the TrueRank DataFrame in one go; rows are numbered 0..N-1
df_truerank = pd.DataFrame(truerank_rows, columns=df_truerank_columns)
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Requirement already satisfied: trueskill in /Users/lukebyrne/anaconda3/lib/python3.7/site-packages (0.4.5)\n",
	"Requirement already satisfied: six in /Users/lukebyrne/anaconda3/lib/python3.7/site-packages (from trueskill) (1.12.0)\n"
	]
	}
	],
	"source": [
	"!pip install trueskill"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [],
	"source": [
	"import numpy as np\n",
	"import pandas as pd\n",
	"from trueskill import Rating, rate, TrueSkill"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [],
	"source": [
	"df = pd.read_csv('http://horse-data-abcd.s3.amazonaws.com/game_results.csv')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [],
	"source": [
	"df['mu'] = 25.000\n",
	"df['sigma'] = 8.333"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Let's set up one game and go through this step by step"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [],
	"source": [
	"# 1610085\n",
	"# 1435686\n",
	"first_game = df[df['game_id'] == 1435686]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>id</th>\n",
	" <th>date</th>\n",
	" <th>player_id</th>\n",
	" <th>game_id</th>\n",
	" <th>position</th>\n",
	" <th>mu</th>\n",
	" <th>sigma</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>456</td>\n",
	" <td>2015-01-01</td>\n",
	" <td>14392.0</td>\n",
	" <td>1435686</td>\n",
	" <td>5</td>\n",
	" <td>25.0</td>\n",
	" <td>8.333</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>457</td>\n",
	" <td>2015-01-01</td>\n",
	" <td>13350.0</td>\n",
	" <td>1435686</td>\n",
	" <td>4</td>\n",
	" <td>25.0</td>\n",
	" <td>8.333</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>458</td>\n",
	" <td>2015-01-01</td>\n",
	" <td>14889.0</td>\n",
	" <td>1435686</td>\n",
	" <td>1</td>\n",
	" <td>25.0</td>\n",
	" <td>8.333</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>459</td>\n",
	" <td>2015-01-01</td>\n",
	" <td>12253.0</td>\n",
	" <td>1435686</td>\n",
	" <td>3</td>\n",
	" <td>25.0</td>\n",
	" <td>8.333</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>460</td>\n",
	" <td>2015-01-01</td>\n",
	" <td>4312.0</td>\n",
	" <td>1435686</td>\n",
	" <td>2</td>\n",
	" <td>25.0</td>\n",
	" <td>8.333</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>5</th>\n",
	" <td>454</td>\n",
	" <td>2015-01-01</td>\n",
	" <td>447.0</td>\n",
	" <td>1435686</td>\n",
	" <td>6</td>\n",
	" <td>25.0</td>\n",
	" <td>8.333</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" id date player_id game_id position mu sigma\n",
	"0 456 2015-01-01 14392.0 1435686 5 25.0 8.333\n",
	"1 457 2015-01-01 13350.0 1435686 4 25.0 8.333\n",
	"2 458 2015-01-01 14889.0 1435686 1 25.0 8.333\n",
	"3 459 2015-01-01 12253.0 1435686 3 25.0 8.333\n",
	"4 460 2015-01-01 4312.0 1435686 2 25.0 8.333\n",
	"5 454 2015-01-01 447.0 1435686 6 25.0 8.333"
	]
	},
	"execution_count": 7,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"first_game"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {},
	"outputs": [],
	"source": [
	"trueskills = {}\n",
	"for index, row in first_game.iterrows():\n",
	" trueskills[index] = Rating(mu=25, sigma=8.333)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"[(trueskill.Rating(mu=25.000, sigma=8.333),),\n",
	" (trueskill.Rating(mu=25.000, sigma=8.333),),\n",
	" (trueskill.Rating(mu=25.000, sigma=8.333),),\n",
	" (trueskill.Rating(mu=25.000, sigma=8.333),),\n",
	" (trueskill.Rating(mu=25.000, sigma=8.333),),\n",
	" (trueskill.Rating(mu=25.000, sigma=8.333),)]"
	]
	},
	"execution_count": 9,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"trueskills_tuples = [(trueskills[x],) for x in list(trueskills.keys())]\n",
	"trueskills_tuples"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"[4, 3, 0, 2, 1, 5]"
	]
	},
	"execution_count": 10,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"ranks_list = [x - 1 for x in list(first_game['position'])]\n",
	"ranks_list"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"[(trueskill.Rating(mu=19.671, sigma=5.363),),\n",
	" (trueskill.Rating(mu=23.312, sigma=5.203),),\n",
	" (trueskill.Rating(mu=35.297, sigma=5.977),),\n",
	" (trueskill.Rating(mu=26.688, sigma=5.203),),\n",
	" (trueskill.Rating(mu=30.329, sigma=5.363),),\n",
	" (trueskill.Rating(mu=14.703, sigma=5.977),)]"
	]
	},
	"execution_count": 11,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"results = rate(trueskills_tuples, ranks=ranks_list)\n",
	"results"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"[0, 1, 2, 3, 4, 5]"
	]
	},
	"execution_count": 12,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"indexes = list(first_game.index)\n",
	"indexes"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"metadata": {},
	"outputs": [],
	"source": [
	"for i, index in enumerate(indexes, start=0):\n",
	" result = results[i][0]\n",
	" mu = round(result.mu, 2)\n",
	" sigma = round(result.sigma, 2)\n",
	" df.loc[index, 'mu'] = mu\n",
	" df.loc[index, 'sigma'] = sigma"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 14,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>id</th>\n",
	" <th>date</th>\n",
	" <th>player_id</th>\n",
	" <th>game_id</th>\n",
	" <th>position</th>\n",
	" <th>mu</th>\n",
	" <th>sigma</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>456</td>\n",
	" <td>2015-01-01</td>\n",
	" <td>14392.0</td>\n",
	" <td>1435686</td>\n",
	" <td>5</td>\n",
	" <td>19.67</td>\n",
	" <td>5.36</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>457</td>\n",
	" <td>2015-01-01</td>\n",
	" <td>13350.0</td>\n",
	" <td>1435686</td>\n",
	" <td>4</td>\n",
	" <td>23.31</td>\n",
	" <td>5.20</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>458</td>\n",
	" <td>2015-01-01</td>\n",
	" <td>14889.0</td>\n",
	" <td>1435686</td>\n",
	" <td>1</td>\n",
	" <td>35.30</td>\n",
	" <td>5.98</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>459</td>\n",
	" <td>2015-01-01</td>\n",
	" <td>12253.0</td>\n",
	" <td>1435686</td>\n",
	" <td>3</td>\n",
	" <td>26.69</td>\n",
	" <td>5.20</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>460</td>\n",
	" <td>2015-01-01</td>\n",
	" <td>4312.0</td>\n",
	" <td>1435686</td>\n",
	" <td>2</td>\n",
	" <td>30.33</td>\n",
	" <td>5.36</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>5</th>\n",
	" <td>454</td>\n",
	" <td>2015-01-01</td>\n",
	" <td>447.0</td>\n",
	" <td>1435686</td>\n",
	" <td>6</td>\n",
	" <td>14.70</td>\n",
	" <td>5.98</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" id date player_id game_id position mu sigma\n",
	"0 456 2015-01-01 14392.0 1435686 5 19.67 5.36\n",
	"1 457 2015-01-01 13350.0 1435686 4 23.31 5.20\n",
	"2 458 2015-01-01 14889.0 1435686 1 35.30 5.98\n",
	"3 459 2015-01-01 12253.0 1435686 3 26.69 5.20\n",
	"4 460 2015-01-01 4312.0 1435686 2 30.33 5.36\n",
	"5 454 2015-01-01 447.0 1435686 6 14.70 5.98"
	]
	},
	"execution_count": 14,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"df[df['game_id'] == 1435686]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 15,
	"metadata": {},
	"outputs": [],
	"source": [
	"#df[df['player_id'] == 16395]"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## KAMIL - THIS NEEDS TO BE CHECKED AND OPTIMISED"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Now set up a sample so that we can go through this for all of them"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 16,
	"metadata": {},
	"outputs": [],
	"source": [
	"sample_df = df[df['game_id'].isin([1592008, 1592012, 1592238, 1610085])]\n",
	"#sample_df = df.head(1000)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 17,
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"/Users/lukebyrne/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning: \n",
	"A value is trying to be set on a copy of a slice from a DataFrame.\n",
	"Try using .loc[row_indexer,col_indexer] = value instead\n",
	"\n",
	"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
	" self.obj[item] = s\n",
	"/Users/lukebyrne/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning: \n",
	"A value is trying to be set on a copy of a slice from a DataFrame.\n",
	"Try using .loc[row_indexer,col_indexer] = value instead\n",
	"\n",
	"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
	" self.obj[item] = s\n",
	"/Users/lukebyrne/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning: \n",
	"A value is trying to be set on a copy of a slice from a DataFrame.\n",
	"Try using .loc[row_indexer,col_indexer] = value instead\n",
	"\n",
	"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
	" self.obj[item] = s\n",
	"/Users/lukebyrne/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning: \n",
	"A value is trying to be set on a copy of a slice from a DataFrame.\n",
	"Try using .loc[row_indexer,col_indexer] = value instead\n",
	"\n",
	"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
	" self.obj[item] = s\n"
	]
	}
	],
	"source": [
	"# Group by the game_id\n",
	"games = sample_df.groupby('game_id')\n",
	"\n",
	"# Now iterate the games\n",
	"for game_id, game in games:\n",
	" # Set up a dictionary so we can hold the Trueskill object\n",
	" trueskills = {}\n",
	" # Now iterate over each player in a game\n",
	" for index, row in game.iterrows():\n",
	" # Create a filter to get all rows for a player prior to this one, based on index\n",
	" filter = (sample_df.index < index) & (sample_df['player_id'] == row['player_id'])\n",
	" # Use the filter, we only want their last race\n",
	" df_player = sample_df[filter].tail(1)\n",
	"\n",
	" # If there isnt a race then just use the TrueSkill defaults\n",
	" if (len(df_player) == 0):\n",
	" mu=25\n",
	" sigma=8.333\n",
	" else:\n",
	" # Otherwise get the mu and sigma from the players last game\n",
	" row = df_player.iloc[0]\n",
	" mu = row['mu']\n",
	" sigma = row['sigma']\n",
	" \n",
	" # Now create a TrueSkill Rating() class and pass it into the trueskills dictionary\n",
	" trueskills[index] = Rating(mu=mu, sigma=sigma)\n",
	" \n",
	" # Create tuples out of the trueskills dictionary\n",
	" trueskills_tuples = [(trueskills[x],) for x in list(trueskills.keys())]\n",
	" \n",
	" # Use the positions as ranks, they are 0 based so -1 from all of them\n",
	" ranks_list = [x - 1 for x in list(game['position'])]\n",
	" \n",
	" # Get the results from the rate method\n",
	" results = rate(trueskills_tuples, ranks=ranks_list)\n",
	" \n",
	" # Now get the indexes from the game so that we can update the players mu and sigma\n",
	" df_indexes = list(game.index)\n",
	" \n",
	" # Loop through the df_indexes, pull the result from the TrueSkills result\n",
	" for i, df_index in enumerate(df_indexes, start=0):\n",
	" result = results[i][0]\n",
	" mu = round(result.mu, 2)\n",
	" sigma = round(result.sigma, 2)\n",
	" # Add the updated mu and sigma back onto the sample_df for the game/player\n",
	" sample_df.loc[df_index, 'mu'] = mu\n",
	" sample_df.loc[df_index, 'sigma'] = sigma"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 18,
	"metadata": {},
	"outputs": [],
	"source": [
	"#sample_df"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 19,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>id</th>\n",
	" <th>date</th>\n",
	" <th>player_id</th>\n",
	" <th>game_id</th>\n",
	" <th>position</th>\n",
	" <th>mu</th>\n",
	" <th>sigma</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>816436</th>\n",
	" <td>1397496</td>\n",
	" <td>2019-06-19</td>\n",
	" <td>16395.0</td>\n",
	" <td>1592008</td>\n",
	" <td>8</td>\n",
	" <td>20.82</td>\n",
	" <td>4.64</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>816477</th>\n",
	" <td>1397459</td>\n",
	" <td>2019-06-19</td>\n",
	" <td>16395.0</td>\n",
	" <td>1592012</td>\n",
	" <td>2</td>\n",
	" <td>28.02</td>\n",
	" <td>3.43</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>818799</th>\n",
	" <td>1401821</td>\n",
	" <td>2019-06-23</td>\n",
	" <td>16395.0</td>\n",
	" <td>1592238</td>\n",
	" <td>2</td>\n",
	" <td>28.79</td>\n",
	" <td>2.84</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>912498</th>\n",
	" <td>1566778</td>\n",
	" <td>2019-12-31</td>\n",
	" <td>16395.0</td>\n",
	" <td>1610085</td>\n",
	" <td>1</td>\n",
	" <td>30.80</td>\n",
	" <td>2.57</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" id date player_id game_id position mu sigma\n",
	"816436 1397496 2019-06-19 16395.0 1592008 8 20.82 4.64\n",
	"816477 1397459 2019-06-19 16395.0 1592012 2 28.02 3.43\n",
	"818799 1401821 2019-06-23 16395.0 1592238 2 28.79 2.84\n",
	"912498 1566778 2019-12-31 16395.0 1610085 1 30.80 2.57"
	]
	},
	"execution_count": 19,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"sample_df[sample_df['player_id'] == 16395]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.7.1"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}
	# Build a per-game TrueSkill history for every player.
	#
	# For each game (in ascending game_id order) every player is rated as a
	# one-person team, starting from the rating they held after their previous
	# game, or from the defaults below if this is their first game. The result,
	# df_truerank, has one row per player per game holding the rating before
	# (mu, sigma) and after (post_mu, post_sigma) that game.

	# Fetch the data
	df = pd.read_csv('http://horse-data-abcd.s3.amazonaws.com/game_results.csv')

	# Columns of the resulting TrueSkill table
	df_truerank_columns = ['game_id', 'player_id', 'position', 'mu', 'sigma', 'post_mu', 'post_sigma']

	# Rating assigned to a player with no earlier game
	DEFAULT_MU = 25
	DEFAULT_SIGMA = 8.333

	# Use a sample of 1000
	#df = df.head(1000)

	# Group by the game_id. groupby sorts the group keys by default, so games are
	# visited in ascending game_id order and "the player's last game" is simply
	# the most recent one already processed.
	games = df.groupby('game_id')

	# player_id -> (post_mu, post_sigma) from that player's most recent game.
	# A dict lookup replaces re-filtering the whole result table for every player.
	latest_ratings = {}

	# One tuple per player per game, in df_truerank_columns order. Rows are
	# collected in a plain list and turned into a DataFrame once at the end:
	# appending to a DataFrame inside the loop is quadratic, and
	# DataFrame.append no longer exists in pandas >= 2.0.
	truerank_rows = []

	# Now iterate the games
	for game_id, game in games:
	    player_ids = [int(player_id) for player_id in game['player_id']]
	    positions = game['position'].tolist()

	    # Ratings each player brings into this game. They are all read before any
	    # update is stored, so results of this game never leak into its own priors.
	    priors = [
	        latest_ratings.get(player_id, (DEFAULT_MU, DEFAULT_SIGMA))
	        for player_id in player_ids
	    ]

	    if len(player_ids) < 2:
	        # rate() needs at least two rating groups; a game with a single
	        # entrant tells us nothing, so the rating is carried over unchanged.
	        posts = list(priors)
	    else:
	        # Every player is a team of one, hence the 1-tuples
	        rating_groups = [(Rating(mu=mu, sigma=sigma),) for mu, sigma in priors]

	        # Use the positions as ranks, they are 0 based so -1 from all of them
	        ranks = [position - 1 for position in positions]

	        # Get the results from the TrueSkill rate method
	        results = rate(rating_groups, ranks=ranks)

	        # Ratings are stored rounded to 2 decimals; the rounded values are
	        # also what seeds the player's next game.
	        posts = [
	            (round(group[0].mu, 2), round(group[0].sigma, 2))
	            for group in results
	        ]

	    for player_id, position, (mu, sigma), (post_mu, post_sigma) in zip(
	        player_ids, positions, priors, posts
	    ):
	        truerank_rows.append(
	            (game_id, player_id, position, mu, sigma, post_mu, post_sigma)
	        )
	        latest_ratings[player_id] = (post_mu, post_sigma)

	# Build the final table in one go (rows get a plain 0..n-1 index)
	df_truerank = pd.DataFrame(truerank_rows, columns=df_truerank_columns)