Last active
February 5, 2019 13:30
-
-
Save AustinRochford/3ce28da1b7205c123366f1ac66811cfc to your computer and use it in GitHub Desktop.
Recreating Total Baseball Park Adjustment All Teams
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"This notebook reproduces the park adjustment calculations explained [here](https://www.baseball-reference.com/about/parkadjust.shtml) in Python, using `pybaseball` to load the season's game logs." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"%matplotlib inline" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import os\n", | |
"\n", | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"import pybaseball" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Season whose game logs are loaded and analysed below.\n", | |
"SEASON = 1982\n", | |
"\n", | |
"# The reference values checked later in the notebook are only compared\n", | |
"# when SEASON is 1982, and only for the team named in VERIFY_TEAM.\n", | |
"VERIFY = SEASON == 1982\n", | |
"VERIFY_TEAM = \"ATL\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"SEASON_DATA_PATH = f'./data/{SEASON}.csv'\n", | |
"\n", | |
"# Load the season's game logs from the local CSV cache when it exists;\n", | |
"# otherwise download them with pybaseball and write the cache.\n", | |
"try:\n", | |
"    sgl_df = pd.read_csv(SEASON_DATA_PATH)\n", | |
"except FileNotFoundError:\n", | |
"    sgl_df = pybaseball.season_game_logs(SEASON)\n", | |
"    # The cache directory may not exist on a fresh checkout; without this\n", | |
"    # to_csv raises and the downloaded data is never cached.\n", | |
"    os.makedirs(os.path.dirname(SEASON_DATA_PATH), exist_ok=True)\n", | |
"    sgl_df.to_csv(SEASON_DATA_PATH, index=False)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>date</th>\n", | |
" <th>game_num</th>\n", | |
" <th>day_of_week</th>\n", | |
" <th>visiting_team</th>\n", | |
" <th>visiting_team_league</th>\n", | |
" <th>visiting_game_num</th>\n", | |
" <th>home_team</th>\n", | |
" <th>home_team_league</th>\n", | |
" <th>home_team_game_num</th>\n", | |
" <th>visiting_score</th>\n", | |
" <th>...</th>\n", | |
" <th>home_7_name</th>\n", | |
" <th>home_7_pos</th>\n", | |
" <th>home_8_id</th>\n", | |
" <th>home_8_name</th>\n", | |
" <th>home_8_pos</th>\n", | |
" <th>home_9_id</th>\n", | |
" <th>home_9_name</th>\n", | |
" <th>home_9_pos</th>\n", | |
" <th>misc</th>\n", | |
" <th>acquisition_info</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>19820405</td>\n", | |
" <td>0</td>\n", | |
" <td>Mon</td>\n", | |
" <td>KCA</td>\n", | |
" <td>AL</td>\n", | |
" <td>1</td>\n", | |
" <td>BAL</td>\n", | |
" <td>AL</td>\n", | |
" <td>1</td>\n", | |
" <td>5</td>\n", | |
" <td>...</td>\n", | |
" <td>Gary Roenicke</td>\n", | |
" <td>7</td>\n", | |
" <td>sakal001</td>\n", | |
" <td>Lenn Sakata</td>\n", | |
" <td>6</td>\n", | |
" <td>dauer001</td>\n", | |
" <td>Rich Dauer</td>\n", | |
" <td>4</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Y</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>19820405</td>\n", | |
" <td>0</td>\n", | |
" <td>Mon</td>\n", | |
" <td>CHN</td>\n", | |
" <td>NL</td>\n", | |
" <td>1</td>\n", | |
" <td>CIN</td>\n", | |
" <td>NL</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>...</td>\n", | |
" <td>Clint Hurdle</td>\n", | |
" <td>7</td>\n", | |
" <td>treva001</td>\n", | |
" <td>Alex Trevino</td>\n", | |
" <td>2</td>\n", | |
" <td>sotom001</td>\n", | |
" <td>Mario Soto</td>\n", | |
" <td>1</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Y</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>19820406</td>\n", | |
" <td>0</td>\n", | |
" <td>Tue</td>\n", | |
" <td>SEA</td>\n", | |
" <td>AL</td>\n", | |
" <td>1</td>\n", | |
" <td>MIN</td>\n", | |
" <td>AL</td>\n", | |
" <td>1</td>\n", | |
" <td>11</td>\n", | |
" <td>...</td>\n", | |
" <td>Butch Wynegar</td>\n", | |
" <td>2</td>\n", | |
" <td>wilfr001</td>\n", | |
" <td>Rob Wilfong</td>\n", | |
" <td>4</td>\n", | |
" <td>hatcm001</td>\n", | |
" <td>Mickey Hatcher</td>\n", | |
" <td>7</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Y</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>19820406</td>\n", | |
" <td>0</td>\n", | |
" <td>Tue</td>\n", | |
" <td>CAL</td>\n", | |
" <td>AL</td>\n", | |
" <td>1</td>\n", | |
" <td>OAK</td>\n", | |
" <td>AL</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>...</td>\n", | |
" <td>Davey Lopes</td>\n", | |
" <td>4</td>\n", | |
" <td>kearb001</td>\n", | |
" <td>Bob Kearney</td>\n", | |
" <td>2</td>\n", | |
" <td>stanf101</td>\n", | |
" <td>Fred Stanley</td>\n", | |
" <td>6</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Y</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>19820406</td>\n", | |
" <td>0</td>\n", | |
" <td>Tue</td>\n", | |
" <td>SLN</td>\n", | |
" <td>NL</td>\n", | |
" <td>1</td>\n", | |
" <td>HOU</td>\n", | |
" <td>NL</td>\n", | |
" <td>1</td>\n", | |
" <td>14</td>\n", | |
" <td>...</td>\n", | |
" <td>Art Howe</td>\n", | |
" <td>3</td>\n", | |
" <td>reync001</td>\n", | |
" <td>Craig Reynolds</td>\n", | |
" <td>6</td>\n", | |
" <td>ryann001</td>\n", | |
" <td>Nolan Ryan</td>\n", | |
" <td>1</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Y</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>5 rows × 161 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" date game_num day_of_week visiting_team visiting_team_league \\\n", | |
"0 19820405 0 Mon KCA AL \n", | |
"1 19820405 0 Mon CHN NL \n", | |
"2 19820406 0 Tue SEA AL \n", | |
"3 19820406 0 Tue CAL AL \n", | |
"4 19820406 0 Tue SLN NL \n", | |
"\n", | |
" visiting_game_num home_team home_team_league home_team_game_num \\\n", | |
"0 1 BAL AL 1 \n", | |
"1 1 CIN NL 1 \n", | |
"2 1 MIN AL 1 \n", | |
"3 1 OAK AL 1 \n", | |
"4 1 HOU NL 1 \n", | |
"\n", | |
" visiting_score ... home_7_name home_7_pos home_8_id \\\n", | |
"0 5 ... Gary Roenicke 7 sakal001 \n", | |
"1 3 ... Clint Hurdle 7 treva001 \n", | |
"2 11 ... Butch Wynegar 2 wilfr001 \n", | |
"3 2 ... Davey Lopes 4 kearb001 \n", | |
"4 14 ... Art Howe 3 reync001 \n", | |
"\n", | |
" home_8_name home_8_pos home_9_id home_9_name home_9_pos misc \\\n", | |
"0 Lenn Sakata 6 dauer001 Rich Dauer 4 NaN \n", | |
"1 Alex Trevino 2 sotom001 Mario Soto 1 NaN \n", | |
"2 Rob Wilfong 4 hatcm001 Mickey Hatcher 7 NaN \n", | |
"3 Bob Kearney 2 stanf101 Fred Stanley 6 NaN \n", | |
"4 Craig Reynolds 6 ryann001 Nolan Ryan 1 NaN \n", | |
"\n", | |
" acquisition_info \n", | |
"0 Y \n", | |
"1 Y \n", | |
"2 Y \n", | |
"3 Y \n", | |
"4 Y \n", | |
"\n", | |
"[5 rows x 161 columns]" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sgl_df.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Reshape to one row per (game, team). Only games whose two clubs are in\n", | |
"# the same league are kept; the row label of sgl_df becomes game_id; the\n", | |
"# melt turns each game into a home_team row and a visiting_team row, both\n", | |
"# carrying the game's home_score and visiting_score.\n", | |
"long_df = (sgl_df[sgl_df['home_team_league'] == sgl_df['visiting_team_league']]\n", | |
" .rename_axis('game_id')\n", | |
" .reset_index()\n", | |
" .melt(id_vars=['game_id', 'home_score', 'visiting_score'],\n", | |
" value_vars=['home_team', 'visiting_team'],\n", | |
" var_name='home_away', value_name='team'))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# True on the rows that were melted from the home_team column.\n", | |
"long_df['is_home'] = long_df['home_away'] == 'home_team'\n", | |
"\n", | |
"# Home rows win when home_score > visiting_score; visiting rows win when\n", | |
"# visiting_score > home_score. Both comparisons are strict, so a game with\n", | |
"# equal scores counts as a win for neither row.\n", | |
"long_df['is_win'] = (long_df['home_score']\n", | |
" .gt(long_df['visiting_score'])\n", | |
" .where(long_df['is_home'],\n", | |
" long_df['visiting_score']\n", | |
" .gt(long_df['home_score'])))\n", | |
"\n", | |
"# r: runs scored by the row's team; ra: runs scored by its opponent;\n", | |
"# rtot: both teams' runs in the game.\n", | |
"long_df['r'] = long_df['home_score'].where(long_df['is_home'], long_df['visiting_score'])\n", | |
"long_df['ra'] = long_df['home_score'].where(~long_df['is_home'], long_df['visiting_score'])\n", | |
"long_df['rtot'] = long_df[['r', 'ra']].sum(axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>game_id</th>\n", | |
" <th>home_score</th>\n", | |
" <th>visiting_score</th>\n", | |
" <th>home_away</th>\n", | |
" <th>team</th>\n", | |
" <th>is_home</th>\n", | |
" <th>is_win</th>\n", | |
" <th>r</th>\n", | |
" <th>ra</th>\n", | |
" <th>rtot</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>13</td>\n", | |
" <td>5</td>\n", | |
" <td>home_team</td>\n", | |
" <td>BAL</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" <td>13</td>\n", | |
" <td>5</td>\n", | |
" <td>18</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>home_team</td>\n", | |
" <td>CIN</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2</td>\n", | |
" <td>7</td>\n", | |
" <td>11</td>\n", | |
" <td>home_team</td>\n", | |
" <td>MIN</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" <td>7</td>\n", | |
" <td>11</td>\n", | |
" <td>18</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>3</td>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" <td>home_team</td>\n", | |
" <td>OAK</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" <td>5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>4</td>\n", | |
" <td>3</td>\n", | |
" <td>14</td>\n", | |
" <td>home_team</td>\n", | |
" <td>HOU</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" <td>3</td>\n", | |
" <td>14</td>\n", | |
" <td>17</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" game_id home_score visiting_score home_away team is_home is_win r \\\n", | |
"0 0 13 5 home_team BAL True True 13 \n", | |
"1 1 2 3 home_team CIN True False 2 \n", | |
"2 2 7 11 home_team MIN True False 7 \n", | |
"3 3 3 2 home_team OAK True True 3 \n", | |
"4 4 3 14 home_team HOU True False 3 \n", | |
"\n", | |
" ra rtot \n", | |
"0 5 18 \n", | |
"1 3 5 \n", | |
"2 11 18 \n", | |
"3 2 5 \n", | |
"4 14 17 " | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"long_df.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>game_id</th>\n", | |
" <th>home_score</th>\n", | |
" <th>visiting_score</th>\n", | |
" <th>home_away</th>\n", | |
" <th>team</th>\n", | |
" <th>is_home</th>\n", | |
" <th>is_win</th>\n", | |
" <th>r</th>\n", | |
" <th>ra</th>\n", | |
" <th>rtot</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>4209</th>\n", | |
" <td>2102</td>\n", | |
" <td>3</td>\n", | |
" <td>0</td>\n", | |
" <td>visiting_team</td>\n", | |
" <td>CIN</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>0</td>\n", | |
" <td>3</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4210</th>\n", | |
" <td>2103</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>visiting_team</td>\n", | |
" <td>NYN</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>1</td>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4211</th>\n", | |
" <td>2104</td>\n", | |
" <td>1</td>\n", | |
" <td>6</td>\n", | |
" <td>visiting_team</td>\n", | |
" <td>MON</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" <td>6</td>\n", | |
" <td>1</td>\n", | |
" <td>7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4212</th>\n", | |
" <td>2105</td>\n", | |
" <td>5</td>\n", | |
" <td>1</td>\n", | |
" <td>visiting_team</td>\n", | |
" <td>ATL</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>1</td>\n", | |
" <td>5</td>\n", | |
" <td>6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4213</th>\n", | |
" <td>2106</td>\n", | |
" <td>5</td>\n", | |
" <td>3</td>\n", | |
" <td>visiting_team</td>\n", | |
" <td>LAN</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>3</td>\n", | |
" <td>5</td>\n", | |
" <td>8</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" game_id home_score visiting_score home_away team is_home \\\n", | |
"4209 2102 3 0 visiting_team CIN False \n", | |
"4210 2103 4 1 visiting_team NYN False \n", | |
"4211 2104 1 6 visiting_team MON False \n", | |
"4212 2105 5 1 visiting_team ATL False \n", | |
"4213 2106 5 3 visiting_team LAN False \n", | |
"\n", | |
" is_win r ra rtot \n", | |
"4209 False 0 3 3 \n", | |
"4210 False 1 4 5 \n", | |
"4211 True 6 1 7 \n", | |
"4212 False 1 5 6 \n", | |
"4213 False 3 5 8 " | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"long_df.tail()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Season totals per (team, is_home): runs scored (r), runs allowed (ra),\n", | |
"# combined runs (rtot), wins (sum of is_win) and games played (gp, the\n", | |
"# number of rows in each group).\n", | |
"home_away_df = (long_df.groupby(['team', 'is_home'])\n", | |
" .agg({'r': 'sum',\n", | |
" 'ra': 'sum',\n", | |
" 'rtot': 'sum',\n", | |
" 'is_win': 'sum',\n", | |
" 'game_id': 'size'})\n", | |
" .rename(columns={'is_win': 'wins', 'game_id': 'gp'}))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th>r</th>\n", | |
" <th>ra</th>\n", | |
" <th>rtot</th>\n", | |
" <th>wins</th>\n", | |
" <th>gp</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>team</th>\n", | |
" <th>is_home</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th rowspan=\"2\" valign=\"top\">ATL</th>\n", | |
" <th>False</th>\n", | |
" <td>351</td>\n", | |
" <td>315</td>\n", | |
" <td>666</td>\n", | |
" <td>47.0</td>\n", | |
" <td>81</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>True</th>\n", | |
" <td>388</td>\n", | |
" <td>387</td>\n", | |
" <td>775</td>\n", | |
" <td>42.0</td>\n", | |
" <td>81</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th rowspan=\"2\" valign=\"top\">BAL</th>\n", | |
" <th>False</th>\n", | |
" <td>377</td>\n", | |
" <td>357</td>\n", | |
" <td>734</td>\n", | |
" <td>41.0</td>\n", | |
" <td>81</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>True</th>\n", | |
" <td>397</td>\n", | |
" <td>330</td>\n", | |
" <td>727</td>\n", | |
" <td>53.0</td>\n", | |
" <td>82</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>BOS</th>\n", | |
" <th>False</th>\n", | |
" <td>319</td>\n", | |
" <td>343</td>\n", | |
" <td>662</td>\n", | |
" <td>40.0</td>\n", | |
" <td>81</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" r ra rtot wins gp\n", | |
"team is_home \n", | |
"ATL False 351 315 666 47.0 81\n", | |
" True 388 387 775 42.0 81\n", | |
"BAL False 377 357 734 41.0 81\n", | |
" True 397 330 727 53.0 82\n", | |
"BOS False 319 343 662 40.0 81" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"home_away_df.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Winning percentage per team, one column per is_home value\n", | |
"# (False = away games, True = home games).\n", | |
"home_away_pct = (home_away_df['wins']\n", | |
" .div(home_away_df['gp'])\n", | |
" .unstack(level='is_home'))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th>is_home</th>\n", | |
" <th>False</th>\n", | |
" <th>True</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>team</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>ATL</th>\n", | |
" <td>0.580247</td>\n", | |
" <td>0.518519</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>BAL</th>\n", | |
" <td>0.506173</td>\n", | |
" <td>0.646341</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>BOS</th>\n", | |
" <td>0.493827</td>\n", | |
" <td>0.604938</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>CAL</th>\n", | |
" <td>0.506173</td>\n", | |
" <td>0.641975</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>CHA</th>\n", | |
" <td>0.463415</td>\n", | |
" <td>0.612500</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"is_home False True \n", | |
"team \n", | |
"ATL 0.580247 0.518519\n", | |
"BAL 0.506173 0.646341\n", | |
"BOS 0.493827 0.604938\n", | |
"CAL 0.506173 0.641975\n", | |
"CHA 0.463415 0.612500" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"home_away_pct.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Numerator uses the home winning percentage; denominator uses the road\n", | |
"# losing percentage (1 - road winning percentage).\n", | |
"# NOTE(review): the constant 18.5 and the name IPC are taken from the linked\n", | |
"# park-adjustment write-up (presumably the innings pitched corrector) - confirm.\n", | |
"IPC = (18.5 - home_away_pct[True]) / (18.5 - (1 - home_away_pct[False]))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def verify_agreement_with_total_baseball(val, true_val, name, atol=1e-3):\n", | |
"    \"\"\"Check a computed value against its Total Baseball reference value.\n", | |
"\n", | |
"    Parameters\n", | |
"    ----------\n", | |
"    val : scalar or array-like\n", | |
"        Value computed in this notebook.\n", | |
"    true_val : scalar or array-like\n", | |
"        Reference value to compare against.\n", | |
"    name : str\n", | |
"        Label used in the printed confirmation and in the error message.\n", | |
"    atol : float, optional\n", | |
"        Absolute tolerance forwarded to ``np.allclose``.\n", | |
"\n", | |
"    Raises\n", | |
"    ------\n", | |
"    AssertionError\n", | |
"        If ``val`` is not within tolerance of ``true_val``.\n", | |
"    \"\"\"\n", | |
"    # Raise explicitly instead of using a bare assert: assert statements are\n", | |
"    # removed under python -O, which would make this check always pass.\n", | |
"    if not np.allclose(val, true_val, atol=atol):\n", | |
"        raise AssertionError(\n", | |
"            f\"{name} = {val} does not agree with Total Baseball \"\n", | |
"            f\"(expected {true_val}, atol={atol})\"\n", | |
"        )\n", | |
"    print(f\"{name} agrees with Total Baseball\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"IPC agrees with Total Baseball\n" | |
] | |
} | |
], | |
"source": [ | |
"if VERIFY:\n", | |
" verify_agreement_with_total_baseball(IPC.loc[VERIFY_TEAM], 0.995, \"IPC\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"rpg = home_away_df['rtot'] / home_away_df['gp']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Per team: runs per game (both teams' runs) in home games divided by the\n", | |
"# same figure in away games, then divided by IPC.\n", | |
"RFT = (rpg.xs(True, level='is_home')\n", | |
" .div(rpg.xs(False, level='is_home'))\n", | |
" .div(IPC))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"RFT agrees with Total Baseball\n" | |
] | |
} | |
], | |
"source": [ | |
"if VERIFY:\n", | |
" verify_agreement_with_total_baseball(RFT.loc[VERIFY_TEAM], 1.170, \"RFT\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>date</th>\n", | |
" <th>game_num</th>\n", | |
" <th>day_of_week</th>\n", | |
" <th>visiting_team</th>\n", | |
" <th>visiting_team_league</th>\n", | |
" <th>visiting_game_num</th>\n", | |
" <th>home_team</th>\n", | |
" <th>home_team_league</th>\n", | |
" <th>home_team_game_num</th>\n", | |
" <th>visiting_score</th>\n", | |
" <th>...</th>\n", | |
" <th>home_7_name</th>\n", | |
" <th>home_7_pos</th>\n", | |
" <th>home_8_id</th>\n", | |
" <th>home_8_name</th>\n", | |
" <th>home_8_pos</th>\n", | |
" <th>home_9_id</th>\n", | |
" <th>home_9_name</th>\n", | |
" <th>home_9_pos</th>\n", | |
" <th>misc</th>\n", | |
" <th>acquisition_info</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>19820405</td>\n", | |
" <td>0</td>\n", | |
" <td>Mon</td>\n", | |
" <td>KCA</td>\n", | |
" <td>AL</td>\n", | |
" <td>1</td>\n", | |
" <td>BAL</td>\n", | |
" <td>AL</td>\n", | |
" <td>1</td>\n", | |
" <td>5</td>\n", | |
" <td>...</td>\n", | |
" <td>Gary Roenicke</td>\n", | |
" <td>7</td>\n", | |
" <td>sakal001</td>\n", | |
" <td>Lenn Sakata</td>\n", | |
" <td>6</td>\n", | |
" <td>dauer001</td>\n", | |
" <td>Rich Dauer</td>\n", | |
" <td>4</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Y</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>19820405</td>\n", | |
" <td>0</td>\n", | |
" <td>Mon</td>\n", | |
" <td>CHN</td>\n", | |
" <td>NL</td>\n", | |
" <td>1</td>\n", | |
" <td>CIN</td>\n", | |
" <td>NL</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>...</td>\n", | |
" <td>Clint Hurdle</td>\n", | |
" <td>7</td>\n", | |
" <td>treva001</td>\n", | |
" <td>Alex Trevino</td>\n", | |
" <td>2</td>\n", | |
" <td>sotom001</td>\n", | |
" <td>Mario Soto</td>\n", | |
" <td>1</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Y</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>19820406</td>\n", | |
" <td>0</td>\n", | |
" <td>Tue</td>\n", | |
" <td>SEA</td>\n", | |
" <td>AL</td>\n", | |
" <td>1</td>\n", | |
" <td>MIN</td>\n", | |
" <td>AL</td>\n", | |
" <td>1</td>\n", | |
" <td>11</td>\n", | |
" <td>...</td>\n", | |
" <td>Butch Wynegar</td>\n", | |
" <td>2</td>\n", | |
" <td>wilfr001</td>\n", | |
" <td>Rob Wilfong</td>\n", | |
" <td>4</td>\n", | |
" <td>hatcm001</td>\n", | |
" <td>Mickey Hatcher</td>\n", | |
" <td>7</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Y</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>19820406</td>\n", | |
" <td>0</td>\n", | |
" <td>Tue</td>\n", | |
" <td>CAL</td>\n", | |
" <td>AL</td>\n", | |
" <td>1</td>\n", | |
" <td>OAK</td>\n", | |
" <td>AL</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>...</td>\n", | |
" <td>Davey Lopes</td>\n", | |
" <td>4</td>\n", | |
" <td>kearb001</td>\n", | |
" <td>Bob Kearney</td>\n", | |
" <td>2</td>\n", | |
" <td>stanf101</td>\n", | |
" <td>Fred Stanley</td>\n", | |
" <td>6</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Y</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>19820406</td>\n", | |
" <td>0</td>\n", | |
" <td>Tue</td>\n", | |
" <td>SLN</td>\n", | |
" <td>NL</td>\n", | |
" <td>1</td>\n", | |
" <td>HOU</td>\n", | |
" <td>NL</td>\n", | |
" <td>1</td>\n", | |
" <td>14</td>\n", | |
" <td>...</td>\n", | |
" <td>Art Howe</td>\n", | |
" <td>3</td>\n", | |
" <td>reync001</td>\n", | |
" <td>Craig Reynolds</td>\n", | |
" <td>6</td>\n", | |
" <td>ryann001</td>\n", | |
" <td>Nolan Ryan</td>\n", | |
" <td>1</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Y</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>5 rows × 161 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" date game_num day_of_week visiting_team visiting_team_league \\\n", | |
"0 19820405 0 Mon KCA AL \n", | |
"1 19820405 0 Mon CHN NL \n", | |
"2 19820406 0 Tue SEA AL \n", | |
"3 19820406 0 Tue CAL AL \n", | |
"4 19820406 0 Tue SLN NL \n", | |
"\n", | |
" visiting_game_num home_team home_team_league home_team_game_num \\\n", | |
"0 1 BAL AL 1 \n", | |
"1 1 CIN NL 1 \n", | |
"2 1 MIN AL 1 \n", | |
"3 1 OAK AL 1 \n", | |
"4 1 HOU NL 1 \n", | |
"\n", | |
" visiting_score ... home_7_name home_7_pos home_8_id \\\n", | |
"0 5 ... Gary Roenicke 7 sakal001 \n", | |
"1 3 ... Clint Hurdle 7 treva001 \n", | |
"2 11 ... Butch Wynegar 2 wilfr001 \n", | |
"3 2 ... Davey Lopes 4 kearb001 \n", | |
"4 14 ... Art Howe 3 reync001 \n", | |
"\n", | |
" home_8_name home_8_pos home_9_id home_9_name home_9_pos misc \\\n", | |
"0 Lenn Sakata 6 dauer001 Rich Dauer 4 NaN \n", | |
"1 Alex Trevino 2 sotom001 Mario Soto 1 NaN \n", | |
"2 Rob Wilfong 4 hatcm001 Mickey Hatcher 7 NaN \n", | |
"3 Bob Kearney 2 stanf101 Fred Stanley 6 NaN \n", | |
"4 Craig Reynolds 6 ryann001 Nolan Ryan 1 NaN \n", | |
"\n", | |
" acquisition_info \n", | |
"0 Y \n", | |
"1 Y \n", | |
"2 Y \n", | |
"3 Y \n", | |
"4 Y \n", | |
"\n", | |
"[5 rows x 161 columns]" | |
] | |
}, | |
"execution_count": 20, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sgl_df.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# NT: number of teams in each team's own league, indexed by team.\n", | |
"# Counting league members (instead of distinct visitors + 1) keeps NT\n", | |
"# correct for seasons with interleague games, where a team's visitors\n", | |
"# include clubs from the other league.\n", | |
"NT = (sgl_df.groupby('home_team')\n", | |
"            ['home_team_league']\n", | |
"            .first()\n", | |
"            .map(sgl_df.groupby('home_team_league')\n", | |
"                       ['home_team']\n", | |
"                       .nunique()))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"OPC = NT / (NT - 1 + RFT)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"OPC agrees with Total Baseball\n" | |
] | |
} | |
], | |
"source": [ | |
"if VERIFY:\n", | |
" verify_agreement_with_total_baseball(OPC.loc[VERIFY_TEAM], 0.986, \"OPC\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"SF = RFT * OPC" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": { | |
"scrolled": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"SF agrees with Total Baseball\n" | |
] | |
} | |
], | |
"source": [ | |
"if VERIFY:\n", | |
" verify_agreement_with_total_baseball(SF.loc[VERIFY_TEAM], 1.154, \"SF\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"SF1 = 1 - (SF - 1) / (NT - 1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": { | |
"scrolled": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"SF1 agrees with Total Baseball\n" | |
] | |
} | |
], | |
"source": [ | |
"if VERIFY:\n", | |
" verify_agreement_with_total_baseball(SF1.loc[VERIFY_TEAM], 0.986, \"SF1\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Runs per game (both teams combined) in each league: home plus visiting\n", | |
"# runs summed over every game hosted in the league, divided by the number\n", | |
"# of those games.\n", | |
"# The columns are selected with a list: tuple-style selection on a groupby\n", | |
"# (['a', 'b'] without the inner brackets) was removed in pandas 2.0.\n", | |
"RAL = (sgl_df.groupby('home_team_league')\n", | |
"             [['home_score', 'visiting_score']]\n", | |
"             .sum()\n", | |
"             .sum(axis=1)\n", | |
"             .div(sgl_df['home_team_league']\n", | |
"                        .value_counts()))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"home_team_league\n", | |
"AL 8.954185\n", | |
"NL 8.175926\n", | |
"dtype: float64" | |
] | |
}, | |
"execution_count": 29, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"RAL" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# League of each team, read from the first game it hosted.\n", | |
"team_league_map = (sgl_df.groupby('home_team')\n", | |
" ['home_team_league']\n", | |
" .first())\n", | |
"\n", | |
"# Expand the per-league RAL into a per-team Series by looking up each\n", | |
"# team's league.\n", | |
"team_RAL = team_league_map.apply(lambda league: RAL.loc[league])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 31, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def iterate_team_ratings(TPR, RAL, NT, home_away_df, sf=None, sf1=None):\n", | |
"    \"\"\"Run one pass of the paired TBR / TPR rating update.\n", | |
"\n", | |
"    Parameters\n", | |
"    ----------\n", | |
"    TPR : scalar or Series indexed by team\n", | |
"        Current value of the rating built from runs allowed.\n", | |
"    RAL : scalar or Series indexed by team\n", | |
"        Runs-per-game figure both ratings are divided by.\n", | |
"    NT : scalar or Series indexed by team\n", | |
"        Team count; only used through ``NT - 1``.\n", | |
"    home_away_df : DataFrame\n", | |
"        Indexed by (team, is_home) with columns ``r``, ``ra`` and ``gp``.\n", | |
"    sf, sf1 : scalar or Series indexed by team, optional\n", | |
"        Divisors applied to the home (``sf``) and away (``sf1``) per-game\n", | |
"        run rates. When omitted, the notebook-level ``SF`` and ``SF1``\n", | |
"        are used.\n", | |
"\n", | |
"    Returns\n", | |
"    -------\n", | |
"    tuple\n", | |
"        ``(TBR, TPR_)``: the rating built from runs scored, computed from\n", | |
"        the incoming ``TPR``, and the rating built from runs allowed,\n", | |
"        computed from that new ``TBR``.\n", | |
"    \"\"\"\n", | |
"    sf = SF if sf is None else sf\n", | |
"    sf1 = SF1 if sf1 is None else sf1\n", | |
"\n", | |
"    runs_per_game = home_away_df['r'].div(home_away_df['gp'])\n", | |
"    allowed_per_game = home_away_df['ra'].div(home_away_df['gp'])\n", | |
"\n", | |
"    # Pick the away / home rows by label so the result does not depend on\n", | |
"    # the row order of home_away_df.\n", | |
"    RAT = runs_per_game.xs(False, level='is_home')\n", | |
"    RHT = runs_per_game.xs(True, level='is_home')\n", | |
"    OAT = allowed_per_game.xs(False, level='is_home')\n", | |
"    OHT = allowed_per_game.xs(True, level='is_home')\n", | |
"\n", | |
"    TBR = (RAT / sf1 + RHT / sf) * (1 + (TPR - 1) / (NT - 1)) / RAL\n", | |
"    TPR_ = (OAT / sf1 + OHT / sf) * (1 + (TBR - 1) / (NT - 1)) / RAL\n", | |
"\n", | |
"    return TBR, TPR_\n", | |
"\n", | |
"def calculate_team_ratings(RAL, NT, home_away_df, TPR=1, iter=4, sf=None, sf1=None):\n", | |
"    \"\"\"Apply ``iterate_team_ratings`` ``iter`` times, feeding TPR back in.\n", | |
"\n", | |
"    Parameters\n", | |
"    ----------\n", | |
"    RAL, NT, home_away_df, sf, sf1\n", | |
"        Forwarded unchanged to ``iterate_team_ratings`` on every pass.\n", | |
"    TPR : scalar or Series indexed by team, optional\n", | |
"        Starting value for the first pass.\n", | |
"    iter : int, optional\n", | |
"        Number of passes; must be at least 1. The name shadows the builtin\n", | |
"        and is kept because callers pass it by keyword.\n", | |
"\n", | |
"    Returns\n", | |
"    -------\n", | |
"    tuple\n", | |
"        ``(TBR, TPR)`` from the last pass.\n", | |
"\n", | |
"    Raises\n", | |
"    ------\n", | |
"    ValueError\n", | |
"        If ``iter`` is less than 1, since no rating would be computed.\n", | |
"    \"\"\"\n", | |
"    if iter < 1:\n", | |
"        raise ValueError(f'iter must be at least 1, got {iter}')\n", | |
"\n", | |
"    for _ in range(iter):\n", | |
"        TBR, TPR = iterate_team_ratings(TPR, RAL, NT, home_away_df, sf=sf, sf1=sf1)\n", | |
"\n", | |
"    return TBR, TPR" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 32, | |
"metadata": { | |
"scrolled": false | |
}, | |
"outputs": [], | |
"source": [ | |
"TBR_1iter, TPR_1iter = calculate_team_ratings(team_RAL, NT, home_away_df, iter=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 33, | |
"metadata": { | |
"scrolled": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"TBR agrees with Total Baseball\n", | |
"TPR agrees with Total Baseball\n" | |
] | |
} | |
], | |
"source": [ | |
"if VERIFY:\n", | |
" verify_agreement_with_total_baseball(TBR_1iter.loc[VERIFY_TEAM], 1.044, \"TBR\", atol=1e-2)\n", | |
" verify_agreement_with_total_baseball(TPR_1iter.loc[VERIFY_TEAM], 0.993, \"TPR\", atol=1e-2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"TBR, TPR = calculate_team_ratings(team_RAL, NT, home_away_df)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Final factors. Both share the numerator (SF + SF1); the denominator of BPF\n", | |
"# uses TPR and the denominator of PPF uses TBR.\n", | |
"# NOTE(review): BPF / PPF presumably stand for batter / pitcher park factor,\n", | |
"# per the linked park-adjustment write-up - confirm.\n", | |
"BPF = (SF + SF1) / (2 * (1 + (TPR - 1) / (NT - 1)))\n", | |
"PPF = (SF + SF1) / (2 * (1 + (TBR - 1) / (NT - 1)))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 36, | |
"metadata": { | |
"scrolled": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"BPF agrees with Total Baseball\n" | |
] | |
} | |
], | |
"source": [ | |
"if VERIFY:\n", | |
" verify_agreement_with_total_baseball(BPF.loc[VERIFY_TEAM], 1.07, \"BPF\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 37, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"team\n", | |
"ATL 1.070555\n", | |
"BAL 0.998046\n", | |
"BOS 1.101772\n", | |
"CAL 1.001600\n", | |
"CHA 0.974359\n", | |
"dtype: float64" | |
] | |
}, | |
"execution_count": 37, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"BPF.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 38, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"team\n", | |
"ATL 1.065539\n", | |
"BAL 0.989536\n", | |
"BOS 1.098629\n", | |
"CAL 0.987459\n", | |
"CHA 0.966823\n", | |
"dtype: float64" | |
] | |
}, | |
"execution_count": 38, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"PPF.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment