Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Recreating Total Baseball Park Adjustment All Teams
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Recreating the *Total Baseball* park adjustment calculations explained on [Baseball-Reference](https://www.baseball-reference.com/about/parkadjust.shtml) in Python, using `pybaseball` to load the season's game logs."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import pybaseball"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Season whose game logs are analysed.\n",
"SEASON = 1982\n",
"\n",
"# The reference checks are only enabled for the 1982 season\n",
"# and are evaluated for this team.\n",
"VERIFY_TEAM = \"ATL\"\n",
"VERIFY = (SEASON == 1982)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Game logs are cached on disk so the download only happens once per season.\n",
"SEASON_DATA_PATH = f'./data/{SEASON}.csv'\n",
"\n",
"try:\n",
"    sgl_df = pd.read_csv(SEASON_DATA_PATH)\n",
"except FileNotFoundError:\n",
"    # No cached copy yet: fetch the season's game logs and store them.\n",
"    sgl_df = pybaseball.season_game_logs(SEASON)\n",
"    # to_csv does not create missing directories, so make sure ./data exists\n",
"    # before writing (otherwise a fresh checkout fails here).\n",
"    os.makedirs(os.path.dirname(SEASON_DATA_PATH), exist_ok=True)\n",
"    sgl_df.to_csv(SEASON_DATA_PATH, index=False)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>game_num</th>\n",
" <th>day_of_week</th>\n",
" <th>visiting_team</th>\n",
" <th>visiting_team_league</th>\n",
" <th>visiting_game_num</th>\n",
" <th>home_team</th>\n",
" <th>home_team_league</th>\n",
" <th>home_team_game_num</th>\n",
" <th>visiting_score</th>\n",
" <th>...</th>\n",
" <th>home_7_name</th>\n",
" <th>home_7_pos</th>\n",
" <th>home_8_id</th>\n",
" <th>home_8_name</th>\n",
" <th>home_8_pos</th>\n",
" <th>home_9_id</th>\n",
" <th>home_9_name</th>\n",
" <th>home_9_pos</th>\n",
" <th>misc</th>\n",
" <th>acquisition_info</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>19820405</td>\n",
" <td>0</td>\n",
" <td>Mon</td>\n",
" <td>KCA</td>\n",
" <td>AL</td>\n",
" <td>1</td>\n",
" <td>BAL</td>\n",
" <td>AL</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>...</td>\n",
" <td>Gary Roenicke</td>\n",
" <td>7</td>\n",
" <td>sakal001</td>\n",
" <td>Lenn Sakata</td>\n",
" <td>6</td>\n",
" <td>dauer001</td>\n",
" <td>Rich Dauer</td>\n",
" <td>4</td>\n",
" <td>NaN</td>\n",
" <td>Y</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>19820405</td>\n",
" <td>0</td>\n",
" <td>Mon</td>\n",
" <td>CHN</td>\n",
" <td>NL</td>\n",
" <td>1</td>\n",
" <td>CIN</td>\n",
" <td>NL</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>...</td>\n",
" <td>Clint Hurdle</td>\n",
" <td>7</td>\n",
" <td>treva001</td>\n",
" <td>Alex Trevino</td>\n",
" <td>2</td>\n",
" <td>sotom001</td>\n",
" <td>Mario Soto</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>Y</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>19820406</td>\n",
" <td>0</td>\n",
" <td>Tue</td>\n",
" <td>SEA</td>\n",
" <td>AL</td>\n",
" <td>1</td>\n",
" <td>MIN</td>\n",
" <td>AL</td>\n",
" <td>1</td>\n",
" <td>11</td>\n",
" <td>...</td>\n",
" <td>Butch Wynegar</td>\n",
" <td>2</td>\n",
" <td>wilfr001</td>\n",
" <td>Rob Wilfong</td>\n",
" <td>4</td>\n",
" <td>hatcm001</td>\n",
" <td>Mickey Hatcher</td>\n",
" <td>7</td>\n",
" <td>NaN</td>\n",
" <td>Y</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>19820406</td>\n",
" <td>0</td>\n",
" <td>Tue</td>\n",
" <td>CAL</td>\n",
" <td>AL</td>\n",
" <td>1</td>\n",
" <td>OAK</td>\n",
" <td>AL</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>Davey Lopes</td>\n",
" <td>4</td>\n",
" <td>kearb001</td>\n",
" <td>Bob Kearney</td>\n",
" <td>2</td>\n",
" <td>stanf101</td>\n",
" <td>Fred Stanley</td>\n",
" <td>6</td>\n",
" <td>NaN</td>\n",
" <td>Y</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>19820406</td>\n",
" <td>0</td>\n",
" <td>Tue</td>\n",
" <td>SLN</td>\n",
" <td>NL</td>\n",
" <td>1</td>\n",
" <td>HOU</td>\n",
" <td>NL</td>\n",
" <td>1</td>\n",
" <td>14</td>\n",
" <td>...</td>\n",
" <td>Art Howe</td>\n",
" <td>3</td>\n",
" <td>reync001</td>\n",
" <td>Craig Reynolds</td>\n",
" <td>6</td>\n",
" <td>ryann001</td>\n",
" <td>Nolan Ryan</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>Y</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 161 columns</p>\n",
"</div>"
],
"text/plain": [
" date game_num day_of_week visiting_team visiting_team_league \\\n",
"0 19820405 0 Mon KCA AL \n",
"1 19820405 0 Mon CHN NL \n",
"2 19820406 0 Tue SEA AL \n",
"3 19820406 0 Tue CAL AL \n",
"4 19820406 0 Tue SLN NL \n",
"\n",
" visiting_game_num home_team home_team_league home_team_game_num \\\n",
"0 1 BAL AL 1 \n",
"1 1 CIN NL 1 \n",
"2 1 MIN AL 1 \n",
"3 1 OAK AL 1 \n",
"4 1 HOU NL 1 \n",
"\n",
" visiting_score ... home_7_name home_7_pos home_8_id \\\n",
"0 5 ... Gary Roenicke 7 sakal001 \n",
"1 3 ... Clint Hurdle 7 treva001 \n",
"2 11 ... Butch Wynegar 2 wilfr001 \n",
"3 2 ... Davey Lopes 4 kearb001 \n",
"4 14 ... Art Howe 3 reync001 \n",
"\n",
" home_8_name home_8_pos home_9_id home_9_name home_9_pos misc \\\n",
"0 Lenn Sakata 6 dauer001 Rich Dauer 4 NaN \n",
"1 Alex Trevino 2 sotom001 Mario Soto 1 NaN \n",
"2 Rob Wilfong 4 hatcm001 Mickey Hatcher 7 NaN \n",
"3 Bob Kearney 2 stanf101 Fred Stanley 6 NaN \n",
"4 Craig Reynolds 6 ryann001 Nolan Ryan 1 NaN \n",
"\n",
" acquisition_info \n",
"0 Y \n",
"1 Y \n",
"2 Y \n",
"3 Y \n",
"4 Y \n",
"\n",
"[5 rows x 161 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sgl_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Restrict to games played between two teams of the same league.\n",
"same_league = sgl_df['home_team_league'].eq(sgl_df['visiting_team_league'])\n",
"league_games = sgl_df.loc[same_league].rename_axis('game_id').reset_index()\n",
"\n",
"# One row per (game, team): every game appears once for the home side\n",
"# and once for the visitors.\n",
"long_df = league_games.melt(id_vars=['game_id', 'home_score', 'visiting_score'],\n",
"                            value_vars=['home_team', 'visiting_team'],\n",
"                            var_name='home_away',\n",
"                            value_name='team')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"is_home = long_df['home_away'].eq('home_team')\n",
"home_runs = long_df['home_score']\n",
"visitor_runs = long_df['visiting_score']\n",
"\n",
"long_df['is_home'] = is_home\n",
"\n",
"# A team wins when its own side outscored the other one;\n",
"# a tie counts as a win for neither team.\n",
"long_df['is_win'] = home_runs.gt(visitor_runs).where(is_home, visitor_runs.gt(home_runs))\n",
"\n",
"# Runs scored (r) and allowed (ra) from the point of view of the row's team.\n",
"long_df['r'] = visitor_runs.where(~is_home, home_runs)\n",
"long_df['ra'] = visitor_runs.where(is_home, home_runs)\n",
"long_df['rtot'] = long_df[['r', 'ra']].sum(axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>game_id</th>\n",
" <th>home_score</th>\n",
" <th>visiting_score</th>\n",
" <th>home_away</th>\n",
" <th>team</th>\n",
" <th>is_home</th>\n",
" <th>is_win</th>\n",
" <th>r</th>\n",
" <th>ra</th>\n",
" <th>rtot</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>13</td>\n",
" <td>5</td>\n",
" <td>home_team</td>\n",
" <td>BAL</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>13</td>\n",
" <td>5</td>\n",
" <td>18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>home_team</td>\n",
" <td>CIN</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>11</td>\n",
" <td>home_team</td>\n",
" <td>MIN</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>7</td>\n",
" <td>11</td>\n",
" <td>18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>home_team</td>\n",
" <td>OAK</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>14</td>\n",
" <td>home_team</td>\n",
" <td>HOU</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>3</td>\n",
" <td>14</td>\n",
" <td>17</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" game_id home_score visiting_score home_away team is_home is_win r \\\n",
"0 0 13 5 home_team BAL True True 13 \n",
"1 1 2 3 home_team CIN True False 2 \n",
"2 2 7 11 home_team MIN True False 7 \n",
"3 3 3 2 home_team OAK True True 3 \n",
"4 4 3 14 home_team HOU True False 3 \n",
"\n",
" ra rtot \n",
"0 5 18 \n",
"1 3 5 \n",
"2 11 18 \n",
"3 2 5 \n",
"4 14 17 "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"long_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>game_id</th>\n",
" <th>home_score</th>\n",
" <th>visiting_score</th>\n",
" <th>home_away</th>\n",
" <th>team</th>\n",
" <th>is_home</th>\n",
" <th>is_win</th>\n",
" <th>r</th>\n",
" <th>ra</th>\n",
" <th>rtot</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>4209</th>\n",
" <td>2102</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>visiting_team</td>\n",
" <td>CIN</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4210</th>\n",
" <td>2103</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>visiting_team</td>\n",
" <td>NYN</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4211</th>\n",
" <td>2104</td>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>visiting_team</td>\n",
" <td>MON</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4212</th>\n",
" <td>2105</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>visiting_team</td>\n",
" <td>ATL</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4213</th>\n",
" <td>2106</td>\n",
" <td>5</td>\n",
" <td>3</td>\n",
" <td>visiting_team</td>\n",
" <td>LAN</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" <td>8</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" game_id home_score visiting_score home_away team is_home \\\n",
"4209 2102 3 0 visiting_team CIN False \n",
"4210 2103 4 1 visiting_team NYN False \n",
"4211 2104 1 6 visiting_team MON False \n",
"4212 2105 5 1 visiting_team ATL False \n",
"4213 2106 5 3 visiting_team LAN False \n",
"\n",
" is_win r ra rtot \n",
"4209 False 0 3 3 \n",
"4210 False 1 4 5 \n",
"4211 True 6 1 7 \n",
"4212 False 1 5 6 \n",
"4213 False 3 5 8 "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"long_df.tail()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Totals per team, split into road (is_home=False) and home (is_home=True) games.\n",
"team_venue_totals = {'r': 'sum',\n",
"                     'ra': 'sum',\n",
"                     'rtot': 'sum',\n",
"                     'is_win': 'sum',    # number of wins\n",
"                     'game_id': 'size'}  # number of games played\n",
"\n",
"home_away_df = long_df.groupby(['team', 'is_home']).agg(team_venue_totals)\n",
"home_away_df = home_away_df.rename(columns={'is_win': 'wins', 'game_id': 'gp'})"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>r</th>\n",
" <th>ra</th>\n",
" <th>rtot</th>\n",
" <th>wins</th>\n",
" <th>gp</th>\n",
" </tr>\n",
" <tr>\n",
" <th>team</th>\n",
" <th>is_home</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">ATL</th>\n",
" <th>False</th>\n",
" <td>351</td>\n",
" <td>315</td>\n",
" <td>666</td>\n",
" <td>47.0</td>\n",
" <td>81</td>\n",
" </tr>\n",
" <tr>\n",
" <th>True</th>\n",
" <td>388</td>\n",
" <td>387</td>\n",
" <td>775</td>\n",
" <td>42.0</td>\n",
" <td>81</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">BAL</th>\n",
" <th>False</th>\n",
" <td>377</td>\n",
" <td>357</td>\n",
" <td>734</td>\n",
" <td>41.0</td>\n",
" <td>81</td>\n",
" </tr>\n",
" <tr>\n",
" <th>True</th>\n",
" <td>397</td>\n",
" <td>330</td>\n",
" <td>727</td>\n",
" <td>53.0</td>\n",
" <td>82</td>\n",
" </tr>\n",
" <tr>\n",
" <th>BOS</th>\n",
" <th>False</th>\n",
" <td>319</td>\n",
" <td>343</td>\n",
" <td>662</td>\n",
" <td>40.0</td>\n",
" <td>81</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" r ra rtot wins gp\n",
"team is_home \n",
"ATL False 351 315 666 47.0 81\n",
" True 388 387 775 42.0 81\n",
"BAL False 377 357 734 41.0 81\n",
" True 397 330 727 53.0 82\n",
"BOS False 319 343 662 40.0 81"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"home_away_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# Winning percentage per team: one column for road games (False)\n",
"# and one for home games (True).\n",
"win_pct = home_away_df['wins'] / home_away_df['gp']\n",
"home_away_pct = win_pct.unstack('is_home')"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>is_home</th>\n",
" <th>False</th>\n",
" <th>True</th>\n",
" </tr>\n",
" <tr>\n",
" <th>team</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>ATL</th>\n",
" <td>0.580247</td>\n",
" <td>0.518519</td>\n",
" </tr>\n",
" <tr>\n",
" <th>BAL</th>\n",
" <td>0.506173</td>\n",
" <td>0.646341</td>\n",
" </tr>\n",
" <tr>\n",
" <th>BOS</th>\n",
" <td>0.493827</td>\n",
" <td>0.604938</td>\n",
" </tr>\n",
" <tr>\n",
" <th>CAL</th>\n",
" <td>0.506173</td>\n",
" <td>0.641975</td>\n",
" </tr>\n",
" <tr>\n",
" <th>CHA</th>\n",
" <td>0.463415</td>\n",
" <td>0.612500</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"is_home False True \n",
"team \n",
"ATL 0.580247 0.518519\n",
"BAL 0.506173 0.646341\n",
"BOS 0.493827 0.604938\n",
"CAL 0.506173 0.641975\n",
"CHA 0.463415 0.612500"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"home_away_pct.head()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# Innings Pitched Corrector: (18.5 - home win pct) / (18.5 - road loss pct).\n",
"# The road loss pct is taken as 1 - road win pct, so ties count as losses.\n",
"home_win_pct = home_away_pct[True]\n",
"road_loss_pct = 1 - home_away_pct[False]\n",
"IPC = (18.5 - home_win_pct) / (18.5 - road_loss_pct)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"def verify_agreement_with_total_baseball(val, true_val, name, atol=1e-3):\n",
"    \"\"\"Check a computed value against the published Total Baseball figure.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    val : the value computed in this notebook.\n",
"    true_val : the reference value to compare against.\n",
"    name : label used in the printed / raised message.\n",
"    atol : absolute tolerance passed to ``np.allclose``.\n",
"\n",
"    Raises\n",
"    ------\n",
"    AssertionError\n",
"        If ``val`` and ``true_val`` are not close; the message reports both values.\n",
"    \"\"\"\n",
"    assert np.allclose(val, true_val, atol=atol), (\n",
"        f\"{name} = {val} does not agree with Total Baseball value {true_val} (atol={atol})\")\n",
"    print(f\"{name} agrees with Total Baseball\")"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"IPC agrees with Total Baseball\n"
]
}
],
"source": [
"if VERIFY:\n",
" verify_agreement_with_total_baseball(IPC.loc[VERIFY_TEAM], 0.995, \"IPC\")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"rpg = home_away_df['rtot'] / home_away_df['gp']"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# Run factor: runs per game at home relative to runs per game on the road,\n",
"# divided by the innings pitched corrector.\n",
"rpg_home = rpg.xs(True, level='is_home')\n",
"rpg_road = rpg.xs(False, level='is_home')\n",
"RFT = rpg_home / rpg_road / IPC"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"RFT agrees with Total Baseball\n"
]
}
],
"source": [
"if VERIFY:\n",
" verify_agreement_with_total_baseball(RFT.loc[VERIFY_TEAM], 1.170, \"RFT\")"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>game_num</th>\n",
" <th>day_of_week</th>\n",
" <th>visiting_team</th>\n",
" <th>visiting_team_league</th>\n",
" <th>visiting_game_num</th>\n",
" <th>home_team</th>\n",
" <th>home_team_league</th>\n",
" <th>home_team_game_num</th>\n",
" <th>visiting_score</th>\n",
" <th>...</th>\n",
" <th>home_7_name</th>\n",
" <th>home_7_pos</th>\n",
" <th>home_8_id</th>\n",
" <th>home_8_name</th>\n",
" <th>home_8_pos</th>\n",
" <th>home_9_id</th>\n",
" <th>home_9_name</th>\n",
" <th>home_9_pos</th>\n",
" <th>misc</th>\n",
" <th>acquisition_info</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>19820405</td>\n",
" <td>0</td>\n",
" <td>Mon</td>\n",
" <td>KCA</td>\n",
" <td>AL</td>\n",
" <td>1</td>\n",
" <td>BAL</td>\n",
" <td>AL</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>...</td>\n",
" <td>Gary Roenicke</td>\n",
" <td>7</td>\n",
" <td>sakal001</td>\n",
" <td>Lenn Sakata</td>\n",
" <td>6</td>\n",
" <td>dauer001</td>\n",
" <td>Rich Dauer</td>\n",
" <td>4</td>\n",
" <td>NaN</td>\n",
" <td>Y</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>19820405</td>\n",
" <td>0</td>\n",
" <td>Mon</td>\n",
" <td>CHN</td>\n",
" <td>NL</td>\n",
" <td>1</td>\n",
" <td>CIN</td>\n",
" <td>NL</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>...</td>\n",
" <td>Clint Hurdle</td>\n",
" <td>7</td>\n",
" <td>treva001</td>\n",
" <td>Alex Trevino</td>\n",
" <td>2</td>\n",
" <td>sotom001</td>\n",
" <td>Mario Soto</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>Y</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>19820406</td>\n",
" <td>0</td>\n",
" <td>Tue</td>\n",
" <td>SEA</td>\n",
" <td>AL</td>\n",
" <td>1</td>\n",
" <td>MIN</td>\n",
" <td>AL</td>\n",
" <td>1</td>\n",
" <td>11</td>\n",
" <td>...</td>\n",
" <td>Butch Wynegar</td>\n",
" <td>2</td>\n",
" <td>wilfr001</td>\n",
" <td>Rob Wilfong</td>\n",
" <td>4</td>\n",
" <td>hatcm001</td>\n",
" <td>Mickey Hatcher</td>\n",
" <td>7</td>\n",
" <td>NaN</td>\n",
" <td>Y</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>19820406</td>\n",
" <td>0</td>\n",
" <td>Tue</td>\n",
" <td>CAL</td>\n",
" <td>AL</td>\n",
" <td>1</td>\n",
" <td>OAK</td>\n",
" <td>AL</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>Davey Lopes</td>\n",
" <td>4</td>\n",
" <td>kearb001</td>\n",
" <td>Bob Kearney</td>\n",
" <td>2</td>\n",
" <td>stanf101</td>\n",
" <td>Fred Stanley</td>\n",
" <td>6</td>\n",
" <td>NaN</td>\n",
" <td>Y</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>19820406</td>\n",
" <td>0</td>\n",
" <td>Tue</td>\n",
" <td>SLN</td>\n",
" <td>NL</td>\n",
" <td>1</td>\n",
" <td>HOU</td>\n",
" <td>NL</td>\n",
" <td>1</td>\n",
" <td>14</td>\n",
" <td>...</td>\n",
" <td>Art Howe</td>\n",
" <td>3</td>\n",
" <td>reync001</td>\n",
" <td>Craig Reynolds</td>\n",
" <td>6</td>\n",
" <td>ryann001</td>\n",
" <td>Nolan Ryan</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>Y</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 161 columns</p>\n",
"</div>"
],
"text/plain": [
" date game_num day_of_week visiting_team visiting_team_league \\\n",
"0 19820405 0 Mon KCA AL \n",
"1 19820405 0 Mon CHN NL \n",
"2 19820406 0 Tue SEA AL \n",
"3 19820406 0 Tue CAL AL \n",
"4 19820406 0 Tue SLN NL \n",
"\n",
" visiting_game_num home_team home_team_league home_team_game_num \\\n",
"0 1 BAL AL 1 \n",
"1 1 CIN NL 1 \n",
"2 1 MIN AL 1 \n",
"3 1 OAK AL 1 \n",
"4 1 HOU NL 1 \n",
"\n",
" visiting_score ... home_7_name home_7_pos home_8_id \\\n",
"0 5 ... Gary Roenicke 7 sakal001 \n",
"1 3 ... Clint Hurdle 7 treva001 \n",
"2 11 ... Butch Wynegar 2 wilfr001 \n",
"3 2 ... Davey Lopes 4 kearb001 \n",
"4 14 ... Art Howe 3 reync001 \n",
"\n",
" home_8_name home_8_pos home_9_id home_9_name home_9_pos misc \\\n",
"0 Lenn Sakata 6 dauer001 Rich Dauer 4 NaN \n",
"1 Alex Trevino 2 sotom001 Mario Soto 1 NaN \n",
"2 Rob Wilfong 4 hatcm001 Mickey Hatcher 7 NaN \n",
"3 Bob Kearney 2 stanf101 Fred Stanley 6 NaN \n",
"4 Craig Reynolds 6 ryann001 Nolan Ryan 1 NaN \n",
"\n",
" acquisition_info \n",
"0 Y \n",
"1 Y \n",
"2 Y \n",
"3 Y \n",
"4 Y \n",
"\n",
"[5 rows x 161 columns]"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sgl_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"# NT = number of teams in each team's league: the distinct same-league clubs\n",
"# that visited it, plus the team itself.\n",
"# Interleague games are excluded so that opponents from the other league\n",
"# do not inflate the league size in seasons with interleague play.\n",
"same_league_games = sgl_df[sgl_df['home_team_league'] == sgl_df['visiting_team_league']]\n",
"\n",
"NT = (same_league_games.groupby('home_team')\n",
"                       ['visiting_team']\n",
"                       .nunique()\n",
"                       .add(1))"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"OPC = NT / (NT - 1 + RFT)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"OPC agrees with Total Baseball\n"
]
}
],
"source": [
"if VERIFY:\n",
" verify_agreement_with_total_baseball(OPC.loc[VERIFY_TEAM], 0.986, \"OPC\")"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"SF = RFT * OPC"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"SF agrees with Total Baseball\n"
]
}
],
"source": [
"if VERIFY:\n",
" verify_agreement_with_total_baseball(SF.loc[VERIFY_TEAM], 1.154, \"SF\")"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"SF1 = 1 - (SF - 1) / (NT - 1)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"SF1 agrees with Total Baseball\n"
]
}
],
"source": [
"if VERIFY:\n",
" verify_agreement_with_total_baseball(SF1.loc[VERIFY_TEAM], 0.986, \"SF1\")"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"# League run average: total runs scored by both teams per game,\n",
"# grouped by the home team's league.\n",
"# The score columns are selected with a list; selecting several groupby columns\n",
"# with a bare tuple was deprecated in pandas 1.0 and fails in pandas 2.x.\n",
"RAL = (sgl_df.groupby('home_team_league')\n",
"             [['home_score', 'visiting_score']]\n",
"             .sum()\n",
"             .sum(axis=1)\n",
"             .div(sgl_df['home_team_league']\n",
"                  .value_counts()))"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"home_team_league\n",
"AL 8.954185\n",
"NL 8.175926\n",
"dtype: float64"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"RAL"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"# League of each team, taken from its home games.\n",
"team_league_map = sgl_df.groupby('home_team')['home_team_league'].first()\n",
"\n",
"# League run average looked up for every team through its league label.\n",
"team_RAL = team_league_map.map(RAL)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"def _per_game_by_venue(home_away_df, column):\n",
"    \"\"\"Return (road, home) per-game averages of `column`, each indexed by team.\"\"\"\n",
"    per_game = home_away_df[column].div(home_away_df['gp'])\n",
"    return (per_game.xs(False, level='is_home'),\n",
"            per_game.xs(True, level='is_home'))\n",
"\n",
"\n",
"def iterate_team_ratings(TPR, RAL, NT, home_away_df):\n",
"    \"\"\"Run one pass of the team batting / pitching rating update.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    TPR : current team pitching rating (a scalar or a per-team Series).\n",
"    RAL : league run average to normalise by, per team.\n",
"    NT : number of teams in each team's league.\n",
"    home_away_df : totals indexed by (team, is_home) with columns 'r', 'ra' and 'gp'.\n",
"\n",
"    Note: this function also reads the notebook-level ``SF`` and ``SF1``,\n",
"    which must have been computed before it is called.\n",
"\n",
"    Returns\n",
"    -------\n",
"    (TBR, TPR_) : the updated team batting rating and the team pitching\n",
"    rating recomputed from it.\n",
"    \"\"\"\n",
"    # Road / home rows are selected by label rather than unpacked by position,\n",
"    # so the result does not depend on how home_away_df happens to be ordered.\n",
"    RAT, RHT = _per_game_by_venue(home_away_df, 'r')\n",
"    OAT, OHT = _per_game_by_venue(home_away_df, 'ra')\n",
"\n",
"    TBR = (RAT / SF1 + RHT / SF) * (1 + (TPR - 1) / (NT - 1)) / RAL\n",
"    TPR_ = (OAT / SF1 + OHT / SF) * (1 + (TBR - 1) / (NT - 1)) / RAL\n",
"\n",
"    return TBR, TPR_\n",
"\n",
"\n",
"def calculate_team_ratings(RAL, NT, home_away_df, TPR=1, iter=4):\n",
"    \"\"\"Iterate `iterate_team_ratings` `iter` times, starting from `TPR`.\n",
"\n",
"    Returns the (TBR, TPR) pair produced by the last pass.\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError\n",
"        If `iter` is smaller than 1, since no rating would be computed.\n",
"    \"\"\"\n",
"    # `iter` shadows the builtin, but the name is kept because callers pass it by keyword.\n",
"    if iter < 1:\n",
"        raise ValueError(\"iter must be at least 1\")\n",
"\n",
"    for _ in range(iter):\n",
"        TBR, TPR = iterate_team_ratings(TPR, RAL, NT, home_away_df)\n",
"\n",
"    return TBR, TPR"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"TBR_1iter, TPR_1iter = calculate_team_ratings(team_RAL, NT, home_away_df, iter=1)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TBR agrees with Total Baseball\n",
"TPR agrees with Total Baseball\n"
]
}
],
"source": [
"if VERIFY:\n",
" verify_agreement_with_total_baseball(TBR_1iter.loc[VERIFY_TEAM], 1.044, \"TBR\", atol=1e-2)\n",
" verify_agreement_with_total_baseball(TPR_1iter.loc[VERIFY_TEAM], 0.993, \"TPR\", atol=1e-2)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"TBR, TPR = calculate_team_ratings(team_RAL, NT, home_away_df)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"# Both park factors share the average of the home (SF) and road (SF1) scoring factors.\n",
"# The batters' factor is corrected with the pitching rating (TPR),\n",
"# the pitchers' factor with the batting rating (TBR).\n",
"mean_scoring_factor = (SF + SF1) / 2\n",
"\n",
"BPF = mean_scoring_factor / (1 + (TPR - 1) / (NT - 1))\n",
"PPF = mean_scoring_factor / (1 + (TBR - 1) / (NT - 1))"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"BPF agrees with Total Baseball\n"
]
}
],
"source": [
"if VERIFY:\n",
" verify_agreement_with_total_baseball(BPF.loc[VERIFY_TEAM], 1.07, \"BPF\")"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"team\n",
"ATL 1.070555\n",
"BAL 0.998046\n",
"BOS 1.101772\n",
"CAL 1.001600\n",
"CHA 0.974359\n",
"dtype: float64"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"BPF.head()"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"team\n",
"ATL 1.065539\n",
"BAL 0.989536\n",
"BOS 1.098629\n",
"CAL 0.987459\n",
"CHA 0.966823\n",
"dtype: float64"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"PPF.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.