-
-
Save lambertchu/3aa1516cae27814325b4c4c6aa0611fe to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Bucket games by ML and compare to actual win percentages. Are the ML actually predictive of final results?
# For now, we will only consider moneylines from Pinnacle!
#
# Output: `df_win_rate`, one row per probability bin with the columns
#   bin, actual_win_rate, expected_win_rate, residual, count
# NOTE(review): 'win_prob_norm_PIN' is presumably the vig-free implied win
# probability derived from 'ml_PIN' -- confirm where df_nba_lines is built.

# Place outcomes into bins based upon their pregame implied win probabilities
bins = 20
pin_columns = [
    'key', 'date', 'ml_time', 'team', 'opp_team', 'score',
    'ml_PIN', 'outcome', 'win_prob_PIN', 'win_prob_norm_PIN',
]
# .copy() gives an independent frame, so adding the 'bin' column below does not
# write into a slice of df_nba_lines (which triggers SettingWithCopyWarning).
df_pin = df_nba_lines[pin_columns].copy()
df_pin['bin'] = pd.cut(df_pin['win_prob_norm_PIN'], bins=bins)

### Now, the goal is to calculate the win rate for each bin
# Start with grouping by bin and game outcome (W or L).
# observed=False keeps every bin produced by pd.cut, including empty ones.
outcomes = df_pin.groupby(['bin', 'outcome'], observed=False).size()

# Calculate the win AND loss rates for each bin based on game outcomes.
# transform('sum') broadcasts each bin's total back onto the (bin, outcome)
# index, so the division is index-aligned; an empty bin yields 0/0 = NaN.
bin_totals = outcomes.groupby(level='bin', observed=False).transform('sum')
win_rate = outcomes / bin_totals

# Keep only the win rate (drop the loss rate), indexed by bin
is_win = win_rate.index.get_level_values('outcome') == 'W'
actual_win_rate_series = win_rate[is_win].reset_index('outcome', drop=True)

# Average implied win rate of each bin. This will be the "expected win rate."
# The column is selected before aggregating so that only this numeric column
# is averaged (the frame also holds non-numeric columns).
bin_groups = df_pin.groupby('bin', observed=False)
expected_win_rate_series = bin_groups['win_prob_norm_PIN'].mean()

# Number of lines (teams) in each bin
size_series = bin_groups.size()

# Assemble the table. Every piece is indexed by bin, so values are matched by
# bin label rather than by row position.
df_win_rate = pd.DataFrame({
    'actual_win_rate': actual_win_rate_series,
    'expected_win_rate': expected_win_rate_series,
})
# Calculate residuals (actual minus hypothetical)
df_win_rate['residual'] = df_win_rate['actual_win_rate'] - df_win_rate['expected_win_rate']
df_win_rate['count'] = size_series
df_win_rate = df_win_rate.rename_axis('bin').reset_index()

# Nicely formatted HTML table (rendered when this is the last expression of a notebook cell)
df_win_rate.style.format({
    'actual_win_rate': '{:,.2%}',
    'expected_win_rate': '{:,.2%}',
    'residual': '{:,.2%}',
})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment