### Bucket games by ML and compare to actual win percentages. Are the MLs actually predictive of final results?
# For now, we will only consider moneylines from Pinnacle!
# Place outcomes into bins based upon their pregame implied win probabilities.
bins = 20
# .copy() gives df_pin its own data, so adding the 'bin' column below does not
# write into a slice of df_nba_lines (which triggers SettingWithCopyWarning).
df_pin = df_nba_lines[[
    'key', 'date', 'ml_time', 'team', 'opp_team', 'score', 'ml_PIN',
    'outcome', 'win_prob_PIN', 'win_prob_norm_PIN',
]].copy()
df_pin['bin'] = pd.cut(df_pin['win_prob_norm_PIN'], bins=bins)

### Now, the goal is to calculate the win rate for each bin.
# Start with grouping by bin and game outcome (W or L).
# 'bin' is categorical, so observed=False is spelled out: empty bins stay in
# the result no matter which default the installed pandas version uses.
outcomes = df_pin.groupby(['bin', 'outcome'], observed=False).size()
# Calculate the win AND loss rates for each bin based on game outcomes.
# transform('sum') returns the per-bin total on the same (bin, outcome) index,
# so the division keeps that index unchanged.
win_rate = outcomes / outcomes.groupby(level='bin', observed=False).transform('sum')

# Per-bin aggregates. Everything below is indexed by bin and combined by index
# alignment, so a bin without any 'W' row cannot shift the other columns.
per_bin = df_pin.groupby('bin', observed=False)
# Average implied win rate of each bin. This will be the "expected win rate."
# The column is selected before averaging so non-numeric columns are never aggregated.
expected_win_rate_series = per_bin['win_prob_norm_PIN'].mean()
# Number of lines (teams) in each bin.
size_series = per_bin.size()

# Wins and total recorded outcomes per bin; bins with no matching rows count as 0.
is_win = outcomes.index.get_level_values('outcome') == 'W'
win_counts = (
    outcomes[is_win]
    .droplevel('outcome')
    .reindex(size_series.index, fill_value=0)
)
outcome_counts = (
    outcomes.groupby(level='bin', observed=False).sum()
    .reindex(size_series.index, fill_value=0)
)

# Assemble the table. An empty bin yields 0 / 0 = NaN for its actual win rate.
df_win_rate = pd.DataFrame({
    'actual_win_rate': win_counts / outcome_counts,
    'expected_win_rate': expected_win_rate_series,
})
# Calculate residuals (actual minus expected).
df_win_rate['residual'] = df_win_rate['actual_win_rate'] - df_win_rate['expected_win_rate']
df_win_rate['count'] = size_series
# Turn the bin index back into a regular 'bin' column.
df_win_rate = df_win_rate.reset_index()

# Nicely formatted HTML table
df_win_rate.style.format({
    'actual_win_rate': '{:,.2%}'.format,
    'expected_win_rate': '{:,.2%}'.format,
    'residual': '{:,.2%}'.format,
})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment