Skip to content
{{ message }}

Instantly share code, notes, and snippets.

# lambertchu/bin_odds.py Secret

Created Dec 29, 2020
 ### Bucket games by ML and compare to actual win percentages.
### Are the ML actually predictive of final results?
#
# Inputs (defined earlier in the notebook): `pd` (pandas) and `df_nba_lines`,
# which must provide the columns selected below.
# Output: `df_win_rate`, one row per probability bin with the columns
# bin / actual_win_rate / expected_win_rate / residual / count, rendered as a
# styled table by the final expression.

# For now, we will only consider moneylines from Pinnacle!
# Place outcomes into bins based upon their pregame implied win probabilities
bins = 20
pin_columns = [
    'key', 'date', 'ml_time', 'team', 'opp_team', 'score',
    'ml_PIN', 'outcome', 'win_prob_PIN', 'win_prob_norm_PIN',
]
# .copy() gives an independent frame, so adding the 'bin' column below does not
# write into a slice of df_nba_lines (avoids SettingWithCopyWarning).
df_pin = df_nba_lines[pin_columns].copy()
df_pin['bin'] = pd.cut(df_pin['win_prob_norm_PIN'], bins=bins)

### Now, the goal is to calculate the win rate for each bin
# Start with grouping by bin and game outcome (W or L).
# observed=False keeps every bin in the result, including empty ones.
outcomes = df_pin.groupby(['bin', 'outcome'], observed=False).size()

# Calculate the win AND loss rates for each bin based on game outcomes.
# Dividing by the per-bin total via transform keeps the (bin, outcome) index
# unchanged; an empty bin yields 0 / 0 = NaN.
bin_totals = outcomes.groupby(level='bin', observed=False).transform('sum')
win_rate = outcomes / bin_totals

# Per-bin aggregates, all indexed by bin so they align by label, not position.
by_bin = df_pin.groupby('bin', observed=False)

# Average implied win rate of each bin. This is the "expected win rate."
# Only the probability column is averaged; the frame also holds text columns.
expected_win_rate_series = by_bin['win_prob_norm_PIN'].mean()

# Number of lines (teams) in each bin
size_series = by_bin.size()

# Keep only the win rate (drop the loss rate) and assemble the summary table
df_win_rate = pd.DataFrame({
    'actual_win_rate': win_rate.xs('W', level='outcome'),
    'expected_win_rate': expected_win_rate_series,
    'count': size_series,
})

# Calculate residuals (actual minus expected)
df_win_rate['residual'] = (
    df_win_rate['actual_win_rate'] - df_win_rate['expected_win_rate']
)

# Turn the bin index into a column and restore the original column order
df_win_rate = df_win_rate.reset_index()[
    ['bin', 'actual_win_rate', 'expected_win_rate', 'residual', 'count']
]

# Nicely formatted HTML table
df_win_rate.style.format({
    'actual_win_rate': '{:,.2%}',
    'expected_win_rate': '{:,.2%}',
    'residual': '{:,.2%}',
})
to join this conversation on GitHub. Already have an account? Sign in to comment