lambertchu/bin_odds.py Secret

## bin_odds.py
### Bucket games by moneyline (ML) and compare to actual win percentages.
### Are the ML actually predictive of final results?
# For now, we will only consider moneylines from Pinnacle!

# Place outcomes into bins based upon their pregame implied win probabilities
bins = 20
pin_columns = [
    'key', 'date', 'ml_time', 'team', 'opp_team', 'score',
    'ml_PIN', 'outcome', 'win_prob_PIN', 'win_prob_norm_PIN',
]
# Take an explicit copy: adding the 'bin' column to a bare column slice of
# df_nba_lines triggers SettingWithCopyWarning and may or may not write through.
df_pin = df_nba_lines[pin_columns].copy()
df_pin['bin'] = pd.cut(df_pin['win_prob_norm_PIN'], bins=bins)

### Now, the goal is to calculate the win rate for each bin
# Start with counting rows per (bin, outcome) pair. observed=False keeps every
# bin produced by pd.cut, including empty ones (their counts are 0).
outcomes = df_pin.groupby(['bin', 'outcome'], observed=False).size()

# Calculate the win AND loss rates for each bin: each count divided by its
# bin's total. transform('sum') returns a Series on the same index as
# `outcomes`, so the index is not reshaped the way groupby.apply can be
# (empty bins give 0/0 = NaN).
bin_totals = outcomes.groupby(level=0, observed=False).transform('sum')
win_rate = outcomes / bin_totals

# Convert to df
df_win_rate = win_rate.reset_index(name='actual_win_rate')

# Filter only for win rate (remove loss rate and unneeded columns)
df_win_rate = df_win_rate.loc[df_win_rate['outcome'] == 'W', ['bin', 'actual_win_rate']]

# Add column for the average implied win rate of each bin. This will be the
# "expected win rate." Only the probability column is averaged (the other
# columns hold non-numeric data), and the result is joined on the bin label
# rather than pasted in by position, so rows cannot be silently misaligned.
bin_groups = df_pin.groupby('bin', observed=False)
expected_win_rate_series = bin_groups['win_prob_norm_PIN'].mean()
df_win_rate = df_win_rate.join(
    expected_win_rate_series.rename('expected_win_rate'), on='bin'
)

# Calculate residuals (actual minus expected)
df_win_rate['residual'] = df_win_rate['actual_win_rate'] - df_win_rate['expected_win_rate']

# Add column for the number of lines (teams) in each bin, again matched by bin
size_series = bin_groups.size()
df_win_rate = df_win_rate.join(size_series.rename('count'), on='bin')

# Nicely formatted HTML table (rates rendered as percentages, 2 decimals)
df_win_rate.style.format({
    'actual_win_rate': '{:,.2%}'.format,
    'expected_win_rate': '{:,.2%}'.format,
    'residual': '{:,.2%}'.format,
})
	### Bucket games by moneyline (ML) and compare to actual win percentages.
	### Are the ML actually predictive of final results?
	# For now, we will only consider moneylines from Pinnacle!

	# Place outcomes into bins based upon their pregame implied win probabilities
	bins = 20
	pin_columns = [
		'key', 'date', 'ml_time', 'team', 'opp_team', 'score',
		'ml_PIN', 'outcome', 'win_prob_PIN', 'win_prob_norm_PIN',
	]
	# Take an explicit copy: adding the 'bin' column to a bare column slice of
	# df_nba_lines triggers SettingWithCopyWarning and may or may not write through.
	df_pin = df_nba_lines[pin_columns].copy()
	df_pin['bin'] = pd.cut(df_pin['win_prob_norm_PIN'], bins=bins)

	### Now, the goal is to calculate the win rate for each bin
	# Start with counting rows per (bin, outcome) pair. observed=False keeps every
	# bin produced by pd.cut, including empty ones (their counts are 0).
	outcomes = df_pin.groupby(['bin', 'outcome'], observed=False).size()

	# Calculate the win AND loss rates for each bin: each count divided by its
	# bin's total. transform('sum') returns a Series on the same index as
	# `outcomes`, so the index is not reshaped the way groupby.apply can be
	# (empty bins give 0/0 = NaN).
	bin_totals = outcomes.groupby(level=0, observed=False).transform('sum')
	win_rate = outcomes / bin_totals

	# Convert to df
	df_win_rate = win_rate.reset_index(name='actual_win_rate')

	# Filter only for win rate (remove loss rate and unneeded columns)
	df_win_rate = df_win_rate.loc[df_win_rate['outcome'] == 'W', ['bin', 'actual_win_rate']]

	# Add column for the average implied win rate of each bin. This will be the
	# "expected win rate." Only the probability column is averaged (the other
	# columns hold non-numeric data), and the result is joined on the bin label
	# rather than pasted in by position, so rows cannot be silently misaligned.
	bin_groups = df_pin.groupby('bin', observed=False)
	expected_win_rate_series = bin_groups['win_prob_norm_PIN'].mean()
	df_win_rate = df_win_rate.join(
		expected_win_rate_series.rename('expected_win_rate'), on='bin'
	)

	# Calculate residuals (actual minus expected)
	df_win_rate['residual'] = df_win_rate['actual_win_rate'] - df_win_rate['expected_win_rate']

	# Add column for the number of lines (teams) in each bin, again matched by bin
	size_series = bin_groups.size()
	df_win_rate = df_win_rate.join(size_series.rename('count'), on='bin')

	# Nicely formatted HTML table (rates rendered as percentages, 2 decimals)
	df_win_rate.style.format({
		'actual_win_rate': '{:,.2%}'.format,
		'expected_win_rate': '{:,.2%}'.format,
		'residual': '{:,.2%}'.format,
	})