Skip to content

Instantly share code, notes, and snippets.

@endlesspint8
Last active June 26, 2023 19:13
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save endlesspint8/2eaae1e452ce7d5a5edd46277c0459fb to your computer and use it in GitHub Desktop.
Save endlesspint8/2eaae1e452ce7d5a5edd46277c0459fb to your computer and use it in GitHub Desktop.
import csv
from collections import Counter

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
matplotlib.style.use('ggplot')
%matplotlib inline
# Team cross-reference table (semicolon-delimited); print its shape and
# preview the first rows.
team_xref = pd.read_csv('data/team_xref.csv', delimiter=';')
print(team_xref.shape)
team_xref.head()
# Out
# (32, 4)
# long_name short_name conference division
# 0 Arizona Cardinals ARI NFC NFC West
# 1 Atlanta Falcons ATL NFC NFC South
# 2 Baltimore Ravens BAL AFC AFC North
# 3 Buffalo Bills BUF AFC AFC East
# 4 Carolina Panthers CAR NFC NFC South
# Load the 538 game forecasts (semicolon-delimited, one header row).
# Each kept record is the first six fields as read plus the seventh field
# (the away-win probability) converted to float.
with open('data/538_apriori_sup_season2.csv', 'r') as f:
    reader = csv.reader(f, delimiter=';')
    next(reader)  # skip the header row
    fivthir_apriori = [tuple(row[:6]) + (float(row[6]),) for row in reader]
print(len(fivthir_apriori))
print("away\thome\taway_conf\thome_conf\taway_div\thome_div\taway_prob")
fivthir_apriori[:5]
# Out
# 256
# away home away_conf home_conf away_div home_div away_prob
# [('CAR', 'DEN', 'NFC', 'AFC', 'NFC South', 'AFC West', 0.4),
# ('BUF', 'BAL', 'AFC', 'AFC', 'AFC East', 'AFC North', 0.47),
# ('CHI', 'HOU', 'NFC', 'AFC', 'NFC North', 'AFC South', 0.33),
# ('CIN', 'NYJ', 'AFC', 'AFC', 'AFC North', 'AFC East', 0.49),
# ('CLE', 'PHI', 'AFC', 'NFC', 'AFC North', 'NFC East', 0.29)]
# season sim
# Seed NumPy's global RNG so the np.random.random draws used by the
# simulation below repeat from run to run.
np.random.seed(538)
def season_sims(game_probabilities, seasons=100):
    """Simulate whole seasons and histogram each team's win total.

    Parameters
    ----------
    game_probabilities : sequence of tuples
        One entry per game. Index 0 is the away team, index 1 the home team
        and index 6 the probability that the away team wins.
    seasons : int
        Number of seasons to simulate.

    Returns
    -------
    dict
        ``{team: {wins: n_seasons}}`` for every team in
        ``team_xref.short_name``. Teams that never appear in
        ``game_probabilities`` keep all-zero counts.

    Notes
    -----
    Draws come from NumPy's global RNG, so seed it beforehand for
    reproducible results.
    """
    away_teams = [game[0] for game in game_probabilities]
    home_teams = [game[1] for game in game_probabilities]
    away_win_probs = np.array([game[6] for game in game_probabilities],
                              dtype=float)

    # Size the histogram from the schedule instead of assuming 16 games per
    # team; never go below the historical 0..16 range so existing callers
    # see the same keys.
    games_per_team = Counter(away_teams) + Counter(home_teams)
    most_games = max(games_per_team.values()) if games_per_team else 0
    n_buckets = max(17, most_games + 1)
    team_win_sim_count = {team: {wins: 0 for wins in range(n_buckets)}
                          for team in team_xref.short_name}

    # One uniform draw per (season, game); the away team wins when the draw
    # falls below its win probability, otherwise the home team wins.
    samp_season = np.random.random((seasons, len(away_win_probs)))
    samp_away_wins = samp_season < away_win_probs

    for season_outcome in samp_away_wins:
        season_wins = Counter()
        for away_team, home_team, away_won in zip(away_teams, home_teams,
                                                  season_outcome):
            # Touch both teams so a winless team is still tallied at zero.
            season_wins[away_team] += int(away_won)
            season_wins[home_team] += int(not away_won)
        for team, wins in season_wins.items():
            team_win_sim_count[team][wins] += 1
    return team_win_sim_count
# Simulate 10,000 seasons from the 538 probabilities, then transpose so each
# row is a team and each column a season win total.
sim_counts_538 = season_sims(fivthir_apriori, seasons=10000)
tenK_seasons_538 = pd.DataFrame(sim_counts_538).T
tenK_seasons_538
import numpy as np
import pandas as pd
# Keep only the rows that have an FTE value, restricted to the three columns
# used downstream (win_prob, FTE, FTE_cnt).
has_fte = winConf.FTE.notnull()
five38 = winConf.loc[has_fte, ['win_prob', 'FTE', 'FTE_cnt']]
print(five38.head())
# win_prob FTE FTE_cnt
# 1 0.50 0.000000 4
# 2 0.51 0.666667 6
# 3 0.52 0.555556 9
# 4 0.53 0.625000 8
# 5 0.54 0.400000 10
# Bucket edges in steps of 0.1 starting at 0.5; consecutive edges form
# half-open [min, max) buckets.
buckets = np.arange(.5,1.05,.1)


def bucketize(df):
    """Summarise a frame per bucket of its first column.

    Parameters
    ----------
    df : pandas.DataFrame
        Read positionally: column 0 is the value that is bucketed, column 1
        the value averaged alongside it, column 2 the per-row game count
        used as the weight.

    Returns
    -------
    tuple of three lists, one entry per bucket
        ``(conf_list, win_list, cnt_list)``: the count-weighted mean of
        column 0, the count-weighted mean of column 1 (both rounded to two
        decimals) and the total count. A bucket with no games yields ``nan``
        for both means and ``0`` for the count.
    """
    bucket_key = df.iloc[:, 0]
    outcome = df.iloc[:, 1]
    games = df.iloc[:, 2]

    conf_list = []
    win_list = []
    cnt_list = []
    for buck_min, buck_max in zip(buckets[:-1], buckets[1:]):
        # Build the membership mask once per bucket and reuse it.
        in_bucket = (bucket_key >= buck_min) & (bucket_key < buck_max)
        bucket_games = games[in_bucket]
        cnt_games = np.sum(bucket_games)
        if cnt_games == 0:
            # Nothing to average; report nan explicitly instead of doing 0/0.
            per_conf = per_win = float('nan')
        else:
            per_conf = np.round(
                np.sum(bucket_key[in_bucket] * bucket_games) / cnt_games, 2)
            per_win = np.round(
                np.sum(outcome[in_bucket] * bucket_games) / cnt_games, 2)
        conf_list.append(per_conf)
        win_list.append(per_win)
        cnt_list.append(cnt_games)
    return (conf_list, win_list, cnt_list)
# Notebook cell: display the (conf_list, win_list, cnt_list) tuple that
# bucketize returns for the five38 frame.
bucketize(five38)
# ([0.55000000000000004,
# 0.65000000000000002,
# 0.73999999999999999,
# 0.81999999999999995,
# 0.90000000000000002],
# [0.54000000000000004,
# 0.57999999999999996,
# 0.70999999999999996,
# 0.84999999999999998,
# 1.0],
# [85L, 60L, 42L, 20L, 1L])
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
matplotlib.style.use('ggplot')
%matplotlib inline
# Team cross-reference table (semicolon-delimited); print its shape and
# preview the first rows.
team_xref = pd.read_csv('data/team_xref.csv', delimiter=';')
print(team_xref.shape)
team_xref.head()
# Out
# (32, 4)
# long_name short_name conference division
# 0 Arizona Cardinals ARI NFC NFC West
# 1 Atlanta Falcons ATL NFC NFC South
# 2 Baltimore Ravens BAL AFC AFC North
# 3 Buffalo Bills BUF AFC AFC East
# 4 Carolina Panthers CAR NFC NFC South
# Load the FOX game forecasts (semicolon-delimited, one header row).
# Each kept record is the first six fields as read plus the seventh field
# (the away-win probability) converted to float.
with open('data/FOX_apriori_sup_season.csv', 'r') as f:
    reader = csv.reader(f, delimiter=';')
    next(reader)  # skip the header row
    fox_apriori = [tuple(row[:6]) + (float(row[6]),) for row in reader]
print(len(fox_apriori))
print("away\thome\taway_conf\thome_conf\taway_div\thome_div\taway_prob")
fox_apriori[:5]
# Out
# 256
# away home away_conf home_conf away_div home_div away_prob
# [('CAR', 'DEN', 'NFC', 'AFC', 'NFC South', 'AFC West', 0.67),
# ('BUF', 'BAL', 'AFC', 'AFC', 'AFC East', 'AFC North', 0.553),
# ('CHI', 'HOU', 'NFC', 'AFC', 'NFC North', 'AFC South', 0.287),
# ('CIN', 'NYJ', 'AFC', 'AFC', 'AFC North', 'AFC East', 0.479),
# ('CLE', 'PHI', 'AFC', 'NFC', 'AFC North', 'NFC East', 0.467)]
from collections import defaultdict
def season_prob(apriori):
    """Accumulate each team's win probabilities over a schedule.

    ``apriori`` is an iterable of 7-tuples ``(away_team, home_team,
    away_conf, home_conf, away_div, home_div, away_prob)``. The away side is
    credited with ``away_prob`` and the home side with ``1.0 - away_prob``.

    Returns a nested defaultdict ``{team: {category: total}}`` with the
    categories ``'all'``, ``'away'`` or ``'home'``, ``'conf'`` (both teams in
    the same conference) and ``'div'`` (same conference and same division).
    """
    totals = defaultdict(lambda: defaultdict(int))
    for (visitor, host, visitor_conf, host_conf,
         visitor_div, host_div, visitor_prob) in apriori:
        same_conf = visitor_conf == host_conf
        # Division games are only counted inside a conference game.
        same_div = same_conf and visitor_div == host_div

        sides = ((visitor, 'away', visitor_prob),
                 (host, 'home', 1.0 - visitor_prob))
        for club, venue, win_prob in sides:
            categories = ['all', venue]
            if same_conf:
                categories.append('conf')
            if same_div:
                categories.append('div')
            club_totals = totals[club]
            for category in categories:
                club_totals[category] += win_prob
    return totals
# Expected-win standings from the FOX probabilities.
d = season_prob(fox_apriori)
# Teams become rows after the transpose; a category a team never
# accumulated is filled with 0.
prob_df = pd.DataFrame(d).round(1).T.fillna(0)
prob_cols = prob_df.columns  # original category labels, captured before renaming
# Rename by label rather than by position: the column order depends on how
# the nested dict keys happen to be ordered, so a positional assignment can
# silently mislabel the columns.
prob_df = prob_df.rename(columns={'all': 'win_total',
                                  'away': 'road_wins',
                                  'conf': 'conf_wins',
                                  'div': 'div_wins',
                                  'home': 'home_wins'})
prob_df = prob_df[['win_total', 'home_wins', 'road_wins', 'div_wins', 'conf_wins']]
# Group by conference and division (ascending), then rank teams inside each
# division by total, division and conference wins (descending).
prob_standings_fox = team_xref.merge(prob_df, left_on='short_name', right_index=True)\
    .sort_values(['conference', 'division', 'win_total',
                  'div_wins', 'conf_wins'],
                 ascending=[True, True, False, False, False])
print(prob_standings_fox.head())
# Out
# long_name short_name conference division win_total \
# 19 New England Patriots NE AFC AFC East 9.5
# 22 New York Jets NYJ AFC AFC East 9.4
# 3 Buffalo Bills BUF AFC AFC East 7.9
# 17 Miami Dolphins MIA AFC AFC East 6.8
# 25 Pittsburgh Steelers PIT AFC AFC North 10.7
# home_wins road_wins div_wins conf_wins
# 19 4.5 5.0 3.5 7.2
# 22 4.7 4.7 3.5 6.9
# 3 3.9 4.0 2.8 5.8
# 17 3.4 3.3 2.2 4.9
# 25 5.1 5.6 4.2 7.7
import numpy as np
import pandas as pd
for src in [cbs, espn, five38, fox]:
    # diff, squared, * game count, summed & divided by total games per src
    # bucketize is called once per source and its three lists are reused.
    conf_arr, win_arr, cnt_arr = [np.array(col) for col in bucketize(src)]
    print(np.sqrt(np.sum(np.square(conf_arr - win_arr) * cnt_arr) / np.sum(cnt_arr)))
# 0.177597481358
# 0.165536714496
# 0.0420793664035
# 0.0830922795636
def rmse_weighted(df):
    """Return the weighted root-mean-square difference of two columns.

    Columns are read by position: the difference is column 0 minus column 1
    and column 2 supplies the per-row weights.
    """
    first = df.iloc[:, 0]
    second = df.iloc[:, 1]
    weights = df.iloc[:, 2]
    weighted_sq_err = np.square(first - second) * weights
    return np.sqrt(np.sum(weighted_sq_err) / np.sum(weights))
# Row-level weighted RMSE for each source, in the order cbs, espn, five38, fox.
for src in (cbs, espn, five38, fox):
    src_rmse = rmse_weighted(src)
    print(src_rmse)
# 0.181041021208
# 0.16749439121
# 0.190953289846
# 0.212558319291
# inspired by:
# http://stackoverflow.com/questions/33019879/hierarchic-pie-donut-chart-from-pandas-dataframe-using-bokeh-or-matplotlib
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
matplotlib.style.use('ggplot')
%matplotlib inline
# Weekly share of correct picks.
weekly_correct = [0.875, 0.636, 0.500, 0.500, 0.583, 0.556, 0.625]

# "compounded" correct picks: running product of the weekly shares.
# Kept as single-item lists, the shape this cell has always left behind.
correct_per = []
running_correct = 1.0
for week_share in weekly_correct:
    running_correct = week_share * running_correct
    correct_per.append([running_correct])

# One [still correct, newly lost this week, lost in earlier weeks] triple per
# week; each triple sums to 1 (up to float rounding).
WL_pairs = []
prior_losses = 0
for (still_correct,) in correct_per:
    new_losses = (1.0 - still_correct) - prior_losses
    WL_pairs.append([still_correct, new_losses, prior_losses])
    prior_losses = new_losses + prior_losses

print(WL_pairs)
print([np.sum(ring) for ring in WL_pairs])

colors = ['dodgerblue', 'darkblue', 'black']
plt.figure(figsize=(10,10))
# The last week is the full-radius outer pie; earlier weeks are drawn on top
# of it with progressively smaller radii, giving concentric rings.
plt.pie(WL_pairs[-1], colors=colors)
ax = plt.gca()
n_rings = len(WL_pairs)
for i in range(n_rings - 2, -1, -1):
    # Plain float() instead of np.float, which newer NumPy no longer provides.
    ax.pie(WL_pairs[i], colors=colors,
           radius=float(i + 1) / float(n_rings))
ax.set_aspect('equal')
import csv, os
import numpy as np
import pandas as pd
# print pd.__version__
from datetime import datetime
import matplotlib.pyplot as plt
import matplotlib
matplotlib.style.use('ggplot')
%matplotlib inline
# Load the per-game results and home lines (semicolon-delimited), then print
# the frame's shape, its dtypes, a blank separator line and the first rows.
# Only the first 3 weeks of the season are in the file at this point.
home_lines = pd.read_csv('data/ATS_SUP_home_conf.csv', delimiter=';')
for summary in (home_lines.shape, home_lines.dtypes, '', home_lines.head()):
    print(summary)
# (48, 9)
# week object
# away_short_name object
# home_short_name object
# away_score int64
# home_score int64
# home_line float64
# home_conf object
# home_favBy float64
# home_ptd int64
# dtype: object
# week away_short_name home_short_name away_score home_score home_line \
# 0 Week 1 CAR DEN 20 21 3.5
# 1 Week 1 BUF BAL 7 13 -3.5
# 2 Week 1 CHI HOU 14 23 -6.5
# 3 Week 1 CIN NYJ 23 22 2.5
# 4 Week 1 CLE PHI 10 29 -3.5
# home_conf home_favBy home_ptd
# 0 AFC -3.5 1
# 1 AFC 3.5 6
# 2 AFC 6.5 9
# 3 AFC -2.5 -1
# 4 NFC 3.5 19
# Home margin relative to the line, and its absolute size.
home_lines['ats_ptd'] = home_lines.home_ptd - home_lines.home_favBy
home_lines['abs_line_ptd'] = np.absolute(home_lines.home_favBy - home_lines.home_ptd)
# not a perfect way of making smaller points visible // get over it
# The 2.0/x term enlarges markers for games that finished close to the line.
# The divisor is floored at 0.5 so a game that lands exactly on the line
# gets size 4 instead of a division by zero (infinite size).
# NOTE(review): assumes gaps are multiples of 0.5 (half-point lines, integer
# scores), so every non-zero gap keeps its previous size - confirm.
home_lines['marker_size'] = home_lines.abs_line_ptd \
    + 2.0 / home_lines.abs_line_ptd.clip(lower=0.5)
### now, the plot
from math import pi
from bokeh.plotting import figure, output_file, output_notebook, show, ColumnDataSource
from bokeh.models import HoverTool, Legend, Circle, Square
def _add_conference_markers(plot, source, glyph_cls, fill_color):
    """Add one conference's game markers to ``plot`` with a hover tool.

    The hover tool is restricted to the renderer created here, so each
    conference gets its own tooltips. Returns ``(glyph, renderer, hover)``.
    """
    glyph = glyph_cls(x='home_favBy', y='home_ptd',
                      size='marker_size', fill_color=fill_color,
                      line_width=0.5,
                      fill_alpha=0.5)
    renderer = plot.add_glyph(source_or_glyph=source, glyph=glyph)
    hover = HoverTool(renderers=[renderer],
                      tooltips=[("Game", "@away_short_name at @home_short_name"),
                                ("Final Score", "@away_score - @home_score"),
                                ("Home Line", "@home_line"),
                                ("Home Conf", "@home_conf"),
                                ("Week", "@week")])
    plot.add_tools(hover)
    return glyph, renderer, hover


# Render inline in the notebook and also write a standalone HTML file.
output_notebook()
output_file("sup_ats.html")

# One data source per home-team conference.
bk_afc = home_lines[home_lines.home_conf=='AFC']
bk_nfc = home_lines[home_lines.home_conf=='NFC']
source_afc = ColumnDataSource(data=bk_afc)
source_nfc = ColumnDataSource(data=bk_nfc)

TOOLS = "pan,wheel_zoom,reset,save"
p = figure(plot_width=600, plot_height=600, tools=TOOLS,
           title="2016 Performance SUP & ATS (through week 3)")

# http://stackoverflow.com/questions/29435200/bokeh-plotting-enable-tooltips-for-only-some-glyphs
# AFC home games are firebrick circles, NFC home games navy squares.
g1, g1_r, g1_hover = _add_conference_markers(p, source_afc, Circle, "firebrick")
g2, g2_r, g2_hover = _add_conference_markers(p, source_nfc, Square, "navy")

# Horizontal and vertical axes through the origin.
p.multi_line([[-35, 0, 35], [0, 0, 0]],
             [[0, 0, 0], [-35, 0, 35]],
             color="black",
             alpha=0.8,
             line_width=2)
# Dashed diagonal where the home point difference equals the line.
p.line([-30, 0, 30], [-30, 0, 30], line_width=2, line_dash=[4, 4])

# Region labels; the two ATS labels are rotated to follow the diagonal.
for label_x, label_y, label_text, label_color, label_extra in (
        (26, 1, "Win SUP", "darkblue", {}),
        (26, -3, "Lose SUP", "dodgerblue", {}),
        (23, 24, "Win ATS", "black", {'angle': pi/4}),
        (25, 22, "Lose ATS", "red", {'angle': pi/4})):
    p.text(label_x, label_y, text=[label_text], text_font_size="8pt",
           text_color=label_color, **label_extra)

for plot_axis, axis_caption in ((p.xaxis, "Home Team Favored By"),
                                (p.yaxis, "Home Team Points Difference")):
    plot_axis.axis_label = axis_caption
    plot_axis.axis_label_text_font_size = "12pt"

# p.legend.location = "top_left"
# legend = Legend(legends=[
# ("sin(x)", [g1_r]),
# ("2*sin(x)", [g2_r])
# ], location=(0, -30))
# p.add_layout(legend, 'right')

# show the results
show(p)
import csv, os
import numpy as np
import pandas as pd
# print pd.__version__
from datetime import datetime
import matplotlib.pyplot as plt
import matplotlib
matplotlib.style.use('ggplot')
%matplotlib inline
# Load the per-game results and home lines (semicolon-delimited), then print
# the frame's shape, its dtypes, a blank separator line and the first rows.
# Only the first 3 weeks of the season are in the file at this point.
home_lines = pd.read_csv('data/ATS_SUP_home_conf.csv', delimiter=';')
for summary in (home_lines.shape, home_lines.dtypes, '', home_lines.head()):
    print(summary)
# (48, 9)
# week object
# away_short_name object
# home_short_name object
# away_score int64
# home_score int64
# home_line float64
# home_conf object
# home_favBy float64
# home_ptd int64
# dtype: object
# week away_short_name home_short_name away_score home_score home_line \
# 0 Week 1 CAR DEN 20 21 3.5
# 1 Week 1 BUF BAL 7 13 -3.5
# 2 Week 1 CHI HOU 14 23 -6.5
# 3 Week 1 CIN NYJ 23 22 2.5
# 4 Week 1 CLE PHI 10 29 -3.5
# home_conf home_favBy home_ptd
# 0 AFC -3.5 1
# 1 AFC 3.5 6
# 2 AFC 6.5 9
# 3 AFC -2.5 -1
# 4 NFC 3.5 19
# Absolute gap between the home point difference and the line.
home_lines['abs_line_ptd'] = np.absolute(home_lines.home_favBy - home_lines.home_ptd)

### now, the plot
# Marker area scales with that gap; the +1 keeps a game that landed exactly
# on the line from being drawn with zero area (not a perfect way of making
# smaller points visible).
afc_games = home_lines[home_lines.home_conf=='AFC']
nfc_games = home_lines[home_lines.home_conf=='NFC']
s_afc = (afc_games['abs_line_ptd'] + 1)*10
s_nfc = (nfc_games['abs_line_ptd'] + 1)*10

# AFC creates the 8x8 axes; NFC is drawn onto the same axes.
shared_scatter_args = dict(x='home_favBy', y='home_ptd', alpha=0.5)
ax = afc_games.plot.scatter(color='red', s=s_afc, label='AFC',
                            figsize=(8,8), **shared_scatter_args)
nfc_games.plot.scatter(color='blue', s=s_nfc, label='NFC', ax=ax,
                       **shared_scatter_args)

# Reference lines: dashed diagonal (point difference equals the line) plus
# the two axes through the origin. All three share one sample grid.
ats = fav = ptd = np.linspace(-35,35,100)
zero_line = np.zeros(100)
plt.plot(ats, ats, 'g--')
plt.plot(fav, zero_line, 'k-')
plt.plot(zero_line, ptd, 'k-')

plt.xlim([-35,35])
plt.ylim([-35,35])
plt.title("2016 Home Performance SUP & ATS\n(through week 3)")
plt.xlabel("Home Team Favored By", fontweight='bold')
plt.ylabel("Home Point Difference", fontweight='bold')
plt.legend(bbox_to_anchor=(0, 1), loc='lower right', ncol=1)

# Region labels; the two ATS labels are rotated to follow the diagonal.
for text_x, text_y, caption, text_extra in (
        (25, 1, 'Win SUP', {'color': 'darkblue'}),
        (25, -2.5, 'Lose SUP', {'color': 'dodgerblue'}),
        (25, 32, 'Win ATS', {'rotation': 45}),
        (27.5, 30.5, 'Lose ATS', {'color': 'red', 'rotation': 45})):
    plt.text(text_x, text_y, caption, fontsize=10, fontweight='bold',
             **text_extra)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment