Last active
June 26, 2023 19:13
-
-
Save endlesspint8/2eaae1e452ce7d5a5edd46277c0459fb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import matplotlib.pyplot as plt | |
import matplotlib | |
matplotlib.style.use('ggplot') | |
%matplotlib inline | |
# team cross-reference table (full name, abbreviation, conference, division)
team_xref = pd.read_csv('data/team_xref.csv', delimiter=';')
print(team_xref.shape)
team_xref.head()
# Out | |
# (32, 4) | |
# long_name short_name conference division | |
# 0 Arizona Cardinals ARI NFC NFC West | |
# 1 Atlanta Falcons ATL NFC NFC South | |
# 2 Baltimore Ravens BAL AFC AFC North | |
# 3 Buffalo Bills BUF AFC AFC East | |
# 4 Carolina Panthers CAR NFC NFC South | |
# import 538 data: one tuple per game, six text fields followed by the
# away team's win probability as a float
fivthir_apriori = []
with open('data/538_apriori_sup_season2.csv', 'r') as f:
    reader = csv.reader(f, delimiter=';')
    next(reader)  # drop the first row (presumably the column header)
    fivthir_apriori.extend(
        tuple(row[:6]) + (float(row[6]),) for row in reader
    )
print(len(fivthir_apriori))
print("away\thome\taway_conf\thome_conf\taway_div\thome_div\taway_prob")
fivthir_apriori[:5]
# Out | |
# 256 | |
# away home away_conf home_conf away_div home_div away_prob | |
# [('CAR', 'DEN', 'NFC', 'AFC', 'NFC South', 'AFC West', 0.4), | |
# ('BUF', 'BAL', 'AFC', 'AFC', 'AFC East', 'AFC North', 0.47), | |
# ('CHI', 'HOU', 'NFC', 'AFC', 'NFC North', 'AFC South', 0.33), | |
# ('CIN', 'NYJ', 'AFC', 'AFC', 'AFC North', 'AFC East', 0.49), | |
# ('CLE', 'PHI', 'AFC', 'NFC', 'AFC North', 'NFC East', 0.29)] | |
# season sim
np.random.seed(538)  # fixed seed so the simulated seasons are reproducible


def season_sims(game_probabilities, seasons=100, teams=None):
    """Simulate whole seasons and tally how often each team ends on each win total.

    Parameters
    ----------
    game_probabilities : sequence of tuples
        One entry per game. Index 0 is the away team, index 1 the home team
        and index 6 the away team's win probability; the other fields are
        ignored here.
    seasons : int
        Number of seasons to simulate.
    teams : iterable of str, optional
        Team keys for the result. Defaults to the module-level
        ``team_xref.short_name`` (the original behaviour).

    Returns
    -------
    dict
        ``{team: {wins: number_of_simulated_seasons}}``. Every team gets a key
        for each win total from 0 up to the longest schedule in
        ``game_probabilities`` (never fewer than 0..16, the original range).
        A team that plays no game keeps all-zero counts.

    Raises
    ------
    KeyError
        If a team appearing in ``game_probabilities`` is not in ``teams``.
    """
    # Local import: this snippet has no file-level import for Counter.
    from collections import Counter

    if teams is None:
        teams = team_xref.short_name
    teams = list(teams)

    matchups = [(game[0], game[1]) for game in game_probabilities]

    # Size the win-total histogram from the schedule instead of hard-coding
    # 16 games, so longer schedules cannot overflow the keys.
    games_per_team = Counter()
    for away_team, home_team in matchups:
        games_per_team[away_team] += 1
        games_per_team[home_team] += 1
    max_wins = max([16] + list(games_per_team.values()))

    team_win_sim_count = {
        team: {wins: 0 for wins in range(max_wins + 1)} for team in teams
    }

    away_win_probs = np.array(
        [game[6] for game in game_probabilities], dtype=float
    )
    # One uniform draw per (season, game); the away team wins when the draw
    # falls below its win probability, otherwise the home team wins.
    samp_season = np.random.random((seasons, len(away_win_probs)))
    samp_away_wins = samp_season < away_win_probs

    for season_away_wins in samp_away_wins:
        season_wins = Counter()
        for (away_team, home_team), away_won in zip(matchups, season_away_wins):
            season_wins[away_team] += int(away_won)
            season_wins[home_team] += int(not away_won)
        for team, wins in season_wins.items():
            team_win_sim_count[team][wins] += 1
    return team_win_sim_count
# 10,000 simulated seasons from the 538 probabilities, transposed so each
# team is a row and each win total a column
tenK_seasons_538 = pd.DataFrame(
    season_sims(fivthir_apriori, seasons=10000)
).transpose()
tenK_seasons_538
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
# keep only the rows that have an FTE value, and only the three columns
# that the bucketing / RMSE helpers read by position
five38 = winConf.loc[winConf.FTE.notnull(), ['win_prob', 'FTE', 'FTE_cnt']]
print(five38.head())
# win_prob FTE FTE_cnt | |
# 1 0.50 0.000000 4 | |
# 2 0.51 0.666667 6 | |
# 3 0.52 0.555556 9 | |
# 4 0.53 0.625000 8 | |
# 5 0.54 0.400000 10 | |
# bucket edges: 0.5, 0.6, ..., 1.0 (five half-open buckets [lo, hi))
buckets = np.arange(.5, 1.05, .1)


def bucketize(df):
    """Summarise a calibration table per win-probability bucket.

    ``df`` is read by position: column 0 is the stated win probability,
    column 1 the value to average (e.g. the ``FTE`` column of ``five38``) and
    column 2 the number of games behind each row, used as the weight.

    Rows are grouped into the half-open intervals defined by the module-level
    ``buckets`` edges (``lo <= prob < hi``), so a probability equal to the
    last edge is not counted.

    Returns
    -------
    tuple of three lists, one entry per bucket:
        * game-weighted mean of column 0, rounded to 2 decimals
        * game-weighted mean of column 1, rounded to 2 decimals
        * total number of games in the bucket

    A bucket with no games yields ``nan`` for both means.
    """
    probs = df.iloc[:, 0]
    outcomes = df.iloc[:, 1]
    counts = df.iloc[:, 2]

    conf_list = []
    win_list = []
    cnt_list = []
    for buck_min, buck_max in zip(buckets[:-1], buckets[1:]):
        # Build the membership mask once per bucket and reuse it everywhere.
        in_bucket = (probs >= buck_min) & (probs < buck_max)
        weights = counts[in_bucket]
        cnt_games = np.sum(weights)
        if cnt_games == 0:
            # Empty bucket: report nan explicitly rather than dividing by 0.
            per_conf = per_win = np.nan
        else:
            total = float(cnt_games)  # avoid integer division on Python 2
            per_conf = np.round(np.sum(probs[in_bucket] * weights) / total, 2)
            per_win = np.round(np.sum(outcomes[in_bucket] * weights) / total, 2)
        conf_list.append(per_conf)
        win_list.append(per_win)
        cnt_list.append(cnt_games)
    return (conf_list, win_list, cnt_list)
# per-bucket (weighted mean win_prob, weighted mean FTE, game count) for the 538 table
bucketize(five38)
# ([0.55000000000000004, | |
# 0.65000000000000002, | |
# 0.73999999999999999, | |
# 0.81999999999999995, | |
# 0.90000000000000002], | |
# [0.54000000000000004, | |
# 0.57999999999999996, | |
# 0.70999999999999996, | |
# 0.84999999999999998, | |
# 1.0], | |
# [85L, 60L, 42L, 20L, 1L]) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import matplotlib.pyplot as plt | |
import matplotlib | |
matplotlib.style.use('ggplot') | |
%matplotlib inline | |
# team cross-reference table (full name, abbreviation, conference, division)
team_xref = pd.read_csv('data/team_xref.csv', delimiter=';')
print(team_xref.shape)
team_xref.head()
# Out | |
# (32, 4) | |
# long_name short_name conference division | |
# 0 Arizona Cardinals ARI NFC NFC West | |
# 1 Atlanta Falcons ATL NFC NFC South | |
# 2 Baltimore Ravens BAL AFC AFC North | |
# 3 Buffalo Bills BUF AFC AFC East | |
# 4 Carolina Panthers CAR NFC NFC South | |
# import data: one tuple per game, six text fields followed by the
# away team's win probability as a float
fox_apriori = []
with open('data/FOX_apriori_sup_season.csv', 'r') as f:
    reader = csv.reader(f, delimiter=';')
    next(reader)  # drop the first row (presumably the column header)
    fox_apriori.extend(
        tuple(row[:6]) + (float(row[6]),) for row in reader
    )
print(len(fox_apriori))
print("away\thome\taway_conf\thome_conf\taway_div\thome_div\taway_prob")
fox_apriori[:5]
# Out | |
# 256 | |
# away home away_conf home_conf away_div home_div away_prob | |
# [('CAR', 'DEN', 'NFC', 'AFC', 'NFC South', 'AFC West', 0.67), | |
# ('BUF', 'BAL', 'AFC', 'AFC', 'AFC East', 'AFC North', 0.553), | |
# ('CHI', 'HOU', 'NFC', 'AFC', 'NFC North', 'AFC South', 0.287), | |
# ('CIN', 'NYJ', 'AFC', 'AFC', 'AFC North', 'AFC East', 0.479), | |
# ('CLE', 'PHI', 'AFC', 'NFC', 'AFC North', 'NFC East', 0.467)] | |
from collections import defaultdict | |
def season_prob(apriori):
    """Sum each team's win probabilities into expected-win tallies.

    ``apriori`` is an iterable of 7-tuples
    ``(away_team, home_team, away_conf, home_conf, away_div, home_div,
    away_prob)``. The home team's win probability is ``1.0 - away_prob``.

    Returns a nested ``defaultdict``: ``result[team][key]`` with keys
    ``'all'`` (every game), ``'away'`` / ``'home'`` (by venue), ``'conf'``
    (both teams in the same conference) and ``'div'`` (both teams in the
    same division). Keys that never applied to a team are absent until read.
    """
    simple_prob_cnt = defaultdict(lambda: defaultdict(int))
    for (away_team, home_team, away_conf, home_conf,
         away_div, home_div, away_prob) in apriori:
        same_conf = away_conf == home_conf
        same_div = away_div == home_div
        sides = (
            (away_team, 'away', away_prob),
            (home_team, 'home', 1.0 - away_prob),
        )
        for team, venue, win_prob in sides:
            tally = simple_prob_cnt[team]
            tally['all'] += win_prob
            tally[venue] += win_prob
            if same_conf:
                tally['conf'] += win_prob
                # a division game is also a conference game, so it is only
                # considered inside the same-conference branch
                if same_div:
                    tally['div'] += win_prob
    return simple_prob_cnt
# expected wins per team from the FOX probabilities, one row per team
d = season_prob(fox_apriori)
prob_df = np.around(pd.DataFrame(d), 1).T.fillna(0)
prob_cols = prob_df.columns
# Rename by label rather than by position: the column order of a DataFrame
# built from nested dicts depends on the pandas / Python version (sorted vs.
# insertion order), so a positional rename can attach the wrong label.
prob_df = prob_df.rename(columns={
    'all': 'win_total',
    'away': 'road_wins',
    'conf': 'conf_wins',
    'div': 'div_wins',
    'home': 'home_wins',
})
prob_df = prob_df[['win_total', 'home_wins', 'road_wins', 'div_wins', 'conf_wins']]
# standings: conference and division ascending, then best expected record first
prob_standings_fox = team_xref.merge(prob_df, left_on='short_name', right_index=True)\
    .sort_values(['conference', 'division', 'win_total', 'div_wins', 'conf_wins'],
                 ascending=[True, True, False, False, False])
print(prob_standings_fox.head())
# Out | |
# long_name short_name conference division win_total \ | |
# 19 New England Patriots NE AFC AFC East 9.5 | |
# 22 New York Jets NYJ AFC AFC East 9.4 | |
# 3 Buffalo Bills BUF AFC AFC East 7.9 | |
# 17 Miami Dolphins MIA AFC AFC East 6.8 | |
# 25 Pittsburgh Steelers PIT AFC AFC North 10.7 | |
# home_wins road_wins div_wins conf_wins | |
# 19 4.5 5.0 3.5 7.2 | |
# 22 4.7 4.7 3.5 6.9 | |
# 3 3.9 4.0 2.8 5.8 | |
# 17 3.4 3.3 2.2 4.9 | |
# 25 5.1 5.6 4.2 7.7 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
for src in [cbs, espn, five38, fox]:
    # bucketize once per source (the original recomputed it four times)
    bucket_conf, bucket_win, bucket_cnt = (np.array(col) for col in bucketize(src))
    # diff, squared, * game count, summed & divided by total games per src
    print(np.sqrt(np.sum(np.square(bucket_conf - bucket_win) * bucket_cnt) / np.sum(bucket_cnt)))
# 0.177597481358 | |
# 0.165536714496 | |
# 0.0420793664035 | |
# 0.0830922795636 | |
def rmse_weighted(df):
    """Return the weighted root-mean-square difference between two columns.

    ``df`` is read by position: the squared difference between column 0 and
    column 1 is weighted by column 2, summed, divided by the total weight and
    square-rooted.
    """
    first, second, weights = df.iloc[:, 0], df.iloc[:, 1], df.iloc[:, 2]
    squared_gap = np.square(first - second)
    weighted_mean = np.sum(squared_gap * weights) / np.sum(weights)
    return np.sqrt(weighted_mean)
# row-level (unbucketed) weighted RMSE for each source
for src in (cbs, espn, five38, fox):
    print(rmse_weighted(src))
# 0.181041021208 | |
# 0.16749439121 | |
# 0.190953289846 | |
# 0.212558319291 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# inspired by: | |
# http://stackoverflow.com/questions/33019879/hierarchic-pie-donut-chart-from-pandas-dataframe-using-bokeh-or-matplotlib | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import matplotlib | |
matplotlib.style.use('ggplot') | |
%matplotlib inline | |
correct_per = [0.875, 0.636, 0.500, 0.500, 0.583, 0.556, 0.625]
correct_per = [[i] for i in correct_per]
# "compounded" correct picks: each entry becomes the running product
for i in range(1, len(correct_per)):
    correct_per[i][0] = correct_per[i][0] * correct_per[i-1][0]
# each row ends up as [still correct, newly lost at this step, lost earlier]
WL_pairs = [[i[0], 1.0-i[0]] for i in correct_per]
for i in range(len(WL_pairs)):
    if i == 0:
        WL_pairs[i].append(0)
    else:
        # everything that was already lost after the previous step ...
        WL_pairs[i].append(np.sum(WL_pairs[i-1][1:]))
        # ... is removed from this step's loss so the three shares sum to 1
        WL_pairs[i][1] -= WL_pairs[i][2]
print(WL_pairs)
print([np.sum(i) for i in WL_pairs])
colors = ['dodgerblue', 'darkblue', 'black']
plt.figure(figsize=(10,10))
# the last step is the outermost ring; earlier steps are drawn on top of it
# with progressively smaller radii
plt.pie(WL_pairs[-1], colors=colors)
ax = plt.gca()
for i in np.arange(len(WL_pairs)-2,-1,-1):
    # builtin float: the np.float alias no longer exists in current NumPy
    ax.pie(WL_pairs[i], colors=colors,
           radius=float(i+1)/float(len(WL_pairs)))
ax.set_aspect('equal')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv, os | |
import numpy as np | |
import pandas as pd | |
# print pd.__version__ | |
from datetime import datetime | |
import matplotlib.pyplot as plt | |
import matplotlib | |
matplotlib.style.use('ggplot') | |
%matplotlib inline | |
# per-game home lines and results
home_lines = pd.read_csv('data/ATS_SUP_home_conf.csv', delimiter=';')
print(home_lines.shape)
print(home_lines.dtypes)
print('')
print(home_lines.head()) # only first 3 weeks of season at this point
# (48, 9) | |
# week object | |
# away_short_name object | |
# home_short_name object | |
# away_score int64 | |
# home_score int64 | |
# home_line float64 | |
# home_conf object | |
# home_favBy float64 | |
# home_ptd int64 | |
# dtype: object | |
# week away_short_name home_short_name away_score home_score home_line \ | |
# 0 Week 1 CAR DEN 20 21 3.5 | |
# 1 Week 1 BUF BAL 7 13 -3.5 | |
# 2 Week 1 CHI HOU 14 23 -6.5 | |
# 3 Week 1 CIN NYJ 23 22 2.5 | |
# 4 Week 1 CLE PHI 10 29 -3.5 | |
# home_conf home_favBy home_ptd | |
# 0 AFC -3.5 1 | |
# 1 AFC 3.5 6 | |
# 2 AFC 6.5 9 | |
# 3 AFC -2.5 -1 | |
# 4 NFC 3.5 19 | |
# home margin relative to the spread, and its absolute size
home_lines['ats_ptd'] = home_lines.home_ptd - home_lines.home_favBy
home_lines['abs_line_ptd'] = np.absolute(home_lines.home_favBy - home_lines.home_ptd)
# not a perfect way of making smaller points visible // get over it
# x + 2/x inflates small misses; when the result lands exactly on the line
# (x == 0) the division would give an infinite size, so only that case uses
# 0.5 as the denominator. All non-zero values are unchanged.
_size_denominator = home_lines.abs_line_ptd.where(home_lines.abs_line_ptd > 0, 0.5)
home_lines['marker_size'] = home_lines.abs_line_ptd + 2.0/_size_denominator
### now, the plot
from math import pi
from bokeh.plotting import figure, output_file, output_notebook, show, ColumnDataSource
from bokeh.models import HoverTool, Legend, Circle, Square
output_notebook()
output_file("sup_ats.html")

# one data source per home conference so each gets its own glyph + hover tool
bk_afc = home_lines[home_lines.home_conf=='AFC']
bk_nfc = home_lines[home_lines.home_conf=='NFC']
source_afc = ColumnDataSource(data=bk_afc)
source_nfc = ColumnDataSource(data=bk_nfc)

TOOLS = "pan,wheel_zoom,reset,save"
p = figure(
    plot_width=600,
    plot_height=600,
    tools=TOOLS,
    title="2016 Performance SUP & ATS (through week 3)",
)

# settings shared by both conference glyphs / hover tools
marker_style = dict(x='home_favBy', y='home_ptd', size='marker_size',
                    line_width=0.5, fill_alpha=0.5)
hover_fields = [
    ("Game", "@away_short_name at @home_short_name"),
    ("Final Score", "@away_score - @home_score"),
    ("Home Line", "@home_line"),
    ("Home Conf", "@home_conf"),
    ("Week", "@week"),
]

# http://stackoverflow.com/questions/29435200/bokeh-plotting-enable-tooltips-for-only-some-glyphs
# AFC home games: firebrick circles
g1 = Circle(fill_color="firebrick", **marker_style)
g1_r = p.add_glyph(source_or_glyph=source_afc, glyph=g1)
g1_hover = HoverTool(renderers=[g1_r], tooltips=list(hover_fields))
p.add_tools(g1_hover)

# NFC home games: navy squares
g2 = Square(fill_color="navy", **marker_style)
g2_r = p.add_glyph(source_or_glyph=source_nfc, glyph=g2)
g2_hover = HoverTool(renderers=[g2_r], tooltips=list(hover_fields))
p.add_tools(g2_hover)

# x / y axes through the origin
p.multi_line(
    [[-35, 0, 35], [0, 0, 0]],
    [[0, 0, 0], [-35, 0, 35]],
    color="black",
    alpha=0.8,
    line_width=2,
)
# dashed diagonal where the home margin equals the spread
p.line([-30, 0, 30], [-30, 0, 30], line_width=2, line_dash=[4, 4])

p.text(26, 1, text=["Win SUP"], text_font_size="8pt", text_color="darkblue")
p.text(26, -3, text=["Lose SUP"], text_font_size="8pt", text_color="dodgerblue")
p.text(23, 24, text=["Win ATS"], text_font_size="8pt", text_color="black", angle=pi/4)
p.text(25, 22, text=["Lose ATS"], text_font_size="8pt", text_color="red", angle=pi/4)

p.xaxis.axis_label = "Home Team Favored By"
p.xaxis.axis_label_text_font_size = "12pt"
p.yaxis.axis_label = "Home Team Points Difference"
p.yaxis.axis_label_text_font_size = "12pt"

# p.legend.location = "top_left"
# legend = Legend(legends=[
# ("sin(x)", [g1_r]),
# ("2*sin(x)", [g2_r])
# ], location=(0, -30))
# p.add_layout(legend, 'right')

# show the results
show(p)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv, os | |
import numpy as np | |
import pandas as pd | |
# print pd.__version__ | |
from datetime import datetime | |
import matplotlib.pyplot as plt | |
import matplotlib | |
matplotlib.style.use('ggplot') | |
%matplotlib inline | |
# per-game home lines and results
home_lines = pd.read_csv('data/ATS_SUP_home_conf.csv', delimiter=';')
print(home_lines.shape)
print(home_lines.dtypes)
print('')
print(home_lines.head()) # only first 3 weeks of season at this point
# (48, 9) | |
# week object | |
# away_short_name object | |
# home_short_name object | |
# away_score int64 | |
# home_score int64 | |
# home_line float64 | |
# home_conf object | |
# home_favBy float64 | |
# home_ptd int64 | |
# dtype: object | |
# week away_short_name home_short_name away_score home_score home_line \ | |
# 0 Week 1 CAR DEN 20 21 3.5 | |
# 1 Week 1 BUF BAL 7 13 -3.5 | |
# 2 Week 1 CHI HOU 14 23 -6.5 | |
# 3 Week 1 CIN NYJ 23 22 2.5 | |
# 4 Week 1 CLE PHI 10 29 -3.5 | |
# home_conf home_favBy home_ptd | |
# 0 AFC -3.5 1 | |
# 1 AFC 3.5 6 | |
# 2 AFC 6.5 9 | |
# 3 AFC -2.5 -1 | |
# 4 NFC 3.5 19 | |
# absolute gap between the home margin and the spread
home_lines['abs_line_ptd'] = np.absolute(home_lines.home_favBy - home_lines.home_ptd)

### now, the plot
# # not a perfect way of making smaller points visible // get over it
# marker area grows with the gap; the +1 keeps exact-cover games visible
s_afc = (home_lines[home_lines.home_conf=='AFC']['abs_line_ptd'] + 1)*10
s_nfc = (home_lines[home_lines.home_conf=='NFC']['abs_line_ptd'] + 1)*10

# AFC home games create the axes; NFC home games are drawn onto the same axes
ax = home_lines[home_lines.home_conf=='AFC'].plot.scatter(
    x='home_favBy', y='home_ptd',
    color='red', s=s_afc, alpha=0.5,
    label='AFC', figsize=(8,8),
)
home_lines[home_lines.home_conf=='NFC'].plot.scatter(
    x='home_favBy', y='home_ptd',
    color='blue', s=s_nfc, alpha=0.5,
    label='NFC', ax=ax,
)

# reference lines: dashed diagonal (margin == spread) plus both axes
ats = np.linspace(-35,35,100)
fav = np.linspace(-35,35,100)
ptd = np.linspace(-35,35,100)
plt.plot(ats, ats, 'g--')
plt.plot(fav, np.zeros(100), 'k-')
plt.plot(np.zeros(100), ptd, 'k-')
plt.xlim([-35,35])
plt.ylim([-35,35])

plt.title("2016 Home Performance SUP & ATS\n(through week 3)")
plt.xlabel("Home Team Favored By", fontweight='bold')
plt.ylabel("Home Point Difference", fontweight='bold')
plt.legend(bbox_to_anchor=(0, 1), loc='lower right', ncol=1)

# region labels
plt.text(25, 1, 'Win SUP', fontsize=10, fontweight='bold', color='darkblue')
plt.text(25, -2.5, 'Lose SUP', fontsize=10, fontweight='bold', color='dodgerblue')
plt.text(25, 32, 'Win ATS', fontsize=10, fontweight='bold', rotation=45)
plt.text(27.5, 30.5, 'Lose ATS', fontsize=10, fontweight='bold', color='red', rotation=45)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment