"""Visualize the probability of final-digit score results in the Super Bowl.

To start, download the CSV file containing the entire history of Super Bowl
scores and save it in the working directory as
'super-bowl-data-pro-football-reference.csv'.
Link: https://www.pro-football-reference.com/super-bowl/
"""
import pdb

import pandas
import seaborn
def final_digit_probabilities(scores):
    """Return how often the scores end in each digit 0 through 9.

    Args:
        scores: Iterable of numeric scores. Whole-number floats such as
            ``24.0`` are accepted, so a column that pandas parsed as float
            still yields the right digit.

    Returns:
        A dict with exactly the ten keys 0..9. Each value is the fraction of
        scores ending in that digit, rounded to 2 decimal places.

    Raises:
        ValueError: If ``scores`` is empty (there is nothing to normalize by).
    """
    # Use arithmetic rather than str(score)[-1]: the string form of a float
    # score ("24.0") always ends in "0", which would corrupt the counts.
    digits = [int(score) % 10 for score in scores]
    if not digits:
        raise ValueError("no scores available to compute probabilities from")

    total = len(digits)
    # Every digit gets a key, even when it never occurs, so that the pivot
    # table built from the result is always a full 10x10 grid.
    return {digit: round(digits.count(digit) / total, 2) for digit in range(10)}


def combine_digit_probabilities(t1_probabilities, t2_probabilities):
    """Combine two per-digit distributions into one value per digit pair.

    Args:
        t1_probabilities: Mapping of final digit -> probability for team 1.
        t2_probabilities: Mapping of final digit -> probability for team 2.

    Returns:
        A dict keyed by ``(t1_digit, t2_digit)`` whose value is the mean of
        the two individual probabilities.
    """
    # NOTE(review): the mean of two marginals is not a joint probability
    # (the cells do not sum to 1). If a true probability per square is
    # wanted, this probably should be p1 * p2 or an empirical pair count.
    # The original formula is kept so the output is unchanged - confirm.
    return {
        (t1_digit, t2_digit): (p1 + p2) / 2
        for t1_digit, p1 in t1_probabilities.items()
        for t2_digit, p2 in t2_probabilities.items()
    }


def main():
    """Read the Super Bowl score history and save a final-digit heatmap.

    Reads 'super-bowl-data-pro-football-reference.csv' from the working
    directory and writes the heatmap image to 'out.png'.
    """
    # pandas renames the second "Pts" column to "Pts.1" by default; the
    # removed-in-2.0 `mangle_dupe_cols` flag is not needed for that.
    sb_dataframe = pandas.read_csv('super-bowl-data-pro-football-reference.csv')

    # Ignore rows without both scores (e.g. a game that is listed but has not
    # been played yet) instead of failing on NaN.
    played = sb_dataframe[["Pts", "Pts.1"]].dropna()

    t1_final_digit_scores = final_digit_probabilities(played["Pts"])
    t2_final_digit_scores = final_digit_probabilities(played["Pts.1"])
    final_scores = combine_digit_probabilities(
        t1_final_digit_scores, t2_final_digit_scores
    )

    # Build the long-form frame in one pass, then pivot it into a grid with
    # team-2 digits as rows and team-1 digits as columns.
    final_scores_df = pandas.DataFrame(
        [(t1_digit, t2_digit, value) for (t1_digit, t2_digit), value in final_scores.items()],
        columns=['T1', 'T2', '%'],
    )
    # Keyword arguments are required by DataFrame.pivot since pandas 2.0.
    pivotted = final_scores_df.pivot(index='T2', columns='T1', values='%')

    hmap = seaborn.heatmap(pivotted)
    fig = hmap.get_figure()
    fig.savefig("out.png")


if __name__ == "__main__":
    main()