Skip to content

Instantly share code, notes, and snippets.

@raleighlittles
Created October 11, 2022 05:49
Show Gist options
  • Save raleighlittles/1b9f4463ed07169009f328405ef84f00 to your computer and use it in GitHub Desktop.
Save raleighlittles/1b9f4463ed07169009f328405ef84f00 to your computer and use it in GitHub Desktop.
Script for generating results of Superbowl Squares score analysis

About

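This script visualizes, as a heatmap, how likely each combination of final score digits is for the two teams in the Super Bowl, based on historical results (the basis of a "Super Bowl Squares" grid).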

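To start, download the CSV file containing the entire history of Super Bowl scores and save it as `super-bowl-data-pro-football-reference.csv` in the directory you run the script from.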

Link: https://www.pro-football-reference.com/super-bowl/

Result

output image

Code

import pandas
import pdb


# Input CSV exported from pro-football-reference and the image written by the script.
CSV_PATH = 'super-bowl-data-pro-football-reference.csv'
OUTPUT_PATH = "out.png"

DIGITS = tuple(range(10))


def final_digit_probabilities(scores) -> dict:
    """Return the relative frequency of each final digit (0-9) in *scores*.

    Args:
        scores: Iterable of numeric scores (ints or floats). Missing values
            (NaN/None) are skipped rather than counted.

    Returns:
        A dict mapping every digit 0..9 to the fraction of valid scores
        ending in that digit. All ten keys are always present.

    Raises:
        ValueError: If *scores* contains no valid (non-missing) score.
    """
    counts = {digit: 0 for digit in DIGITS}
    valid_scores = 0
    for score in scores:
        if pandas.isna(score):
            continue
        # Use integer arithmetic instead of str(score)[-1]: a column that
        # pandas parsed as float would stringify 24 as "24.0" and yield "0".
        counts[int(score) % 10] += 1
        valid_scores += 1

    if not valid_scores:
        raise ValueError("no valid scores to compute final-digit probabilities from")

    return {digit: count / valid_scores for digit, count in counts.items()}


def build_squares_grid(t1_probabilities: dict, t2_probabilities: dict) -> "pandas.DataFrame":
    """Build the 10x10 grid of joint final-digit probabilities.

    The two teams' final digits are treated as independent, so the
    probability of a square is the product of the two marginal
    probabilities (the grid therefore sums to 1).

    Args:
        t1_probabilities: Digit -> probability mapping for the first team.
        t2_probabilities: Digit -> probability mapping for the second team.

    Returns:
        A DataFrame whose rows are the second team's digit ('T2') and whose
        columns are the first team's digit ('T1').
    """
    return pandas.DataFrame(
        [
            [t1_probabilities[t1_digit] * t2_probabilities[t2_digit] for t1_digit in DIGITS]
            for t2_digit in DIGITS
        ],
        index=pandas.Index(list(DIGITS), name='T2'),
        columns=pandas.Index(list(DIGITS), name='T1'),
    )


def main(csv_path=CSV_PATH, output_path=OUTPUT_PATH):
    """Read the Super Bowl score history and save the squares heatmap.

    Args:
        csv_path: CSV file with the two teams' points in the "Pts" columns.
        output_path: Where the heatmap image is written.
    """
    # Imported here because the file's import block does not import seaborn,
    # although the script has always needed it to draw the heatmap.
    import seaborn

    # The CSV has two columns named "Pts"; pandas renames the second one to
    # "Pts.1" by default, so no extra read_csv argument is needed.
    sb_dataframe = pandas.read_csv(csv_path)

    t1_probabilities = final_digit_probabilities(sb_dataframe["Pts"])
    t2_probabilities = final_digit_probabilities(sb_dataframe["Pts.1"])

    grid = build_squares_grid(t1_probabilities, t2_probabilities)

    hmap = seaborn.heatmap(grid)
    fig = hmap.get_figure()
    fig.savefig(output_path)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment