Created
February 3, 2024 03:28
-
-
Save micahmelling/65badf7bc58a93eadc606cce8bca58cc to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from functools import reduce | |
import warnings | |
import jenkspy | |
import pandas as pd | |
# NOTE(review): this silences every warning for the whole process, including
# pandas deprecation and chained-assignment warnings; narrow it if those matter.
warnings.filterwarnings('ignore')

# Passing None lifts pandas' display limits so the frame printed by the script
# entry point is shown without row/column truncation.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
def time_to_float(time_str):
    """Convert a ``'minutes:seconds'`` string into total seconds.

    Args:
        time_str: Text made of two integer fields separated by ``':'``,
            for example ``'2:45'``.

    Returns:
        ``minutes * 60 + seconds`` as a ``float``.

    Raises:
        ValueError: If the text does not split into exactly two fields or a
            field is not an integer.
    """
    minutes, seconds = map(int, time_str.split(':'))
    return float(minutes * 60 + seconds)
def clean_data(df):
    """Return a cleaned copy of a drive-statistics frame.

    The 'League Total' row is removed, the 'Own ' prefix is stripped from the
    'Start' column before it is cast to float, and 'Time' is converted from
    ``'minutes:seconds'`` text to seconds via ``time_to_float``.

    Args:
        df: DataFrame with at least the columns 'Tm', 'Start' and 'Time'.
            It is not modified.

    Returns:
        A new DataFrame holding the cleaned rows; the original index labels
        of the kept rows are preserved.
    """
    # Filter first and take an explicit copy: the caller's frame stays
    # untouched and later column assignments do not write into a slice.
    cleaned = df.loc[df['Tm'] != 'League Total'].copy()

    # regex=False makes this a literal replacement regardless of the pandas
    # version's default for the `regex` argument.
    cleaned['Start'] = cleaned['Start'].str.replace('Own ', '', regex=False).astype(float)
    cleaned['Time'] = cleaned['Time'].apply(time_to_float)
    return cleaned
def create_bins(df, cols, classes=4):
    """Add a 1-based ``'<col>_bin'`` column for each requested column.

    Break points for every column come from ``jenkspy.jenks_breaks`` and the
    values are then assigned to those intervals with ``pd.cut``.

    Args:
        df: DataFrame containing every column named in ``cols``. It is not
            modified.
        cols: Iterable of column names to bin.
        classes: Number of classes requested from ``jenkspy.jenks_breaks``.

    Returns:
        A copy of ``df`` with one extra ``'<col>_bin'`` column per entry in
        ``cols``.
    """
    # Work on a copy so the new columns are never written into a slice of
    # another frame (the caller may pass the result of a .loc filter).
    binned = df.copy()
    for col in cols:
        breaks = jenkspy.jenks_breaks(binned[col], n_classes=classes)
        # labels=False yields 0-based interval positions; duplicates='drop'
        # tolerates repeated break values.
        positions = pd.cut(
            binned[col],
            bins=breaks,
            include_lowest=True,
            labels=False,
            duplicates='drop',
        )
        # Shift so the lowest bin is numbered 1 rather than 0.
        binned[f'{col}_bin'] = positions + 1
    return binned
def prep_data(df, analyze_cols):
    """Clean a raw frame and attach bin columns for the analysed stats.

    Args:
        df: Raw DataFrame as accepted by ``clean_data``.
        analyze_cols: Column names passed to ``create_bins``.

    Returns:
        The DataFrame returned by ``create_bins`` after cleaning.
    """
    return create_bins(clean_data(df), analyze_cols)
def process_data(path, analyze_cols):
    """Load a CSV file and run it through ``prep_data``.

    Args:
        path: Location of the CSV file to read.
        analyze_cols: Column names to bin, forwarded to ``prep_data``.

    Returns:
        The prepared DataFrame.
    """
    # skiprows=1 discards the first line of the file so that the second line
    # is used as the header row.
    raw_df = pd.read_csv(path, skiprows=1)
    return prep_data(raw_df, analyze_cols)
def _first_team_value(df, team, col):
    """Return ``col`` from the first row of ``df`` whose 'Tm' equals ``team``."""
    matches = df.loc[df['Tm'] == team, col]
    if matches.empty:
        # Same exception type as an out-of-bounds .iloc[0], but it names the
        # team and column that could not be found.
        raise IndexError(f"no row with Tm == {team!r} while reading {col!r}")
    return matches.iloc[0]


def create_analysis_dataframe(offense_data_path, defense_data_path, analyze_cols, analyze_teams):
    """Build a side-by-side table of offensive and defensive bins per team.

    Both CSV files are processed with ``process_data`` (so bins are computed
    over every row kept by the cleaning step, not only the selected teams),
    then the bin of each analysed column is looked up for each team.

    Args:
        offense_data_path: Path of the CSV passed to ``process_data`` for the
            offensive numbers.
        defense_data_path: Path of the CSV passed to ``process_data`` for the
            defensive numbers.
        analyze_cols: Column names to bin and report.
        analyze_teams: Values of the 'Tm' column to report on.

    Returns:
        DataFrame with a 'stat' column (``'<col>_bin'`` names) plus
        ``'offensive_bin_<team>'`` and ``'defensive_bin_<team>'`` columns for
        each team. With no teams, only the 'stat' column is returned.

    Raises:
        IndexError: If a requested team has no row in one of the frames.
    """
    offense_df = process_data(offense_data_path, analyze_cols)
    defense_df = process_data(defense_data_path, analyze_cols)

    bin_cols = [f'{col}_bin' for col in analyze_cols]

    # One frame per team, built in a single constructor call instead of
    # growing an empty DataFrame one row at a time with pd.concat.
    output_dfs = [
        pd.DataFrame({
            'stat': bin_cols,
            f'offensive_bin_{team}': [_first_team_value(offense_df, team, col) for col in bin_cols],
            f'defensive_bin_{team}': [_first_team_value(defense_df, team, col) for col in bin_cols],
        })
        for team in analyze_teams
    ]

    # reduce() without an initial value fails on an empty list.
    if not output_dfs:
        return pd.DataFrame({'stat': bin_cols})

    return reduce(lambda left, right: pd.merge(left, right, on='stat', how='inner'), output_dfs)
if __name__ == "__main__":
    # Data source reference (presumably where the two CSV files were
    # exported from -- confirm):
    # https://www.pro-football-reference.com/years/2023/
    drives_analysis_df = create_analysis_dataframe(
        offense_data_path='drive_averages.csv',
        defense_data_path='drives_against.csv',
        analyze_cols=['Sc%', 'TO%', 'Plays.1', 'Yds', 'Start', 'Time', 'Pts'],
        analyze_teams=['Kansas City Chiefs', 'San Francisco 49ers']
    )
    print(drives_analysis_df)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment