Skip to content

Instantly share code, notes, and snippets.

@micahmelling
Created February 3, 2024 03:28
Show Gist options
  • Save micahmelling/65badf7bc58a93eadc606cce8bca58cc to your computer and use it in GitHub Desktop.
Save micahmelling/65badf7bc58a93eadc606cce8bca58cc to your computer and use it in GitHub Desktop.
from functools import reduce
import warnings
import jenkspy
import pandas as pd
# Suppress every warning category for the lifetime of the process.
warnings.filterwarnings('ignore')

# Print frames in full: None lifts the row/column limits, and a width of
# None leaves the line width for pandas to determine.
pd.set_option(
    'display.max_rows', None,
    'display.max_columns', None,
    'display.width', None,
)
def time_to_float(time_str):
    """Convert a ``'minutes:seconds'`` string into total seconds as a float.

    The string must contain exactly one ``':'`` separating two integer
    fields; anything else raises ``ValueError``.
    """
    minute_part, second_part = time_str.split(':')
    total_seconds = int(minute_part) * 60 + int(second_part)
    return float(total_seconds)
def clean_data(df):
    """Return a cleaned copy of a raw stats table.

    The following steps are applied:

    * rows whose ``Tm`` value is ``'League Total'`` are dropped;
    * ``Start`` has the literal prefix ``'Own '`` removed and is cast to
      float;
    * ``Time`` is converted from a ``'minutes:seconds'`` string to seconds
      with ``time_to_float``.

    The input frame is never modified, so the function can safely be called
    more than once on the same frame.

    Args:
        df: Frame with at least the columns ``Tm``, ``Start`` and ``Time``.

    Returns:
        A new, independent ``DataFrame`` containing the cleaned rows.
    """
    # Filter before converting so the aggregate row is never parsed, and copy
    # so the assignments below touch neither the caller's frame nor a view.
    cleaned = df.loc[df['Tm'] != 'League Total'].copy()
    # regex=False: 'Own ' is a literal prefix, not a pattern.
    cleaned['Start'] = cleaned['Start'].str.replace('Own ', '', regex=False).astype(float)
    cleaned['Time'] = cleaned['Time'].apply(time_to_float)
    return cleaned
def create_bins(df, cols, classes=4):
    """Add a 1-based ``<col>_bin`` column to *df* for every column in *cols*.

    Bin edges come from ``jenkspy.jenks_breaks`` called with
    ``n_classes=classes``; ``pd.cut`` then maps each value to the integer
    position of the bin it falls into.  *df* is modified in place and is
    also returned.
    """
    for column in cols:
        values = df[column]
        edges = jenkspy.jenks_breaks(values, n_classes=classes)
        # labels=False yields 0-based integer bin codes.
        zero_based = pd.cut(
            values,
            bins=edges,
            include_lowest=True,
            labels=False,
            duplicates='drop',
        )
        # Shift the codes so the lowest bin is numbered 1.
        df[f'{column}_bin'] = zero_based + 1
    return df
def prep_data(df, analyze_cols):
    """Clean *df* with ``clean_data``, then bin *analyze_cols* with ``create_bins``."""
    return create_bins(clean_data(df), analyze_cols)
def process_data(path, analyze_cols):
    """Load the CSV at *path* and run it through ``prep_data``.

    The first line of the file is skipped (``skiprows=1``), so the header is
    read from the line after it.
    """
    raw_stats = pd.read_csv(path, skiprows=1)
    return prep_data(raw_stats, analyze_cols)
def _team_bin_values(df, team, bin_cols, source):
    """Return the *bin_cols* values from the first row of *df* for *team*."""
    team_rows = df.loc[df['Tm'] == team]
    if team_rows.empty:
        # Same exception type an out-of-bounds ``.iloc[0]`` would raise, but
        # with a message that says which team and which input are at fault.
        raise IndexError(f'team {team!r} not found in {source!r}')
    return [team_rows[col].iloc[0] for col in bin_cols]


def create_analysis_dataframe(offense_data_path, defense_data_path, analyze_cols, analyze_teams):
    """Build a side-by-side table of offensive and defensive bins per team.

    Both CSVs are loaded and binned with ``process_data``.  The result has
    one row per entry of *analyze_cols*, labelled ``'<col>_bin'`` in the
    ``stat`` column, and for every team the columns
    ``offensive_bin_<team>`` and ``defensive_bin_<team>``.

    Args:
        offense_data_path: CSV read for the ``offensive_bin_*`` columns.
        defense_data_path: CSV read for the ``defensive_bin_*`` columns.
        analyze_cols: Column names to bin and report.
        analyze_teams: ``Tm`` values to include, in output column order.

    Returns:
        The merged ``DataFrame``.  If *analyze_teams* is empty, a frame
        holding only the ``stat`` column is returned.

    Raises:
        IndexError: If a requested team has no row in either input.
    """
    offense_df = process_data(offense_data_path, analyze_cols)
    defense_df = process_data(defense_data_path, analyze_cols)
    bin_cols = [f'{col}_bin' for col in analyze_cols]

    # Build each team's frame in a single construction rather than growing it
    # row by row with pd.concat starting from an empty DataFrame.
    team_frames = [
        pd.DataFrame({
            'stat': bin_cols,
            f'offensive_bin_{team}': _team_bin_values(offense_df, team, bin_cols, offense_data_path),
            f'defensive_bin_{team}': _team_bin_values(defense_df, team, bin_cols, defense_data_path),
        })
        for team in analyze_teams
    ]

    if not team_frames:
        # reduce() has nothing to fold; return just the stat labels.
        return pd.DataFrame({'stat': bin_cols})

    return reduce(
        lambda left, right: pd.merge(left, right, on='stat', how='inner'),
        team_frames,
    )
if __name__ == "__main__":
    # Data source reference: https://www.pro-football-reference.com/years/2023/
    # NOTE(review): 'Plays.1' is presumably the name pandas gives a second
    # 'Plays' column in the CSV header -- confirm against the input files.
    stat_columns = ['Sc%', 'TO%', 'Plays.1', 'Yds', 'Start', 'Time', 'Pts']
    teams = ['Kansas City Chiefs', 'San Francisco 49ers']

    drives_analysis_df = create_analysis_dataframe(
        offense_data_path='drive_averages.csv',
        defense_data_path='drives_against.csv',
        analyze_cols=stat_columns,
        analyze_teams=teams,
    )
    print(drives_analysis_df)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment