Skip to content

Instantly share code, notes, and snippets.

@nickwan
Created October 19, 2020 05:09
Show Gist options
  • Save nickwan/a63cff9bb8082199df982f6258f6c05f to your computer and use it in GitHub Desktop.
Save nickwan/a63cff9bb8082199df982f6258f6c05f to your computer and use it in GitHub Desktop.
# Data load (variable names follow Lau's notebook).
# NOTE(review): `weeks_fns`, `pd`, `os` and `tqdm` come from outside this
# chunk -- confirm they are defined/imported before this cell runs.
week1_df = pd.read_csv(weeks_fns[0])
data_dir = '/content/drive/My Drive/nflfastR-data'

# Keep only the parquet files whose name mentions 2018.
data_files = [
    f'{data_dir}/data/{x}'
    for x in os.listdir(f"{data_dir}/data")
    if x.endswith('.parquet') and '2018' in x
]

# Read the files in reverse listing order and concatenate them once.
# (DataFrame.append inside a loop re-copies the accumulated frame on every
# iteration and no longer exists in pandas >= 2.0.)
_frames = [pd.read_parquet(fn) for fn in tqdm(reversed(data_files))]
# pd.concat raises on an empty list, so fall back to an empty frame when no
# file matched.
fastr_18 = pd.concat(_frames, ignore_index=True) if _frames else pd.DataFrame()

# Roster lookup table: one row per distinct (gsisId, nflId) pair, with rows
# missing either id removed.
roster_data = pd.read_csv(f"{data_dir}/roster-data/roster.csv")
roster_data = roster_data.loc[:, ['teamPlayers.gsisId', 'teamPlayers.nflId']].drop_duplicates().dropna()
# Derive GSIS ids and nflIds for the passer and receiver of every play.
# Differs from the notebook this was copied from in two ways:
#   * the receiver id is decoded entirely from `receiver_id` (the copied code
#     took its first hex fragment from `passer_id`);
#   * the nflId lookup uses a gsisId -> nflId mapping instead of assigning the
#     result of a merge, which only lined up row-for-row when both the frame
#     index was 0..n-1 and the roster had no repeated gsisId.
def _fastr_id_to_gsis(ids):
    """Turn a Series of dash-separated hex player ids into '00-XXXXXXX' ids.

    The hex string is assembled from the last two characters of the third
    dash-separated segment, the whole fourth segment and the first four
    characters of the fifth segment. It is hex-decoded to text, left-padded
    with zeros to seven characters and prefixed with '00-'.

    Only rows with a non-null decoded id are returned; assigning the result
    to a DataFrame column leaves the remaining rows as NaN.
    """
    segments = ids.str.split('-')
    hex_digits = segments.str[2].str[-2:] + segments.str[3] + segments.str[4].str[:4]
    # NOTE(review): `decode_hex` is defined outside this chunk; the `[0]`
    # suggests it returns a (bytes, length) pair like
    # codecs.getdecoder("hex_codec") -- confirm.
    decoded = hex_digits.apply(
        lambda x: decode_hex(x)[0].decode("utf-8") if pd.notnull(x) else x
    )
    return '00-' + decoded.loc[~pd.isna(decoded)].astype(str).str.zfill(7)


fastr_18['passer_gsis_id'] = _fastr_id_to_gsis(fastr_18['passer_id'])
fastr_18['receiver_gsis_id'] = _fastr_id_to_gsis(fastr_18['receiver_id'])

# gsisId -> nflId lookup. If a gsisId appears more than once, the first
# roster row wins, so the mapping index is unique as Series.map requires.
_gsis_to_nfl = (
    roster_data[['teamPlayers.nflId', 'teamPlayers.gsisId']]
    .dropna()
    .drop_duplicates(subset='teamPlayers.gsisId')
    .set_index('teamPlayers.gsisId')['teamPlayers.nflId']
)
# Ids with no roster match (including NaN) map to NaN, like the left merge did.
fastr_18['passer_nflId'] = fastr_18['passer_gsis_id'].map(_gsis_to_nfl)
fastr_18['receiver_nflId'] = fastr_18['receiver_gsis_id'].map(_gsis_to_nfl)
# Attach each play's passer/receiver nflId to the week-1 rows.
# `old_game_id` is cast to float first; per the original author's note the
# join needs this cast (presumably so the key dtype is compatible with
# `gameId` -- confirm against the actual column dtypes).
fastr_18['old_game_id'] = fastr_18['old_game_id'].astype(float)
_play_roles = fastr_18[['play_id', 'old_game_id', 'passer_nflId', 'receiver_nflId']]
week1_df = week1_df.merge(
    _play_roles,
    how='left',
    left_on=['gameId', 'playId'],
    right_on=['old_game_id', 'play_id'],
)

# Flag the rows whose nflId equals the play's passer / receiver nflId.
week1_df['IsPasser'] = week1_df['nflId'].eq(week1_df['passer_nflId'])
week1_df['IsReceiver'] = week1_df['nflId'].eq(week1_df['receiver_nflId'])
one_play = week1_df.loc[(week1_df['gameId']==2018090909) & (week1_df['playId']==3162), ['gameId', 'playId']].sample(1)
one_play = one_play.merge(week1_df)
outcome_events = ['pass_arrived', 'pass_outcome_incomplete',
'pass_outcome_caught', 'pass_outcome_interception',
'pass_outcome_caught', 'pass_outcome_touchdown']
pass_arrive_frame = one_play.loc[one_play['event'].isin(outcome_events), 'frameId'].min()
one_play.loc[one_play['frameId']==pass_arrive_frame]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment