Skip to content

Instantly share code, notes, and snippets.

View jes-moore's full-sized avatar

Jesse Moore jes-moore

View GitHub Profile
#!/bin/bash
#Variables
# NOTE(review): the single quotes store the literal text
# "ls /path/to/csv/files/*.csv"; nothing is executed or glob-expanded on this
# line. Presumably the string is run as a command further down - confirm
# against the rest of the script, which is not visible here.
csv_files='ls /path/to/csv/files/*.csv'
# Despite its name, out_file holds the output folder; the three paths below
# are all built underneath it.
out_file='/path/to/output/folder'
# Path of a text file inside the output folder (presumably the list of CSV
# locations mentioned in the comment at the end of this section).
file_locs=$out_file/files.txt
# Paths for the merged outputs, one with a .fits and one with a .hdf5 suffix.
fits_out=$out_file/merged.fits
hdf5_out=$out_file/merged.hdf5
# Create Txt File of Locations
# Generate linear fit and chart 1
# NOTE(review): `stats` is presumably scipy.stats and `go` plotly.graph_objs;
# their imports and the construction of `seasonal_df` are not visible here.
# Fit a line with 'rebound_ratio' as the first argument and 'win' as the
# second (x and y respectively, if this is scipy.stats.linregress).
slope, intercept, r_value, p_value, std_err = stats.linregress(
seasonal_df['rebound_ratio'], seasonal_df['win'])
# Fitted value at each observed rebound_ratio.
line = slope * seasonal_df['rebound_ratio'].values + intercept
# Marker-only scatter trace of the raw (rebound_ratio, win) points.
# NOTE(review): go.Marker appears to be deprecated in newer plotly releases
# in favour of go.scatter.Marker - confirm against the installed version.
# NOTE(review): this call is cut off after the `marker` argument in this view.
ch1_data = go.Scatter(
x=seasonal_df['rebound_ratio'].values,
y=seasonal_df['win'].values,
mode='markers',
marker=go.Marker(color='rgb(255, 127, 14)'),
def get_seasonal_reb_df():
# Starts from the frame returned by load_rebounds_df() and counts rows per
# (for_name, season, lead_to_reb) group.
# NOTE(review): only the beginning of this function is visible; its return
# value cannot be confirmed from here.
shot_to_reb = load_rebounds_df()
## Use only regular season
# Keeps only the rows whose `type` column equals 'R'.
shot_to_reb = shot_to_reb[shot_to_reb.type == 'R']
# 'count' tallies the non-null `counter` values in each group; reset_index()
# turns the three group keys back into ordinary columns.
rebound_df = shot_to_reb.groupby(['for_name', 'season', 'lead_to_reb']).agg({
'counter':
'count'
}).reset_index()
# Split the aggregated rows at shotTimeDiff == 3: the "reb_" totals sum rows
# with shotTimeDiff <= 3, the "norm_" totals sum rows with shotTimeDiff > 3.
# NOTE(review): the unit of shotTimeDiff is not shown here (presumably
# seconds) - confirm where the column is computed.
reb_goals = shot_df_agg2.loc[shot_df_agg2.shotTimeDiff <= 3, 'numGoals'].sum()
reb_shots = shot_df_agg2.loc[shot_df_agg2.shotTimeDiff <= 3, 'numShots'].sum()
norm_goals = shot_df_agg2.loc[shot_df_agg2.shotTimeDiff > 3, 'numGoals'].sum()
norm_shots = shot_df_agg2.loc[shot_df_agg2.shotTimeDiff > 3, 'numShots'].sum()
## Data
# Two bars: the <=3 total divided by the >3 total, for shots and for goals.
# NOTE(review): this call is cut off after the `y` argument in this view.
data = go.Bar(
x=['Shots', 'Goals'],
y=[reb_shots / norm_shots, reb_goals / norm_goals],
def agg_rebounds_df2():
# Sums `counter` per (shotTimeDiff, nextShotResult) pair and then pivots the
# result.
# NOTE(review): the function is cut off inside the pivot_table(...) call, so
# the pivot arguments and the return value are not visible here.
## Aggregate shots and goals
shot_df = load_rebounds_df()
# reset_index() turns the two group keys back into ordinary columns.
shot_df_agg2 = shot_df.groupby(['shotTimeDiff', 'nextShotResult']).agg({
'counter':
'sum'
}).reset_index()
## Create pivot table
shot_df_agg2 = shot_df_agg2.pivot_table(
## Scoring Ratio Chart
# Line-and-marker trace of goalRatio against shotTimeDiff.
# .loc[0:20, ...] is a label-based slice that includes both end labels, so it
# selects the rows whose index labels run from 0 through 20.
# NOTE(review): assumes shot_df_agg has an integer index starting at 0 -
# its construction is not visible here.
# Hover labels come from the goalRatioText column (hoverinfo='text').
shotTimeData = go.Scatter(
x=shot_df_agg.loc[0:20, 'shotTimeDiff'],
y=shot_df_agg.loc[0:20, 'goalRatio'],
mode='markers+lines',
text=shot_df_agg.loc[0:20, 'goalRatioText'],
hoverinfo='text')
# The single trace is wrapped in a list named `data`.
data = [shotTimeData]
## Layout
def agg_rebounds_df():
# Aggregates `counter` from the rebounds frame by shot time differential.
# NOTE(review): only the beginning of this function is visible; its return
# value cannot be confirmed from here.
shot_df = load_rebounds_df()
# Aggregate by Time Differential
# Drops rows whose shotTimeDiff is 0 or negative.
shot_df = shot_df[shot_df.shotTimeDiff > 0]
# NOTE(review): this result is overwritten by the next assignment to
# shot_df_agg before it is read, so the line looks redundant.
shot_df_agg = shot_df.groupby('shotTimeDiff').agg({'counter':'sum'})
# Aggregate shots and goals
# Sum of `counter` per (shotTimeDiff, nextShotResult) pair, with the group
# keys restored as ordinary columns.
shot_df_agg = shot_df.groupby(['shotTimeDiff','nextShotResult'])\
.agg({'counter':'sum'}).reset_index()
def load_rebounds_df():
# Loads the expanded shooting frame, adds four placeholder columns and then
# walks the games one at a time.
# NOTE(review): the loop body is cut off in this view, so how the placeholder
# columns get filled and what is returned cannot be confirmed from here.
shot_df, unique_games = load_expanded_shooting_df()
# Placeholder defaults: 0 for the numeric columns, the string 'None' (not the
# None object) for nextShotResult.
shot_df['shotTimeDiff'] = 0
shot_df['nextShotResult'] = 'None'
shot_df['nextShotX'] = 0
shot_df['nextShotY'] = 0
# tqdm wraps the iterable to display a progress bar.
for gameid in tqdm(unique_games):
# Rows belonging to this game, ordered by play_num.
game_shot_df =\
shot_df.loc[shot_df.game_id == gameid, :].sort_values('play_num')
def load_expanded_shooting_df():
# Starts from load_shooting_df() and prepares the team lookup table read from
# data/team_info.csv.
# NOTE(review): only the beginning of this function is visible; how team_info
# is combined with shot_df and what is returned cannot be confirmed from here.
shot_df = load_shooting_df()
# Load Team Info
team_info = pd.read_csv('data/team_info.csv')
# combined_name is shortName and teamName joined by a single space.
team_info['combined_name'] = team_info.shortName + ' ' + team_info.teamName
# Remove the listed columns from team_info in place (no new frame is made).
team_info.drop(
['franchiseId', 'shortName', 'teamName', 'abbreviation', 'link'],
axis=1,
inplace=True)
import pandas as pd
from tqdm import tqdm
import time
def load_shooting_df():
# Reads data/game_plays.csv and keeps only the 'Goal' and 'Shot' events.
# NOTE(review): only the beginning of this function is visible; its return
# value cannot be confirmed from here.
# Get Shot on and Against
plays_df = pd.read_csv('data/game_plays.csv')
# NOTE(review): shooting_df is a filtered selection of plays_df; if later code
# assigns columns on it, pandas may emit SettingWithCopyWarning - confirm
# whether a .copy() is needed.
shooting_df = plays_df.loc[plays_df.event.isin(['Goal', 'Shot']), :]
# Format Shot by and Goalie