Jesse Moore jes-moore

## convert_to_hdfs.sh
#!/bin/bash

# Variables: the input listing command and the output locations.
# NOTE(review): the single quotes store the literal text
# "ls /path/to/csv/files/*.csv", not the command's output. If the expanded
# file list is wanted at this point, $(...) would be needed instead --
# confirm against how $csv_files is consumed further down the script.
csv_files='ls /path/to/csv/files/*.csv'
# Despite its name, out_file holds the output *folder*; every path below is
# built by appending a file name to it.
out_file='/path/to/output/folder'
# Output paths derived from out_file (a .txt listing, a .fits and a .hdf5 file).
file_locs=$out_file/files.txt
fits_out=$out_file/merged.fits
hdf5_out=$out_file/merged.hdf5

# Create Txt File of Locations

## compare_wins_shots.py
# Generate linear fit and chart 1
# Regress 'win' (y) on 'rebound_ratio' (x). `stats` is presumably scipy.stats;
# its import is not visible in this excerpt -- TODO confirm.
slope, intercept, r_value, p_value, std_err = stats.linregress(
    seasonal_df['rebound_ratio'], seasonal_df['win'])
# Value of the fitted line at each observed rebound_ratio.
line = slope * seasonal_df['rebound_ratio'].values + intercept

ch1_data = go.Scatter(
    x=seasonal_df['rebound_ratio'].values,
    y=seasonal_df['win'].values,
    mode='markers',
    marker=go.Marker(color='rgb(255, 127, 14)'),

## seasonal_df.py
def get_seasonal_reb_df():
    """Count regular-season rows per (for_name, season, lead_to_reb) group.

    Loads the frame from ``load_rebounds_df()``, keeps the rows whose
    ``type`` equals ``'R'`` and counts the ``counter`` entries of every
    (``for_name``, ``season``, ``lead_to_reb``) combination, with the group
    keys restored as ordinary columns.

    NOTE(review): no return statement is visible for this function here.
    """
    shot_to_reb = load_rebounds_df()

    # Use only regular season
    regular_season = shot_to_reb.type == 'R'
    shot_to_reb = shot_to_reb[regular_season]

    group_keys = ['for_name', 'season', 'lead_to_reb']
    grouped = shot_to_reb.groupby(group_keys)
    rebound_df = grouped.agg({'counter': 'count'}).reset_index()

## prop_goals_shots.py
# Totals over rows with shotTimeDiff <= 3. The reb_ prefix suggests these are
# treated as rebound shots; the unit of shotTimeDiff is not visible here --
# TODO confirm.
reb_goals = shot_df_agg2.loc[shot_df_agg2.shotTimeDiff <= 3, 'numGoals'].sum()
reb_shots = shot_df_agg2.loc[shot_df_agg2.shotTimeDiff <= 3, 'numShots'].sum()

# Totals over rows with shotTimeDiff > 3.
# NOTE(review): rows whose shotTimeDiff is NaN match neither mask, so they
# are left out of both sets of totals.
norm_goals = shot_df_agg2.loc[shot_df_agg2.shotTimeDiff > 3, 'numGoals'].sum()
norm_shots = shot_df_agg2.loc[shot_df_agg2.shotTimeDiff > 3, 'numShots'].sum()

## Data
data = go.Bar(
    x=['Shots', 'Goals'],
    y=[reb_shots / norm_shots, reb_goals / norm_goals],

## agg2.py
def agg_rebounds_df2():
    ## Aggregate shots and goals
    shot_df = load_rebounds_df()
    shot_df_agg2 = shot_df.groupby(['shotTimeDiff', 'nextShotResult']).agg({
        'counter':
        'sum'
    }).reset_index()

    ## Create pivot table
    shot_df_agg2 = shot_df_agg2.pivot_table(

## num_shots_goals.py
## Scoring Ratio Chart
shotTimeData = go.Scatter(
    x=shot_df_agg.loc[0:20, 'shotTimeDiff'],
    y=shot_df_agg.loc[0:20, 'goalRatio'],
    mode='markers+lines',
    text=shot_df_agg.loc[0:20, 'goalRatioText'],
    hoverinfo='text')
data = [shotTimeData]

## Layout

## agg_rebounds_df.py
def agg_rebounds_df():
    """Sum ``counter`` per (shotTimeDiff, nextShotResult) for positive diffs.

    Loads the frame from ``load_rebounds_df()``, keeps the rows with
    ``shotTimeDiff > 0`` and sums the ``counter`` column for every
    (``shotTimeDiff``, ``nextShotResult``) pair, with the group keys restored
    as ordinary columns.

    NOTE(review): no return statement is visible for this function here.
    """
    shot_df = load_rebounds_df()

    # Only rows with a positive time differential take part in the aggregate.
    shot_df = shot_df[shot_df.shotTimeDiff > 0]

    # Aggregate shots and goals. One groupby is enough: a separate total per
    # shotTimeDiff alone would be overwritten by this result before being read.
    shot_df_agg = (
        shot_df.groupby(['shotTimeDiff', 'nextShotResult'])
        .agg({'counter': 'sum'})
        .reset_index()
    )

## rebounds_df.py
def load_rebounds_df():
    """Add placeholder next-shot columns and select each game's ordered rows.

    Takes ``(shot_df, unique_games)`` from ``load_expanded_shooting_df()``,
    adds the columns ``shotTimeDiff``, ``nextShotResult``, ``nextShotX`` and
    ``nextShotY`` with placeholder values, then walks ``unique_games`` (with
    a tqdm progress bar) selecting that game's rows sorted by ``play_num``.
    """
    shot_df, unique_games = load_expanded_shooting_df()

    # Placeholder values; note nextShotResult gets the *string* 'None',
    # not the None object.
    placeholders = (
        ('shotTimeDiff', 0),
        ('nextShotResult', 'None'),
        ('nextShotX', 0),
        ('nextShotY', 0),
    )
    for column, placeholder in placeholders:
        shot_df[column] = placeholder

    for gameid in tqdm(unique_games):
        in_game = shot_df.game_id == gameid
        game_shot_df = shot_df.loc[in_game, :].sort_values('play_num')

## load_expanded_shooting_df.py
def load_expanded_shooting_df():
    """Load the shooting frame and prepare the team-info lookup table.

    Calls ``load_shooting_df()``, reads ``data/team_info.csv``, adds a
    ``combined_name`` column made of ``shortName``, a space and ``teamName``,
    and then drops the columns that are no longer needed from the team table.
    """
    shot_df = load_shooting_df()

    # Load Team Info
    team_info = pd.read_csv('data/team_info.csv')

    short_name = team_info.shortName
    team_name = team_info.teamName
    team_info['combined_name'] = short_name + ' ' + team_name

    # Dropped in place, so team_info itself loses these columns.
    unused_columns = [
        'franchiseId', 'shortName', 'teamName', 'abbreviation', 'link']
    team_info.drop(unused_columns, axis=1, inplace=True)

## shooting_df.py
import pandas as pd
from tqdm import tqdm
import time

def load_shooting_df():
    """Read ``data/game_plays.csv`` and keep only 'Goal' and 'Shot' events."""
    # Get Shot on and Against
    plays_df = pd.read_csv('data/game_plays.csv')
    shot_events = ['Goal', 'Shot']
    is_shot_event = plays_df.event.isin(shot_events)
    shooting_df = plays_df.loc[is_shot_event, :]

    # Format Shot by and Goalie
	#!/bin/bash

	#Variables
	csv_files='ls /path/to/csv/files/*.csv'
	out_file='/path/to/output/folder'
	file_locs=$out_file/files.txt
	fits_out=$out_file/merged.fits
	hdf5_out=$out_file/merged.hdf5

	# Create Txt File of Locations
	# Generate linear fit and chart 1
	slope, intercept, r_value, p_value, std_err = stats.linregress(
	seasonal_df['rebound_ratio'], seasonal_df['win'])
	line = slope * seasonal_df['rebound_ratio'].values + intercept

	ch1_data = go.Scatter(
	x=seasonal_df['rebound_ratio'].values,
	y=seasonal_df['win'].values,
	mode='markers',
	marker=go.Marker(color='rgb(255, 127, 14)'),
	def get_seasonal_reb_df():
	shot_to_reb = load_rebounds_df()
	## Use only regular season
	shot_to_reb = shot_to_reb[shot_to_reb.type == 'R']

	rebound_df = shot_to_reb.groupby(['for_name', 'season', 'lead_to_reb']).agg({
	'counter':
	'count'
	}).reset_index()
	reb_goals = shot_df_agg2.loc[shot_df_agg2.shotTimeDiff <= 3, 'numGoals'].sum()
	reb_shots = shot_df_agg2.loc[shot_df_agg2.shotTimeDiff <= 3, 'numShots'].sum()

	norm_goals = shot_df_agg2.loc[shot_df_agg2.shotTimeDiff > 3, 'numGoals'].sum()
	norm_shots = shot_df_agg2.loc[shot_df_agg2.shotTimeDiff > 3, 'numShots'].sum()

	## Data
	data = go.Bar(
	x=['Shots', 'Goals'],
	y=[reb_shots / norm_shots, reb_goals / norm_goals],
	def agg_rebounds_df2():
	## Aggregate shots and goals
	shot_df = load_rebounds_df()
	shot_df_agg2 = shot_df.groupby(['shotTimeDiff', 'nextShotResult']).agg({
	'counter':
	'sum'
	}).reset_index()

	## Create pivot table
	shot_df_agg2 = shot_df_agg2.pivot_table(
	## Scoring Ratio Chart
	shotTimeData = go.Scatter(
	x=shot_df_agg.loc[0:20, 'shotTimeDiff'],
	y=shot_df_agg.loc[0:20, 'goalRatio'],
	mode='markers+lines',
	text=shot_df_agg.loc[0:20, 'goalRatioText'],
	hoverinfo='text')
	data = [shotTimeData]

	## Layout
	def agg_rebounds_df():
	shot_df = load_rebounds_df()

	# Aggregate by Time Differential
	shot_df = shot_df[shot_df.shotTimeDiff > 0]
	shot_df_agg = shot_df.groupby('shotTimeDiff').agg({'counter':'sum'})

	# Aggregate shots and goals
	shot_df_agg = shot_df.groupby(['shotTimeDiff','nextShotResult'])\
	.agg({'counter':'sum'}).reset_index()
	def load_rebounds_df():
	shot_df, unique_games = load_expanded_shooting_df()
	shot_df['shotTimeDiff'] = 0
	shot_df['nextShotResult'] = 'None'
	shot_df['nextShotX'] = 0
	shot_df['nextShotY'] = 0
	for gameid in tqdm(unique_games):
	game_shot_df =\
	shot_df.loc[shot_df.game_id == gameid, :].sort_values('play_num')
	def load_expanded_shooting_df():
	shot_df = load_shooting_df()

	# Load Team Info
	team_info = pd.read_csv('data/team_info.csv')
	team_info['combined_name'] = team_info.shortName + ' ' + team_info.teamName
	team_info.drop(
	['franchiseId', 'shortName', 'teamName', 'abbreviation', 'link'],
	axis=1,
	inplace=True)
	import pandas as pd
	from tqdm import tqdm
	import time

	def load_shooting_df():
	# Get Shot on and Against
	plays_df = pd.read_csv('data/game_plays.csv')
	shooting_df = plays_df.loc[plays_df.event.isin(['Goal', 'Shot']), :]

	# Format Shot by and Goalie