Skip to content

Instantly share code, notes, and snippets.

@databyjp
Last active February 22, 2020 08:43
Show Gist options
  • Save databyjp/dc593ff0a2d1a84bd85ad80944c84d44 to your computer and use it in GitHub Desktop.
Save databyjp/dc593ff0a2d1a84bd85ad80944c84d44 to your computer and use it in GitHub Desktop.
# ========== GET TEAMS' TRAVEL DATA ==========
# Load teams' schedules (the CSV's first column is used as the index).
schedule_df = pd.read_csv('srcdata/2020_nba_schedule.csv', index_col=0)

# Add a lookup key to the arena table: the team name upper-cased with spaces
# replaced by underscores. This key is what team names coming from the
# schedule are compared against. `arena_df` must already exist at this point.
arena_df = arena_df.assign(
    teamupper=arena_df.teamname.str.upper().str.replace(' ', '_'),
)
def get_home_arena(teamname):
    """Return the rows of ``arena_df`` whose ``teamupper`` equals *teamname*.

    The result is a DataFrame and may be empty when no arena row carries
    that key; callers are responsible for picking the row/column they need.
    """
    is_team = arena_df.teamupper == teamname
    return arena_df.loc[is_team]
# Attach venue coordinates to every game: the first arena row matching the
# home team supplies `lat` and `lon`. A home team with no matching arena row
# makes `.values[0]` raise IndexError.
schedule_df = schedule_df.assign(
    lat=schedule_df.home_team.apply(lambda team: get_home_arena(team)['lat'].values[0]),
    lon=schedule_df.home_team.apply(lambda team: get_home_arena(team)['lon'].values[0]),
)
import math

_EARTH_RADIUS_M = 6371e3  # Earth radius in metres, as used by the Haversine formula below


def _haversine_km(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    Implements the Haversine formula
    (https://en.wikipedia.org/wiki/Haversine_formula /
    https://www.movable-type.co.uk/scripts/latlong.html).
    All four arguments are coordinates in decimal degrees. NaN inputs
    propagate to a NaN result.
    """
    lat1_rad = math.radians(lat1)
    lat2_rad = math.radians(lat2)
    sin_half_dlat = math.sin(math.radians(lat2 - lat1) / 2)
    sin_half_dlon = math.sin(math.radians(lon2 - lon1) / 2)
    hav = (
        (sin_half_dlat * sin_half_dlat) +
        (math.cos(lat1_rad) * math.cos(lat2_rad)) *
        (sin_half_dlon * sin_half_dlon)
    )
    # Floating-point rounding can push the value marginally above 1 for
    # near-antipodal points, which would make sqrt(1 - hav) raise ValueError.
    # Written as a comparison (not min()) so that NaN stays NaN.
    if hav > 1.0:
        hav = 1.0
    central_angle = 2 * math.atan2(math.sqrt(hav), math.sqrt(1 - hav))
    return _EARTH_RADIUS_M * central_angle / 1000


# Build one record per trip: for every team, walk its games in the order they
# appear in schedule_df and measure the distance between consecutive venues.
# NOTE(review): this assumes schedule_df rows are already in chronological
# order; nothing here sorts by start_time. Confirm against the CSV.
travel_data_list = list()
for teamname in schedule_df.home_team.unique():
    # Every game the team takes part in, home or away.
    team_sch = schedule_df[(schedule_df.away_team == teamname) | (schedule_df.home_team == teamname)]
    # `dist` is initialised to 0 but never filled in by this section; kept so
    # the frame left behind after the loop has the same columns as before.
    team_sch = team_sch.assign(dist=0)
    team_sch.reset_index(drop=True, inplace=True)

    prev_row = None  # No travel for the first game
    for _, row in team_sch.iterrows():
        if prev_row is not None:
            tot_dist = _haversine_km(prev_row['lat'], prev_row['lon'], row['lat'], row['lon'])
            # Consecutive games at the same venue (distance 0) are not trips;
            # a NaN distance also fails this test and is skipped.
            if tot_dist > 0:
                travel_data_list.append(dict(
                    game_time=row['start_time'],
                    # Timestamp.date() replaces pd.datetime.date(...): the
                    # pd.datetime alias no longer exists in pandas 2.x.
                    travel_date=pd.to_datetime(row['start_time']).date(),
                    teamname=teamname,
                    travel_dist=tot_dist,
                    orig_lat=prev_row['lat'],
                    orig_lon=prev_row['lon'],
                    dest_lat=row['lat'],
                    dest_lon=row['lon'],
                ))
        prev_row = row
# One row per recorded trip.
travel_df = pd.DataFrame(travel_data_list)

# Per-team summary: total distance, number of trips, and mean distance per
# trip. `teamname` is then turned from the UPPER_SNAKE key into a display
# form (underscores -> spaces, title-cased).
travel_team_df = (
    travel_df.groupby('teamname')['travel_dist']
    .agg(['sum', 'count'])
    .rename(columns={'sum': 'travel_dist', 'count': 'trips'})
    .assign(km_per_trip=lambda frame: frame.travel_dist / frame.trips)
    .reset_index()
    .assign(teamname=lambda frame: frame.teamname.str.replace('_', ' ').str.title())
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment