Nicolas Escobar nescobar

## atp_rankings.py
# Head-to-head table: number of matches between two players and the latest
# year found for the pair.

# Wins of player_a over player_b, with the latest year of such a win.
h2h_wl = (
    tennis_df_all
    .groupby(['winner_name', 'loser_name'])
    .agg({'tourney_id': 'count', 'tourney_year': 'max'})
    .reset_index()
)
h2h_wl.columns = ['player_a', 'player_b', 'total', 'year']

# Losses of player_a to player_b, with the latest year of such a loss.
h2h_lw = (
    tennis_df_all
    .groupby(['loser_name', 'winner_name'])
    .agg({'tourney_id': 'count', 'tourney_year': 'max'})
    .reset_index()
)
h2h_lw.columns = ['player_a', 'player_b', 'total', 'year']

# Default (inner) merge: a pair survives only if each player has beaten the
# other at least once.
# NOTE(review): one-sided rivalries are dropped here - confirm that is intended.
h2h_f = h2h_wl.merge(h2h_lw, on=['player_a', 'player_b'])
h2h_f['total'] = h2h_f['total_x'] + h2h_f['total_y']

# Store each pair in alphabetical order. Both new columns are computed from
# the ORIGINAL values before either is assigned: overwriting player_a first
# and then comparing against it would leave both columns holding the same
# name whenever the pair has to be swapped.
pair_is_reversed = (h2h_f['player_a'] > h2h_f['player_b']).to_numpy()
h2h_f['player_a'], h2h_f['player_b'] = (
    np.where(pair_is_reversed, h2h_f['player_b'], h2h_f['player_a']),
    np.where(pair_is_reversed, h2h_f['player_a'], h2h_f['player_b']),
)

# Latest year across both directions of the rivalry.
h2h_f['year'] = np.where(h2h_f['year_x'] > h2h_f['year_y'], h2h_f['year_x'], h2h_f['year_y'])

## atp_retirements.py
# Name the columns of the retirement counts table.
ret_df_f.columns = ['tourney_year', 'surface', 'rets']

# Rows whose score does not contain "RET". The explicit comparison with False
# is kept on purpose so that rows with a missing score are handled exactly as
# before.
score_has_ret = tennis_df['score'].str.contains("RET")
notret_df = tennis_df.loc[score_has_ret == False, ['tourney_year', 'surface', 'tourney_id']]  # noqa: E712

# Count those rows per year and surface.
notret_df_f = (
    notret_df
    .groupby(['tourney_year', 'surface'], as_index=False)
    .count()
    [['tourney_year', 'surface', 'tourney_id']]
)
notret_df_f.columns = ['tourney_year', 'surface', 'norets']

# Stack both tables; each contributes only one of the 'rets' / 'norets'
# columns, so the gaps are filled with 0 before summing per year and surface.
dfs = (ret_df_f, notret_df_f)
dfs_concat = pd.concat(dfs)

dfs_c = (
    dfs_concat
    .fillna(0)
    .groupby(['tourney_year', 'surface'])[['rets', 'norets']]
    .sum()
    .reset_index()
)

## atp_retirements_0.py
# Extract retirements: rows whose score contains "RET". na=False makes rows
# with a missing score count as "no match", which is what the previous
# `== True` comparison did implicitly.
ret_df = tennis_df.loc[
    tennis_df['score'].str.contains("RET", na=False),
    ['tourney_year', 'tourney_level', 'surface', 'tourney_id', 'winner_name'],
]
# Non-null counts of the remaining columns per year and surface.
# NOTE(review): 'ref_df_f' looks like a typo for 'ret_df_f'; the name is left
# unchanged because other code may refer to it.
ref_df_f = ret_df.groupby(['tourney_year', 'surface'], as_index=False).agg('count')

# Select the style BEFORE creating the figure and axes, so that it applies to
# them. Newer matplotlib releases ship this style under a 'seaborn-v0_8-'
# prefix, so use whichever name is installed; if neither is found, fall back
# to the original name so the failure is as visible as before.
colorblind_style = next(
    (
        style_name
        for style_name in ('seaborn-v0_8-colorblind', 'seaborn-colorblind')
        if style_name in plt.style.available
    ),
    'seaborn-colorblind',
)
plt.style.use(colorblind_style)

fig = plt.figure(figsize=(15,5))

ax = fig.add_subplot(111)
plt.title('Retirements - Evolution of Retirements by Surface')
plt.ylabel('Number of Retirements')

## atp_age_grandslams.py
# Finals (round 'F') of level-'G' tournaments, keeping only rows where the
# winner's age is present.
level_is_g = tennis_df['tourney_level'].isin(['G'])
round_is_final = tennis_df['round'] == 'F'
tennis_df_win = tennis_df[level_is_g & round_is_final].dropna(subset=['winner_age'])

# Keep the identifying columns and give the winner columns neutral names.
dfw = tennis_df_win[['tourney_year', 'tourney_name', 'winner_name', 'winner_age']].rename(
    columns={'winner_name': 'player', 'winner_age': 'age'}
)

# Mean age per (year, tournament), then the mean of those values per year.
dfs_final = dfw.groupby(['tourney_year', 'tourney_name'])[['age']].mean().reset_index()
dfs_final_2 = dfs_final.groupby(['tourney_year'])[['age']].mean().reset_index()

# Single-axes figure for the plot.
fig = plt.figure(figsize=(15, 5))
ax = fig.add_subplot(111)

## atp_effectiveness_method.py
# Function to plot effectiveness of a player
def plot_effectiveness(player):
    """Build per-year, per-surface win and loss counts for ``player``.

    Reads the module-level ``tennis_df``. As written, the function only
    prepares the two count tables (``pww`` with a ``wins`` column and ``pll``
    with a ``loses`` column); it does not draw or return anything.

    Args:
        player: Name compared against the ``winner_name`` and ``loser_name``
            columns.
    """
    group_keys = ['tourney_year', 'surface']

    # Select the counted column before aggregating and keep the group keys in
    # the index until reset_index(). The result is always exactly
    # [tourney_year, surface, count], regardless of how the installed pandas
    # treats as_index=False together with a list of aggregations.
    won = tennis_df[tennis_df['winner_name'] == player]
    pww = won.groupby(group_keys)['tourney_id'].agg(['count']).reset_index()

    lost = tennis_df[tennis_df['loser_name'] == player]
    pll = lost.groupby(group_keys)['tourney_id'].agg(['count']).reset_index()

    pww.columns = ['tourney_year', 'surface', 'wins']
    pll.columns = ['tourney_year', 'surface', 'loses']


## atp_unique_gs_winners.py
# Unique number of players that won a final (round 'F') of a level-'G'
# tournament in each period. Only level 'G' is filtered here.
_gs_finals = tennis_df[(tennis_df['round'] == 'F') & (tennis_df['tourney_level'].isin(['G']))]

# One single-entry Series (indexed by 'winner_name') per period.
s, t, u, v = (
    _gs_finals[_gs_finals['tourney_year'].between(first_year, last_year)].agg({'winner_name': 'nunique'})
    for first_year, last_year in (
        ('1975', '1985'),
        ('1986', '1996'),
        ('1997', '2007'),
        ('2008', '2018'),
    )
)

# Relabel the single entry with the period it covers.
s = s.rename({'winner_name': '1975-1985'})
t = t.rename({'winner_name': '1986-1996'})

## atp_player_wins_history.py
# Function that plots the history of wins of a particular player
def plot_history_player(player):
    """Prepare the per-year, per-level win counts of ``player`` and a figure.

    Reads the module-level ``tennis_df``. As written, the function builds the
    counts table ``pldf_2`` (columns ``tourney_year``, ``tourney_level``,
    ``count``) and creates a figure with one axes whose colour cycle is five
    colours sampled from the ``jet`` colormap; nothing is drawn or returned.

    Args:
        player: Name compared against the ``winner_name`` column.
    """
    # Rows where the player is the winner.
    wins = tennis_df[tennis_df['winner_name'] == player]

    # Select the counted column before aggregating and keep the group keys in
    # the index until reset_index(), so the result does not depend on how the
    # installed pandas treats as_index=False with a list of aggregations.
    pldf_2 = (
        wins
        .groupby(['tourney_year', 'tourney_level'])['tourney_id']
        .agg(['count'])
        .reset_index()
    )

    fig = plt.figure(figsize=(15,5))
    ax = fig.add_subplot(111)
    ax.set_prop_cycle(plt.cycler('color', plt.cm.jet(np.linspace(0, 1, 5))))

## atp_countries_evolution.py
# One row of side-by-side subplots, one per country.
plt.figure(figsize=(20,4))

# IOC country code -> display name, and the line colour used for each country
# (matched by position).
countries = {'ARG':'Argentina','ESP':'Spain','SUI':'Switzerland','USA':'United States','SRB':'Serbia'}
colors = ['blue','magenta','red','grey','black']

# enumerate() advances the 1-based subplot position for every country; with a
# counter that is never incremented, every series would be drawn into the
# first subplot using the first colour.
for i, (k, v) in enumerate(countries.items(), start=1):
    plt.subplot(1, len(countries), i)
    # Rows of level-'G' tournaments whose winner_ioc is this country code,
    # counted per year.
    s = (
        tennis_df[(tennis_df['tourney_level'] == 'G') & (tennis_df['winner_ioc'] == k)]
        .groupby(['tourney_year', 'winner_ioc'], as_index=False)
        .agg('count')
    )
    plt.plot(s['tourney_year'], s['tourney_id'], color=colors[i-1], linestyle='dashed', marker='o', markerfacecolor='blue', markersize=2)

## atp_winners_rankings.py
# Names of the four Grand Slam tournaments
tourneys = ['Australian Open', 'Roland Garros', 'Wimbledon', 'US Open']

# Independent copy of the finals (round 'F') whose winner_rank is not NaN
rank_is_known = ~np.isnan(tennis_df['winner_rank'])
is_final_round = tennis_df['round'] == 'F'
tennis_df_1 = tennis_df[rank_is_known & is_final_round].copy()
plt.figure(figsize=(20, 4))

# Select one subplot position per Grand Slam, side by side
for i in range(1, len(tourneys) + 1):
    plt.subplot(1, len(tourneys), i)

## upload_to_s3.py
# AWS: the three credentials are read from the "aws-s3" secret scope, in the
# order access key, secret key, encoded secret key.
_S3_SECRET_SCOPE = "aws-s3"
ACCESS_KEY, SECRET_KEY, ENCODED_SECRET_KEY = (
    dbutils.secrets.get(scope=_S3_SECRET_SCOPE, key=secret_name)
    for secret_name in ("access_key", "secret_key", "encoded_secret_key")
)
# Name of the target bucket.
AWS_BUCKET_NAME = "bucket-static-webpages"

def upload_to_s3(file_name, file_content):

  # Check if file_name is a key in dashboards dictionary
  if file_name not in dashboards:
	h2h_wl = tennis_df_all.groupby(['winner_name','loser_name']).agg({'tourney_id':'count','tourney_year':'max'}).reset_index()
	h2h_wl.columns = ['player_a','player_b','total','year']
	h2h_lw = tennis_df_all.groupby(['loser_name','winner_name']).agg({'tourney_id':'count','tourney_year':'max'}).reset_index()
	h2h_lw.columns = ['player_a','player_b','total','year']

	h2h_f = h2h_wl.merge(h2h_lw, on=['player_a', 'player_b'])
	h2h_f['total'] = h2h_f['total_x'] + h2h_f['total_y']
	h2h_f['player_a'] = np.where(h2h_f['player_a'] < h2h_f['player_b'], h2h_f['player_a'], h2h_f['player_b'])
	h2h_f['player_b'] = np.where(h2h_f['player_a'] > h2h_f['player_b'], h2h_f['player_a'], h2h_f['player_b'])
	h2h_f['year'] = np.where(h2h_f['year_x'] > h2h_f['year_y'], h2h_f['year_x'], h2h_f['year_y'])
	ret_df_f.columns = ['tourney_year','surface','rets']

	notret_df = tennis_df[tennis_df['score'].str.contains("RET")==False][['tourney_year','surface','tourney_id']]
	notret_df_f = notret_df.groupby(['tourney_year','surface'], as_index=False).agg('count')[['tourney_year','surface','tourney_id']]
	notret_df_f.columns = ['tourney_year','surface','norets']

	dfs = (ret_df_f, notret_df_f)
	dfs_concat = pd.concat(dfs)

	dfs_c = dfs_concat.fillna(0).groupby(['tourney_year','surface']).agg({'rets':'sum','norets':'sum'}).reset_index()
	# Extract retirements
	ret_df = tennis_df[tennis_df['score'].str.contains("RET")==True][['tourney_year','tourney_level','surface','tourney_id','winner_name']]
	ref_df_f = ret_df.groupby(['tourney_year','surface'], as_index=False).agg('count')

	fig = plt.figure(figsize=(15,5))

	ax = fig.add_subplot(111)
	plt.style.use('seaborn-colorblind')
	plt.title('Retirements - Evolution of Retirements by Surface')
	plt.ylabel('Number of Retirements')
	# Create dataframe with age of winners filtering Grand Slam finals
	tennis_df_win=tennis_df[tennis_df['tourney_level'].isin(['G'])&(tennis_df['round']=='F')].dropna(subset=['winner_age'])
	dfw = tennis_df_win[['tourney_year','tourney_name','winner_name','winner_age']]
	dfw.columns = ['tourney_year','tourney_name','player','age']

	dfs_final = dfw.groupby(['tourney_year','tourney_name']).agg({'age':'mean'}).reset_index()
	dfs_final_2 = dfs_final.groupby(['tourney_year']).agg({'age':'mean'}).reset_index()

	fig = plt.figure(figsize=(15,5))
	ax = fig.add_subplot(111)
	# Function to plot effectiveness of a player
	def plot_effectiveness(player):

	pw = tennis_df[(tennis_df['winner_name'] == player)].groupby(['tourney_year','surface'], as_index=False).agg(['count'])
	pww = pw['tourney_id'].reset_index()
	pl = tennis_df[(tennis_df['loser_name'] == player)].groupby(['tourney_year','surface'], as_index=False).agg(['count'])
	pll = pl['tourney_id'].reset_index()
	pww.columns = ['tourney_year','surface','wins']
	pll.columns = ['tourney_year','surface','loses']
	# Unique number of players that won GS and Masters per period
	s = tennis_df[(tennis_df['round']=='F')&(tennis_df['tourney_level'].isin(['G']))&(tennis_df['tourney_year'].between('1975','1985'))].agg({'winner_name':'nunique'})
	t = tennis_df[(tennis_df['round']=='F')&(tennis_df['tourney_level'].isin(['G']))&(tennis_df['tourney_year'].between('1986','1996'))].agg({'winner_name':'nunique'})
	u = tennis_df[(tennis_df['round']=='F')&(tennis_df['tourney_level'].isin(['G']))&(tennis_df['tourney_year'].between('1997','2007'))].agg({'winner_name':'nunique'})
	v = tennis_df[(tennis_df['round']=='F')&(tennis_df['tourney_level'].isin(['G']))&(tennis_df['tourney_year'].between('2008','2018'))].agg({'winner_name':'nunique'})

	s['1975-1985'] = s['winner_name']
	s=s.drop('winner_name')
	t['1986-1996'] = t['winner_name']
	t=t.drop('winner_name')
	# Function that plots the history of wins of a particular player
	def plot_history_player(player):

	# Create dataframe with winner_name = player
	pldf_1 = tennis_df[(tennis_df['winner_name'] == player)].groupby(['tourney_year','tourney_level'], as_index=False).agg(['count'])
	pldf_2 = pldf_1['tourney_id'].reset_index()

	fig = plt.figure(figsize=(15,5))
	ax = fig.add_subplot(111)
	ax.set_prop_cycle(plt.cycler('color', plt.cm.jet(np.linspace(0, 1, 5))))
	plt.figure(figsize=(20,4))

	countries = {'ARG':'Argentina','ESP':'Spain','SUI':'Switzerland','USA':'United States','SRB':'Serbia'}
	colors = ['blue','magenta','red','grey','black']

	i=1
	for k,v in countries.items():
	plt.subplot(1,5,i)
	s = tennis_df[(tennis_df['tourney_level'] == 'G') & (tennis_df['winner_ioc'].isin([k]))].groupby(['tourney_year','winner_ioc'], as_index=False).agg('count')
	plt.plot(s['tourney_year'], s['tourney_id'], color=colors[i-1], linestyle='dashed', marker='o', markerfacecolor='blue', markersize=2)
	# List of Grand Slams
	tourneys = ['Australian Open','Roland Garros','Wimbledon','US Open']

	# Create dataframe with data of finals where winner_rank values are not null
	tennis_df_1 = tennis_df[~np.isnan(tennis_df['winner_rank']) & (tennis_df['round']=='F')].copy()
	plt.figure(figsize=(20,4))

	# Create one plot for each Grand Slam
	for i in range(1,5):
	plt.subplot(1,4,i)
	# AWS
	ACCESS_KEY = dbutils.secrets.get(scope="aws-s3", key="access_key")
	SECRET_KEY = dbutils.secrets.get(scope="aws-s3", key="secret_key")
	ENCODED_SECRET_KEY = dbutils.secrets.get(scope="aws-s3", key="encoded_secret_key")
	AWS_BUCKET_NAME = "bucket-static-webpages"

	def upload_to_s3(file_name, file_content):

	# Check if file_name is a key in dashboards dictionary
	if file_name not in dashboards: