This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Head-to-head tallies between pairs of players.
# h2h_wl: one row per (winner, loser) pair -> matches player_a won against
# player_b, plus the latest tourney_year in which that happened.
h2h_wl = (
    tennis_df_all
    .groupby(['winner_name', 'loser_name'])
    .agg({'tourney_id': 'count', 'tourney_year': 'max'})
    .reset_index()
)
h2h_wl.columns = ['player_a', 'player_b', 'total', 'year']

# h2h_lw: the same tally keyed the other way round (loser, winner) -> matches
# player_a lost to player_b.
h2h_lw = (
    tennis_df_all
    .groupby(['loser_name', 'winner_name'])
    .agg({'tourney_id': 'count', 'tourney_year': 'max'})
    .reset_index()
)
h2h_lw.columns = ['player_a', 'player_b', 'total', 'year']

# NOTE(review): the default inner join keeps only pairs in which each player
# has beaten the other at least once; one-sided match-ups are dropped.
# Confirm that this is intended.
h2h_f = h2h_wl.merge(h2h_lw, on=['player_a', 'player_b'])
h2h_f['total'] = h2h_f['total_x'] + h2h_f['total_y']

# Put each pair in alphabetical order. Both ordered columns are computed from
# the untouched values before either column is overwritten; assigning
# player_a first and then comparing against it would leave both columns
# holding the same name whenever player_a sorted after player_b.
a_sorts_first = h2h_f['player_a'] < h2h_f['player_b']
ordered_a = np.where(a_sorts_first, h2h_f['player_a'], h2h_f['player_b'])
ordered_b = np.where(a_sorts_first, h2h_f['player_b'], h2h_f['player_a'])
h2h_f['player_a'] = ordered_a
h2h_f['player_b'] = ordered_b

# Most recent year in which the pair met, taken from either direction.
h2h_f['year'] = np.where(h2h_f['year_x'] > h2h_f['year_y'], h2h_f['year_x'], h2h_f['year_y'])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Label the retirement tally (ret_df_f is built before this snippet).
ret_df_f.columns = ['tourney_year', 'surface', 'rets']

# Matches whose score does not contain "RET". Rows with a missing score
# compare unequal to False and are therefore left out here as well.
no_ret_mask = tennis_df['score'].str.contains("RET").eq(False)
notret_df = tennis_df.loc[no_ret_mask, ['tourney_year', 'surface', 'tourney_id']]

# Count those matches per (tourney_year, surface).
notret_df_f = notret_df.groupby(['tourney_year', 'surface'], as_index=False)['tourney_id'].count()
notret_df_f = notret_df_f.rename(columns={'tourney_id': 'norets'})

# Stack both tallies; each row carries only one of rets / norets, so the
# missing one is filled with 0 before summing per (tourney_year, surface).
dfs = (ret_df_f, notret_df_f)
dfs_concat = pd.concat(dfs)
dfs_c = (
    dfs_concat
    .fillna(0)
    .groupby(['tourney_year', 'surface'], as_index=False)[['rets', 'norets']]
    .sum()
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Extract retirements: matches whose score contains "RET" (rows with a
# missing score are treated as not retired).
retired_mask = tennis_df['score'].str.contains("RET", na=False)
ret_df = tennis_df.loc[
    retired_mask,
    ['tourney_year', 'tourney_level', 'surface', 'tourney_id', 'winner_name'],
]

# Non-null counts of the remaining columns per (tourney_year, surface).
# NOTE(review): the name is spelled ref_df_f here while other snippets use
# ret_df_f; it is kept unchanged because later code may refer to it.
ref_df_f = ret_df.groupby(['tourney_year', 'surface'], as_index=False).agg('count')

# The style has to be active before the axes is created: an axes picks up its
# colour cycle when it is constructed, so applying the style afterwards would
# leave `ax` on the previous palette.
# NOTE(review): matplotlib 3.6 renamed the seaborn styles to 'seaborn-v0_8-*';
# confirm that the installed version still accepts this name.
plt.style.use('seaborn-colorblind')
fig = plt.figure(figsize=(15, 5))
ax = fig.add_subplot(111)
plt.title('Retirements - Evolution of Retirements by Surface')
plt.ylabel('Number of Retirements')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Winner ages in Grand Slam finals (tourney_level 'G', round 'F'); rows
# without a winner_age are dropped.
is_gs_final = (tennis_df['tourney_level'] == 'G') & (tennis_df['round'] == 'F')
tennis_df_win = tennis_df[is_gs_final].dropna(subset=['winner_age'])

# Keep the columns of interest under shorter names.
dfw = tennis_df_win[['tourney_year', 'tourney_name', 'winner_name', 'winner_age']].rename(
    columns={'winner_name': 'player', 'winner_age': 'age'}
)

# Mean age per (tourney_year, tourney_name), then the mean of those per year.
dfs_final = dfw.groupby(['tourney_year', 'tourney_name'], as_index=False)['age'].mean()
dfs_final_2 = dfs_final.groupby('tourney_year', as_index=False)['age'].mean()

fig = plt.figure(figsize=(15, 5))
ax = fig.add_subplot(111)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Function to plot effectiveness of a player
def plot_effectiveness(player):
    """Tally the wins and losses of `player` per tourney_year and surface.

    Reads the module-level `tennis_df`. Two tables are built:

    * ``pww`` - columns ``tourney_year``, ``surface``, ``wins``
    * ``pll`` - columns ``tourney_year``, ``surface``, ``loses``

    Each count is the number of non-null ``tourney_id`` values in the group.

    Args:
        player: name compared against ``winner_name`` / ``loser_name``.
    """
    group_keys = ['tourney_year', 'surface']
    won = tennis_df[tennis_df['winner_name'] == player]
    lost = tennis_df[tennis_df['loser_name'] == player]

    # Count a single column and name it while resetting the index. The
    # earlier form (as_index=False together with agg(['count'])) produced a
    # different column layout depending on the pandas version, which made the
    # three-name column assignment fail on recent releases.
    pww = won.groupby(group_keys)['tourney_id'].count().reset_index(name='wins')
    pll = lost.groupby(group_keys)['tourney_id'].count().reset_index(name='loses')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Unique number of players that won a Grand Slam final per period.
# Only tourney_level 'G' is filtered here; no other level is included.
_gs_finals = tennis_df[(tennis_df['round'] == 'F') & (tennis_df['tourney_level'] == 'G')]


def _unique_winners(first_year, last_year):
    """Return the distinct winner_name count for first_year <= tourney_year <= last_year.

    The result is a Series with the single label 'winner_name'.
    """
    in_period = _gs_finals['tourney_year'].between(first_year, last_year)
    return _gs_finals[in_period].agg({'winner_name': 'nunique'})


s = _unique_winners('1975', '1985')
t = _unique_winners('1986', '1996')
u = _unique_winners('1997', '2007')
v = _unique_winners('2008', '2018')

# Relabel the single entry of s and t with the period it covers.
s = s.rename({'winner_name': '1975-1985'})
t = t.rename({'winner_name': '1986-1996'})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Function that plots the history of wins of a particular player
def plot_history_player(player):
    """Count the wins of `player` per tourney_year and tourney_level and set up the figure.

    Reads the module-level `tennis_df`. ``pldf_2`` has the columns
    ``tourney_year``, ``tourney_level`` and ``count`` (number of non-null
    ``tourney_id`` values in the group). A 15x5 figure with one axes is
    created, and the axes colour cycle is set to five colours sampled evenly
    from the jet colormap.

    Args:
        player: name compared against ``winner_name``.
    """
    # Create dataframe with winner_name = player
    won = tennis_df[tennis_df['winner_name'] == player]

    # Count a single column and name it while resetting the index. Combining
    # as_index=False with agg(['count']) gave a different column layout
    # depending on the pandas version.
    pldf_2 = (
        won.groupby(['tourney_year', 'tourney_level'])['tourney_id']
        .count()
        .reset_index(name='count')
    )

    fig = plt.figure(figsize=(15, 5))
    ax = fig.add_subplot(111)
    ax.set_prop_cycle(plt.cycler('color', plt.cm.jet(np.linspace(0, 1, 5))))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# One dashed line chart per country, side by side, showing per tourney_year
# the number of level-'G' matches won by players with that IOC code.
plt.figure(figsize=(20, 4))
countries = {'ARG': 'Argentina', 'ESP': 'Spain', 'SUI': 'Switzerland', 'USA': 'United States', 'SRB': 'Serbia'}
colors = ['blue', 'magenta', 'red', 'grey', 'black']

# enumerate supplies the 1-based subplot position, so each country gets its
# own panel and its own entry in `colors`; a hand-maintained counter that is
# not advanced would draw every country into the first panel.
# v (the country's display name) is not used by the lines shown here.
for i, (k, v) in enumerate(countries.items(), start=1):
    plt.subplot(1, len(countries), i)
    gs_wins = tennis_df[(tennis_df['tourney_level'] == 'G') & (tennis_df['winner_ioc'] == k)]
    s = gs_wins.groupby(['tourney_year', 'winner_ioc'], as_index=False).agg('count')
    plt.plot(
        s['tourney_year'],
        s['tourney_id'],
        color=colors[i - 1],
        linestyle='dashed',
        marker='o',
        markerfacecolor='blue',
        markersize=2,
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Names of the four Grand Slam tournaments
tourneys = ['Australian Open', 'Roland Garros', 'Wimbledon', 'US Open']

# Finals (round 'F') for which a winner_rank is recorded; copied so that later
# changes do not touch tennis_df.
has_rank = tennis_df['winner_rank'].notna()
is_final = tennis_df['round'] == 'F'
tennis_df_1 = tennis_df[has_rank & is_final].copy()

plt.figure(figsize=(20, 4))
# One panel per Grand Slam, laid out in a single row
for i in range(1, len(tourneys) + 1):
    plt.subplot(1, len(tourneys), i)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# AWS settings: the three credentials are read, in this order, from the
# "aws-s3" secret scope through dbutils.secrets.
ACCESS_KEY, SECRET_KEY, ENCODED_SECRET_KEY = (
    dbutils.secrets.get(scope="aws-s3", key=secret_name)
    for secret_name in ("access_key", "secret_key", "encoded_secret_key")
)
# Name of the target S3 bucket
AWS_BUCKET_NAME = "bucket-static-webpages"
def upload_to_s3(file_name, file_content): | |
# Check if file_name is a key in dashboards dictionary | |
if file_name not in dashboards: |
OlderNewer