Skip to content

Instantly share code, notes, and snippets.

View nescobar's full-sized avatar

Nicolas Escobar nescobar

  • Asuncion, Paraguay
View GitHub Profile
@nescobar
nescobar / atp_rankings.py
Created October 17, 2018 17:41
ATP top rivalries by decade
# Head-to-head totals per pair of players, independent of who won.
# Matches counted with the pair oriented (winner, loser), plus the latest year they met that way.
h2h_wl = tennis_df_all.groupby(['winner_name','loser_name']).agg({'tourney_id':'count','tourney_year':'max'}).reset_index()
h2h_wl.columns = ['player_a','player_b','total','year']
# Same counts with the pair oriented (loser, winner), so both tables share the (player_a, player_b) key.
h2h_lw = tennis_df_all.groupby(['loser_name','winner_name']).agg({'tourney_id':'count','tourney_year':'max'}).reset_index()
h2h_lw.columns = ['player_a','player_b','total','year']
# merge() defaults to an inner join: only pairs where each player has beaten the other
# at least once survive. The overlapping columns get the _x (a beat b) / _y (b beat a) suffixes.
h2h_f = h2h_wl.merge(h2h_lw, on=['player_a', 'player_b'])
h2h_f['total'] = h2h_f['total_x'] + h2h_f['total_y']
# Put each pair in alphabetical order so (a, b) and (b, a) become the same key.
# Both sides must be computed from the ORIGINAL columns before either one is overwritten;
# assigning player_a first and then deriving player_b from it collapses every row where
# a > b into (b, b).
a_sorts_first = h2h_f['player_a'] < h2h_f['player_b']
first_player = np.where(a_sorts_first, h2h_f['player_a'], h2h_f['player_b'])
second_player = np.where(a_sorts_first, h2h_f['player_b'], h2h_f['player_a'])
h2h_f['player_a'] = first_player
h2h_f['player_b'] = second_player
# Most recent year in which the two players met, whichever of them won.
h2h_f['year'] = np.where(h2h_f['year_x'] > h2h_f['year_y'], h2h_f['year_x'], h2h_f['year_y'])
@nescobar
nescobar / atp_retirements.py
Created October 17, 2018 17:46
ATP tennis retirements
# NOTE(review): the statements in this excerpt look out of order. `ret_df_f` is used here
# before any visible line defines it, and the extraction step at the bottom assigns to
# `ref_df_f` instead. Confirm the intended name and ordering against the full script.
# Label the retirement counts: one row per (tourney_year, surface).
ret_df_f.columns = ['tourney_year','surface','rets']
# Matches whose score does not contain "RET", reduced to the three columns needed for counting.
notret_df = tennis_df[tennis_df['score'].str.contains("RET")==False][['tourney_year','surface','tourney_id']]
# Count those matches per year and surface; the tourney_id column ends up holding the count.
notret_df_f = notret_df.groupby(['tourney_year','surface'], as_index=False).agg('count')[['tourney_year','surface','tourney_id']]
notret_df_f.columns = ['tourney_year','surface','norets']
# Stack the two count tables. Each row has only one of rets/norets populated, so the
# missing one is filled with 0 and both are summed per (tourney_year, surface).
dfs = (ret_df_f, notret_df_f)
dfs_concat = pd.concat(dfs)
dfs_c = dfs_concat.fillna(0).groupby(['tourney_year','surface']).agg({'rets':'sum','norets':'sum'}).reset_index()
# Extract retirements: matches whose score contains "RET"
ret_df = tennis_df[tennis_df['score'].str.contains("RET")==True][['tourney_year','tourney_level','surface','tourney_id','winner_name']]
# Count retirements per year and surface (every remaining column holds that count).
ref_df_f = ret_df.groupby(['tourney_year','surface'], as_index=False).agg('count')
# Wide single-axes figure for the retirements-by-surface chart.
fig, ax = plt.subplots(figsize=(15, 5))
# NOTE(review): recent Matplotlib releases renamed the bundled seaborn styles;
# confirm this style name still resolves in the target environment.
plt.style.use('seaborn-colorblind')
ax.set_title('Retirements - Evolution of Retirements by Surface')
ax.set_ylabel('Number of Retirements')
@nescobar
nescobar / atp_age_grandslams.py
Created October 17, 2018 17:50
ATP age of Grand Slam winners
# Age of Grand Slam champions: keep only finals ('F') of Grand Slam ('G') events
# and discard rows where the winner's age is unknown.
is_grand_slam = tennis_df['tourney_level'].isin(['G'])
is_final = tennis_df['round'] == 'F'
tennis_df_win = tennis_df[is_grand_slam & is_final].dropna(subset=['winner_age'])
# Keep the identifying columns, renaming the winner columns to player / age.
dfw = tennis_df_win[['tourney_year','tourney_name','winner_name','winner_age']].rename(
    columns={'winner_name': 'player', 'winner_age': 'age'}
)
# Mean winner age per tournament edition, then averaged again per year.
dfs_final = dfw.groupby(['tourney_year','tourney_name'])['age'].mean().reset_index()
dfs_final_2 = dfs_final.groupby(['tourney_year'])['age'].mean().reset_index()
# Wide single-axes figure for the chart.
fig, ax = plt.subplots(figsize=(15, 5))
@nescobar
nescobar / atp_effectiveness_method.py
Created October 17, 2018 17:52
Function to plot effectiveness of a player
# Function to plot effectiveness of a player
def plot_effectiveness(player):
    """Count ``player``'s wins and losses per year and surface.

    Reads the module-level ``tennis_df``. Only the data-preparation steps are
    present in this excerpt: the visible code builds the two count tables and
    neither plots nor returns anything.

    Args:
        player: Name compared for equality against ``winner_name`` and
            ``loser_name``.
    """
    # Count tourney_id directly on a plain groupby. The earlier
    # `groupby(..., as_index=False).agg(['count'])` + `['tourney_id'].reset_index()`
    # form only yields three columns when pandas ignores as_index=False for list
    # aggregations; where it is honoured, the column assignment below fails.
    won = tennis_df[tennis_df['winner_name'] == player]
    pww = won.groupby(['tourney_year','surface'])['tourney_id'].count().reset_index()
    lost = tennis_df[tennis_df['loser_name'] == player]
    pll = lost.groupby(['tourney_year','surface'])['tourney_id'].count().reset_index()
    pww.columns = ['tourney_year','surface','wins']
    pll.columns = ['tourney_year','surface','loses']
# Unique number of players that won GS and Masters per period
def _unique_gs_final_winners(first_year, last_year):
    """Return a one-entry Series (label 'winner_name') with the number of distinct
    winners of Grand Slam ('G') finals ('F') between the two years, both inclusive.

    The bounds are passed as strings, matching how tourney_year is compared here.
    """
    in_scope = (
        (tennis_df['round'] == 'F')
        & (tennis_df['tourney_level'].isin(['G']))
        & (tennis_df['tourney_year'].between(first_year, last_year))
    )
    return tennis_df[in_scope].agg({'winner_name': 'nunique'})


s = _unique_gs_final_winners('1975', '1985')
t = _unique_gs_final_winners('1986', '1996')
u = _unique_gs_final_winners('1997', '2007')
v = _unique_gs_final_winners('2008', '2018')
# Relabel each count with its period: add the period label, then drop the generic one.
s['1975-1985'] = s['winner_name']
s = s.drop('winner_name')
t['1986-1996'] = t['winner_name']
t = t.drop('winner_name')
@nescobar
nescobar / atp_player_wins_history.py
Created October 17, 2018 17:54
Function that plots the history of wins of a particular player
# Function that plots the history of wins of a particular player
def plot_history_player(player):
    """Prepare per-year, per-tournament-level win counts for ``player`` and a figure.

    Reads the module-level ``tennis_df``. The visible code builds the count
    table and creates the figure/axes; no plotting call appears in this excerpt.

    Args:
        player: Name compared for equality against ``winner_name``.
    """
    # Create dataframe with winner_name = player
    # Plain groupby (no as_index=False): the group keys must stay in the index so that
    # reset_index() below returns tourney_year, tourney_level and count. Where pandas
    # honours as_index=False for list aggregations, the keys would be lost from this selection.
    pldf_1 = tennis_df[(tennis_df['winner_name'] == player)].groupby(['tourney_year','tourney_level']).agg(['count'])
    pldf_2 = pldf_1['tourney_id'].reset_index()
    fig = plt.figure(figsize=(15,5))
    ax = fig.add_subplot(111)
    # Cycle line colours through five evenly spaced samples of the jet colormap.
    ax.set_prop_cycle(plt.cycler('color', plt.cm.jet(np.linspace(0, 1, 5))))
# One panel per country: matches won at Grand Slam ('G') level per year.
plt.figure(figsize=(20,4))
countries = {'ARG':'Argentina','ESP':'Spain','SUI':'Switzerland','USA':'United States','SRB':'Serbia'}
colors = ['blue','magenta','red','grey','black']
# enumerate(start=1) advances the 1-based panel index on every pass. The visible loop
# body never incremented `i`, which would draw every country into panel 1 with colors[0].
for i, (k, v) in enumerate(countries.items(), start=1):
    plt.subplot(1,5,i)
    # Rows won by players whose IOC code is k, counted per year (tourney_id holds the count).
    s = tennis_df[(tennis_df['tourney_level'] == 'G') & (tennis_df['winner_ioc'].isin([k]))].groupby(['tourney_year','winner_ioc'], as_index=False).agg('count')
    plt.plot(s['tourney_year'], s['tourney_id'], color=colors[i-1], linestyle='dashed', marker='o', markerfacecolor='blue', markersize=2)
# List of Grand Slams
tourneys = ['Australian Open','Roland Garros','Wimbledon','US Open']
# Create dataframe with data of finals where winner_rank values are not null
tennis_df_1 = tennis_df[~np.isnan(tennis_df['winner_rank']) & (tennis_df['round']=='F')].copy()
plt.figure(figsize=(20,4))
# Create one plot for each Grand Slam
# The panel count follows the tourneys list, so the grid and the 1-based index stay in sync with it.
for i in range(1, len(tourneys) + 1):
    plt.subplot(1, len(tourneys), i)
@nescobar
nescobar / upload_to_s3.py
Last active September 16, 2020 19:17
Saves file in DBFS and then uploads it to S3
# AWS
# Credentials are read through dbutils.secrets from a single secret scope rather than hard-coded.
_AWS_SECRET_SCOPE = "aws-s3"
ACCESS_KEY, SECRET_KEY, ENCODED_SECRET_KEY = (
    dbutils.secrets.get(scope=_AWS_SECRET_SCOPE, key=secret_name)
    for secret_name in ("access_key", "secret_key", "encoded_secret_key")
)
# Bucket used as the upload destination.
AWS_BUCKET_NAME = "bucket-static-webpages"
def upload_to_s3(file_name, file_content):
# Check if file_name is a key in dashboards dictionary
if file_name not in dashboards: