Skip to content

Instantly share code, notes, and snippets.

@nescobar
Created October 17, 2018 17:46
Show Gist options
  • Save nescobar/139c599fa135818ca6572cc45f0c6245 to your computer and use it in GitHub Desktop.
Save nescobar/139c599fa135818ca6572cc45f0c6245 to your computer and use it in GitHub Desktop.
ATP tennis retirements
# Build, per (year, surface), the number of retired and non-retired matches
# and the share of matches that ended in a retirement.
group_keys = ['tourney_year', 'surface']

# ret_df_f is built earlier in the file (presumably the per-group count of
# matches whose score contains "RET" -- confirm against its definition).
ret_df_f.columns = group_keys + ['rets']

# Matches whose score does NOT contain "RET". Comparing with False (rather
# than negating) keeps the original treatment of missing scores: where
# str.contains yields NaN the comparison is False, so those rows are left out.
is_not_ret = tennis_df['score'].str.contains("RET").eq(False)
notret_df = tennis_df.loc[is_not_ret, group_keys + ['tourney_id']]

# Count the non-retirement matches in each (year, surface) group.
notret_df_f = (
    notret_df
    .groupby(group_keys, as_index=False)
    .count()
    .rename(columns={'tourney_id': 'norets'})
)

# Stack both count tables; each row carries only one of 'rets' / 'norets',
# so the missing counter is filled with 0 before summing per group.
dfs = (ret_df_f, notret_df_f)
dfs_concat = pd.concat(dfs)
summed_counts = dfs_concat.fillna(0).groupby(group_keys)[['rets', 'norets']].sum()
dfs_c = summed_counts.reset_index()

# Retirement ratio = rets / (rets + norets); groups without any
# non-retirement match are assigned a ratio of 1.
total_matches = dfs_c['rets'] + dfs_c['norets']
has_completed_matches = dfs_c['norets'] > 0
dfs_c['ret_ratio'] = np.where(has_completed_matches, dfs_c['rets'] / total_matches, 1)
# Plot the yearly retirement ratio for each surface on a log-scaled y axis.

# Select the colour-blind friendly palette BEFORE creating the figure: an Axes
# takes its colour cycle from rcParams when it is created, so switching the
# style afterwards does not recolour lines drawn on it.
# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*";
# fall back to the new name when the old one is no longer listed.
style_name = 'seaborn-colorblind'
if style_name not in plt.style.available:
    style_name = 'seaborn-v0_8-colorblind'
plt.style.use(style_name)

fig = plt.figure(figsize=(15, 5))
ax = fig.add_subplot(111)
ax.set_title('Retirements - Evolution of Retirements by Surface')
ax.set_yscale('log')  # Using log scale
ax.set_ylabel('Retirements ratio (log)')
ax.set_xlabel('Year')

# Per-surface line styling, in legend order. Hard is drawn as a thinner line
# without markers; Grass and Clay share the thicker, marked style.
marked_line = dict(linestyle='solid', marker='o', markerfacecolor='black',
                   markersize=1, linewidth=3)
surface_styles = (
    ('Hard', dict(linestyle='solid', linewidth=2, solid_capstyle='projecting')),
    ('Grass', marked_line),
    ('Clay', marked_line),
)

for surface, line_kwargs in surface_styles:
    # Filter once per surface instead of once per plotted column.
    surface_rows = dfs_c[dfs_c['surface'] == surface]
    ax.plot(surface_rows['tourney_year'], surface_rows['ret_ratio'],
            label=surface, **line_kwargs)

# Labels are attached to the lines above, so the legend cannot drift out of
# sync with the order of the plot calls.
ax.legend(loc='upper left', prop={'size': 14})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment