Skip to content

Instantly share code, notes, and snippets.

@jamiecook
Created March 1, 2024 08:03
Show Gist options
  • Save jamiecook/1a5e35aae48e7a45ecfa164038449835 to your computer and use it in GitHub Desktop.
Save jamiecook/1a5e35aae48e7a45ecfa164038449835 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import sys
import bs4 as bs
import urllib.request
import pandas as pd
import seaborn as sns
# Lower-case substrings used to recognise teams/clubs in the start list.
# `splats` is matched against the rider name; every other list is matched
# against the club/team text.
splats = ['buggy', 'wissler', 'cook, jamie', 'gossow']
hamilton_abbr = ['hamilton', 'hwcc']
lifecycle_abbr = ['lifecycle']
uq_abbr = ['university', 'uq']
bne_abbr = ['bne', 'brisbane']
sccc_abbr = ['sunshine']
mbcc_abbr = ['moreton']
balmoral_abbr = ['balmoral']
kpcc_abbr = ['kangaroo']

# Ordered (keywords, team label) rules applied to the lower-cased club text.
# Order matters: the first rule with any keyword contained in the club wins.
_CLUB_RULES = (
    (("watt bomb",), "WattBomb"),
    (("wolf",), "Wolf Racing"),
    (("futuro",), "Futuro Racing"),
    (("argenic",), "Argenic Racing"),
    (("taylor",), "Taylor Cycles Racing"),
    (("rats",), "RATS Racing"),
    (("initiative",), "Women's Cycling Development Initiative"),
    (("solaris",), "Solaris Racing"),
    (mbcc_abbr, "MBCC"),
    (hamilton_abbr, "Hamilton"),
    (lifecycle_abbr, "Lifecycle"),
    (uq_abbr, "UQCC"),
    (bne_abbr, "BNECC"),
    (sccc_abbr, "SunnyCoast"),
    (balmoral_abbr, "Balmoral"),
    (kpcc_abbr, "KPCC"),
)


def _lowered(value):
    """Return *value* lower-cased, or '' when it is not a string (e.g. NaN)."""
    return value.lower() if isinstance(value, str) else ""


def guess_team(row):
    """Map one start-list row to a short team label.

    Args:
        row: any object exposing ``club`` and ``rider`` attributes (for
            example a row passed by ``DataFrame.apply(..., axis=1)``).
            Non-string values such as NaN are treated as empty text.

    Returns:
        The label of the first matching rule, or ``"TheRest"`` when nothing
        matches. A club containing "splat", or a rider name containing one
        of the ``splats`` entries, takes priority over every club rule.
    """
    club = _lowered(row.club)
    rider = _lowered(row.rider)

    if "splat" in club or any(name in rider for name in splats):
        return "Splatties"

    for keywords, team in _CLUB_RULES:
        # Keywords are lower-case because `club` has been lower-cased; the
        # previous "Balmoral"/"Kangaroo" literals could never match.
        if any(keyword in club for keyword in keywords):
            return team
    return "TheRest"
# Example start-list URL to pass to get_riders():
#   url = "https://entryboss.cc/races/18894/startlist"
def get_riders(url):
    """Download a start-list page and return its first table as a DataFrame.

    Args:
        url: address of an HTML page containing at least one ``<table>``.

    Returns:
        A DataFrame built from the first table, with the ``Club/Team`` and
        ``Participant`` columns renamed to ``club`` and ``rider`` and a
        ``team`` column added by applying ``guess_team`` to each row.

    Raises:
        ValueError: if the page contains no ``<table>`` element.
        urllib.error.URLError: if the page cannot be fetched.
    """
    # Local import keeps this function self-contained.
    from io import StringIO

    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(url) as response:
        source = response.read()

    soup = bs.BeautifulSoup(source, 'lxml')
    tables = soup.find_all('table')
    if not tables:
        raise ValueError(f"no <table> found at {url}")

    # Newer pandas deprecates passing literal HTML to read_html; wrap the
    # markup in a file-like object instead.
    df = pd.read_html(StringIO(str(tables)))[0]
    df = df.rename(columns={'Club/Team': 'club', 'Participant': 'rider'})
    df['team'] = df.apply(guess_team, axis=1)
    return df
def plot_category(df, category):
    """Draw a bar chart of rider counts per team for one race category.

    Args:
        df: DataFrame with at least ``Category`` and ``team`` columns.
        category: value of ``Category`` to plot (e.g. ``'B'``).

    Returns:
        The object returned by ``sns.catplot`` so the caller can show or
        save the figure. (Previously nothing was returned.)
    """
    riders = df[df.Category == category]
    num_riders = len(riders)

    # One row per team with its rider count, largest team first. The earlier
    # code re-grouped by (Category, team) before sorting, which sorted
    # single-row groups and therefore never reordered anything.
    team_counts = (
        riders.groupby(['Category', 'team'])
        .size()
        .reset_index(name='riders')
        .sort_values('riders', ascending=False)
    )

    sns.set(font_scale=1)
    grid = sns.catplot(
        data=team_counts, kind='bar',
        x='team', y='riders', col='Category', hue='team',
        height=6, aspect=2.0, errorbar=None, legend=False,
    )
    grid.figure.subplots_adjust(top=1.0)
    grid.figure.suptitle(f"Category {category} - {num_riders} riders")
    return grid
if __name__ == "__main__":
    # sys.argv[0] is the script's own path; the start-list URL is the first
    # real command-line argument.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <startlist-url>")

    df = get_riders(sys.argv[1])
    # NOTE(review): nothing here shows or saves the figures; outside a
    # notebook they will need to be displayed or written to disk.
    for category in ('B', 'C', 'D'):
        plot_category(df, category)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment