Skip to content

Instantly share code, notes, and snippets.

@lmeulen
lmeulen / text_summary_init.py
Last active October 23, 2022 13:20
text_summary_init
from nltk import tokenize, word_tokenize
# Load the stop-word list: every whitespace-separated token found in
# stopwords.txt becomes one entry of the STOP_WORDS set.
with open("stopwords.txt", "r", encoding="utf-8") as f:
    # Lines keep their trailing newline; joining with a space and splitting
    # on whitespace afterwards flattens the file into individual words.
    text = " ".join(f.readlines())
STOP_WORDS = set(text.split())
@lmeulen
lmeulen / ergast_places_gained.py
Last active August 23, 2022 20:13
ergast_places_gained
# Build a per-driver table for one race with the difference between the
# 'grid' and 'position' columns.
year = 2022
rnd = 13
pgl = stats.get_race_results(year)
# Select rows and columns in a single .loc call and take an explicit copy:
# assigning a new column on a chained-indexing result (df[mask][cols]) can
# trigger pandas' SettingWithCopyWarning.
pgl = pgl.loc[pgl['round'] == rnd, ['driver', 'grid', 'position']].copy()
# Positive when 'grid' is numerically larger than 'position', negative when
# it is smaller.
pgl['change'] = pgl['grid'] - pgl['position']
def _color_red_or_green(val):
color = 'orangered' if val < 0 else 'springgreen'
return 'background-color: %s' % color
# Colour each cell of the 'change' column with _color_red_or_green.
# Styler.applymap was renamed to Styler.map in pandas 2.1 (the old name is
# deprecated), so prefer the new name and fall back on older versions.
_styler = pgl.style
(getattr(_styler, 'map', None) or _styler.applymap)(_color_red_or_green, subset=['change'])
@lmeulen
lmeulen / ergast_diver_comp_graph.py
Last active August 23, 2022 18:28
ergast_diver_comp_graph
# Figure and axes for the comparison chart.
fig, ax = plt.subplots(figsize=(8, 3.5))
# One colour per entry of `dr`, looked up in the same order (index 0, then 1).
driver_clrs = [stats.get_driver_color(dr[idx]) for idx in (0, 1)]
# Every category label is listed twice in a row.
ys = [
    label
    for label in ('Points', 'Wins', 'Podiums', 'Quali', 'Race')
    for _ in range(2)
]
xmaxs = [-pnts[0]/max(pnts), pnts[1]/max(pnts), \
-wins[0]/max(wins), wins[1]/max(wins), \
-podia[0]/max(podia), podia[1]/max(podia), \
-quali[0]/max(quali), quali[1]/max(quali), \
@lmeulen
lmeulen / ergast_head2head_calc.py
Last active August 23, 2022 15:47
ergast_head2head_calc
# Season and the two driver codes to compare.
year = 2022
driver1 = 'VER'
driver2 = 'PER'
race_results = stats.get_race_results(year)
# driverId taken from the first result row matching each code, in
# (driver1, driver2) order.
dr = [
    race_results.loc[race_results.code == code, 'driverId'].values[0]
    for code in (driver1, driver2)
]
# Position 0 is rewritten to 30.
# NOTE(review): presumably 0 marks a non-classified result that should rank
# behind every real finishing position - confirm.
race_results['position'] = race_results['position'].replace(0, 30)
race = [0, 0, 10]
@lmeulen
lmeulen / ergast_compare2prevyear_teams.py
Created August 22, 2022 16:06
ergast_compare2prevyear_teams
year = 2022
# Standings from get_wcc_standing for `year`, restricted to the highest round
# number present and ordered by points, highest first.
nw = stats.get_wcc_standing(year)
rnd = nw['round'].max()
nw = (
    nw.loc[nw['round'] == rnd, ['name', 'points']]
    .sort_values(by='points', ascending=False)
)
# Same selection for the previous year at the same round number; its points
# column is renamed to 'prev_year'.
pv = stats.get_wcc_standing(year - 1)
pv = (
    pv.loc[pv['round'] == rnd, ['name', 'points']]
    .sort_values(by='points', ascending=False)
    .rename(columns={"points": "prev_year"})
)
@lmeulen
lmeulen / ergast_wdc_standing_over_season.py
Created August 22, 2022 05:34
ergast_wdc_standing_over_season
year = 2022
standings = stats.get_wdc_standing(year)
# Plot the 'position' column against the 'round' column with the y-axis
# inverted.
stats.horizontal_driver_lines_plot(
    df=standings,
    xcolumn='round',
    xlabel='Round',
    ycolumn='position',
    ylabel='Position',
    invert_yaxis=True,
    title='Championships standings ({})'.format(year),
)
@lmeulen
lmeulen / ergast_field_experience.py
Created August 21, 2022 19:11
ergast_field_experience
results = stats.get_race_results()
# driverId of every result row whose 'year' is 2022.
current_driver_ids = results.loc[results['year'] == 2022, 'driverId']
# All rows, from any year, that belong to one of those drivers.
current_drivers = results[results['driverId'].isin(pd.unique(current_driver_ids))]
# One row per (driverId, driver); after count() the 'race' column holds the
# number of non-null 'race' entries for that driver.
current_drivers = (
    current_drivers[['driverId', 'driver', 'race']]
    .groupby(['driverId', 'driver'])
    .count()
    .reset_index()
)
stats.horizontal_barplot(df=current_drivers, rowcount=30, sort_value='race',
sort_ascending=False, invert_yaxis=True,
xcolumn='driver', ycolumn='race',
@lmeulen
lmeulen / ergast_average_age.py
Created August 20, 2022 19:21
ergast_average_age
# Mean of the per-result driver age for each year, drawn as a line chart.
df = stats.get_race_results().merge(stats.get_table('races'))
df = df.merge(stats.get_table('drivers'), on='driverId')
# Age on the 'date' of each row as a Timedelta. Both columns go through
# pd.to_datetime so the subtraction works whether they hold strings, date
# objects or datetimes; normalize() drops any time-of-day part, as the
# original .dt.date did.
df['age'] = pd.to_datetime(df['date']).dt.normalize() - pd.to_datetime(df['dob'])
# Convert to years using the mean Gregorian year (365.2425 days), the length
# NumPy assigns to its 'Y' unit. Dividing by np.timedelta64(1, 'Y') directly
# is rejected by pandas 2.x, which no longer accepts the ambiguous 'Y' unit.
df['age'] = df['age'].dt.total_seconds() / (365.2425 * 24 * 60 * 60)
df = df[['year', 'age']].groupby('year').mean()
fig, ax = plt.subplots(figsize=(15, 8))
df.plot(ax=ax)
# Fixed y-range of 25 to 40.
ax.set_ylim(25, 40)
ax.set_title('Average age per year')
@lmeulen
lmeulen / ergast_most_wins_no_champ.py
Created August 20, 2022 11:15
ergast_most_wins_no_champ
winners = stats.get_winners()
# Number of rows per driverId in `winners`; the count column is exposed
# under the name 'podiums'.
wins_per_driver = (
    winners[['driverId', 'year']]
    .groupby('driverId')
    .count()
    .reset_index()
    .rename(columns={"year": "podiums"})
)
# Distinct driverIds returned by get_wdc_champions.
champs = pd.unique(stats.get_wdc_champions()['driverId'])
# Keep only drivers that are not in that set, then attach the columns of the
# 'drivers' table.
not_champ = wins_per_driver.loc[~wins_per_driver['driverId'].isin(champs)]
not_champ = not_champ.merge(stats.get_table('drivers'))
stats.horizontal_barplot(df=not_champ, rowcount=10, sort_value='podiums',
sort_ascending=False,
@lmeulen
lmeulen / ergast_constructor_most_wins.py
Created August 20, 2022 11:09
ergast_constructor_most_wins
# Number of rows per constructor in `winners` (non-null 'race' entries).
wins = (
    winners[['constructor', 'race']]
    .groupby('constructor')
    .count()
    .reset_index()
)
# Bar chart of those counts, sorted descending on 'race'.
stats.horizontal_barplot(
    df=wins,
    rowcount=10,
    sort_value='race',
    sort_ascending=False,
    invert_yaxis=True,
    xcolumn='constructor',
    ycolumn='race',
    title='Wins per constructor',
    xlabel='wins',
    ylabel='',
)