Skip to content

Instantly share code, notes, and snippets.

@caot
Last active December 10, 2023 00:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save caot/b22e9e870dbe01cd5c351306ec26a3c8 to your computer and use it in GitHub Desktop.
Save caot/b22e9e870dbe01cd5c351306ec26a3c8 to your computer and use it in GitHub Desktop.
'''
* https://swimstandards.com/swimmer/
* https://www.swimcloud.com
* https://www.swimmingrank.com
* https://www.midnightsports.com/index2023.htm
'''
# Windows (cmd.exe) commands that put an Anaconda install on the PATH.
# This has to be a raw string: the paths contain backslash sequences such as
# "\usr", which a normal string literal rejects as a truncated \uXXXX escape
# (a SyntaxError that prevents the whole file from loading).
r'''
set CONDA_PREFIX=C:\apps\anaconda3
set CONDA_DEFAULT_ENV=base
set CONDA_EXE=%CONDA_PREFIX%\Scripts\conda.exe
set CONDA_PROMPT_MODIFIER=(base)
set CONDA_PYTHON_EXE=%CONDA_PREFIX%\python.exe
set CONDA_SHLVL=1
set Path=%CONDA_PREFIX%;%CONDA_PREFIX%\Library\mingw-w64\bin;%CONDA_PREFIX%\Library\usr\bin;%CONDA_PREFIX%\Library\bin;%CONDA_PREFIX%\Scripts;%CONDA_PREFIX%\bin;%CONDA_PREFIX%\condabin;%Path%
'''
# http://gijskoot.nl/pandas/swimming/sports/records/2018/01/16/swimming-records.html
# https://en.wikipedia.org/wiki/List_of_world_records_in_swimming
import pandas as pd
import matplotlib.pyplot as plt
# import statsmodels.api as sm
# import matplotlib.pylab as plt
# from pandas.plotting import andrews_curves
import re
# Location of the saved results page. Replace the placeholder with a real
# path (for example "C:\\XXX.html") before running.
url = "PATH TO.html"

# Collect every table whose class attribute is "dbtable2"; row 1 (0-based)
# of each table supplies the column labels.
tables = pd.read_html(
    url,
    attrs={"class": "dbtable2"},
    encoding='utf-8',
    header=1,
)

# Only the first matching table is used. Rows that are entirely empty are
# discarded first, then columns that are entirely empty.
df = tables[0]
df = df.dropna(how='all', axis=0)
df = df.dropna(how='all', axis=1)

# Remove everything up to and including the last ")" in each column label.
df.columns = df.columns.str.replace(r'.*\)', '', regex=True)

# Keep only the rows that carry an event name.
df = df.loc[df['Event'].notna()]
print(df)
# Debug switch: while it is on, printed frames are neither truncated (no
# row or column limit) nor wrapped across several blocks of lines.
TO_DEBUG = True
if TO_DEBUG:
    # set_option takes any number of (option, value) pairs in one call.
    pd.set_option(
        'display.max_columns', None,
        'display.expand_frame_repr', False,
        'display.max_rows', None,
    )
# Columns that must hold whole numbers; missing entries count as 0.
to_int = {'Pos': 'int', 'Pts': 'int', 'EventAgeCurrent': 'int', }
keys = list(to_int.keys())
df[keys] = df[keys].fillna(value=0).astype(to_int)

# Divide every EventAgeCurrent value below 10000 by ten, in one vectorised
# step. The earlier per-row loop wrote through chained indexing
# (df[col][i] = ...) and used the positional enumerate() counter as a row
# label, which stops matching the index once rows have been filtered out.
# The result is a float column whenever a value is scaled.
# NOTE(review): the loop tested "< 100" and "< 10000" and divided by ten in
# both cases, which makes the first test redundant; a different divisor for
# one of the ranges may have been intended -- confirm against real data.
ages = df['EventAgeCurrent']
df['EventAgeCurrent'] = ages.where(ages >= 10000, ages / 10)
# Remove everything up to and including the last ")" in each event name.
df['Event'] = df['Event'].str.replace(r'.*\)', '', regex=True)

# Append the preliminary time text to the finals text, separated by a space
# (presumably so that a row without a finals time still carries a time).
# The merge stays best-effort, but only the expected failures are tolerated
# -- a missing column (KeyError) or non-text cells (TypeError) -- and they
# are reported instead of being swallowed silently.
try:
    df['Finals'] = df['Finals'].fillna('') + ' ' + df['Prelim.'].fillna('')
except (KeyError, TypeError) as err:
    print('Finals/Prelim. columns not merged: %r' % (err,))
print('--------------------------------------------------------------------')
print(df)
print('--------------------------------------------------------------------')
# Split the first time found in each Finals text into optional minutes (m),
# seconds (s) and hundredths (ms). The pattern is a raw string so that "\d"
# and "\." reach the regex engine unchanged; in a normal literal they are
# invalid escape sequences, which recent Python versions warn about.
parsed_Finals = df['Finals'].str.extract(
    r"(?P<m>\d{1,2})?:?(?P<s>\d{2})\.(?P<ms>\d{2})*", expand=True)

# Total time in seconds. A missing minutes part counts as 0; a missing
# seconds or hundredths part leaves the result as NaN.
Finals_in_seconds = (
    parsed_Finals['m'].astype(float).fillna(0) * 60
    + parsed_Finals['s'].astype(float)
    + parsed_Finals['ms'].astype(float) / 100
)
df['Finals_in_seconds'] = Finals_in_seconds
# Turn the swim dates into datetimes, then order the rows by event and,
# within each event, from the earliest swim to the latest.
swim_dates = pd.DatetimeIndex(data=df['Date ofSwim'])
df['Date ofSwim'] = swim_dates
df = df.sort_values(['Event', 'Date ofSwim'])
print(df)
# df = df.reindex(sorted(df.columns), axis=1)
# df.sort_index(axis=1)
# df = df.sort_index(axis=1)
# https://stackoverflow.com/questions/43707620/plotting-a-time-series
def plot_50_Free(event='50 Free', data=None):
    """Plot the times of one event against the date of the swim.

    Args:
        event: Value of the ``Event`` column to plot. Defaults to
            ``'50 Free'``.
        data: Frame providing the ``Event``, ``Date ofSwim`` and
            ``Finals_in_seconds`` columns. When omitted, the module-level
            ``df`` is used.

    Returns:
        None. When no row matches ``event`` a message is printed and no
        figure is created.
    """
    if data is None:
        data = df
    dfg = data.loc[data['Event'] == event]
    if dfg.empty:
        # Nothing to draw: say so rather than opening a blank chart.
        print('No results for event %r; nothing to plot' % (event,))
        return
    dfg.plot(x='Date ofSwim', y='Finals_in_seconds')
# Build the '50 Free' chart now; it is displayed by the plt.show() call at
# the end of the script.
plot_50_Free()
def plot_df_pivot_table():
    """Plot all events on one chart, one line per event.

    Reads the module-level ``df``. The frame is reshaped so that each swim
    date is a row and each event a column holding ``Finals_in_seconds``;
    entries sharing a date and an event are combined by ``pivot_table``'s
    default aggregation.
    """
    # Approach taken from
    # https://stackoverflow.com/questions/38197964/pandas-plot-multiple-time-series-dataframe-into-a-single-plot
    times_by_event = df.pivot_table(
        values='Finals_in_seconds',
        index='Date ofSwim',
        columns='Event',
    )
    times_by_event.plot()
# plot_df_pivot_table()
# multiple line plot
def plot_multiple_line(swimmer=None, data=None):
    """Plot every event's times over time as separate lines on one chart.

    Args:
        swimmer: Name shown in the chart title. When omitted, a module-level
            ``swimmer`` variable is used if one exists; otherwise the chart
            is left without a title.
        data: Frame providing the ``Event``, ``Date ofSwim`` and
            ``Finals_in_seconds`` columns. When omitted, the module-level
            ``df`` is used.

    Returns:
        None. When ``data`` has no rows a message is printed and nothing is
        drawn.
    """
    if data is None:
        data = df
    if data.empty:
        print('No results to plot')
        return
    if swimmer is None:
        # No name is defined in this script itself, so look one up lazily
        # rather than failing with a NameError when the title is set.
        swimmer = globals().get('swimmer')

    # Valid font sizes are xx-small, x-small, small, medium, large, x-large,
    # xx-large, larger, smaller, None
    plt.xticks(
        rotation=45,
        horizontalalignment='right',
        fontweight='light',
        fontsize='medium',  # 'x-large'
    )

    def leading_number_then_name(event):
        # Order by the integer that starts the event name, so "50 Free"
        # comes before "100 Free"; the name itself breaks ties, which keeps
        # the legend order the same from run to run.
        return int(event.split()[0]), event

    for event in sorted(set(data['Event']), key=leading_number_then_name):
        rows = data.loc[data['Event'] == event]
        plt.plot(rows['Date ofSwim'], rows['Finals_in_seconds'],
                 marker=".", markersize=10, label=event)

    if swimmer is not None:
        plt.title('Swimmer: %s' % swimmer)
    plt.legend()
# Draw the per-event lines, then display the result.
# NOTE(review): plot_multiple_line() draws through pyplot's current axes,
# which at this point are presumably the ones created by plot_50_Free()
# earlier in the script -- confirm that sharing one chart is intended.
plot_multiple_line()
plt.show()
@caot
Copy link
Author

caot commented Oct 19, 2019

@caot
Copy link
Author

caot commented Oct 19, 2019

@caot
Copy link
Author

caot commented Dec 5, 2022

@caot
Copy link
Author

caot commented Jan 19, 2023

@caot
Copy link
Author

caot commented Oct 16, 2023

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment