Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
runner stats
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import glob
import os
#time data is provided as a string in the format of HH:MM:SS but hours are omitted in shorter races
def str_to_time_delta(x):
    """Convert a race time string into a ``datetime.timedelta``.

    Times are formatted "HH:MM:SS"; shorter races omit the hours field and
    give just "MM:SS", in which case zero hours is assumed.

    Args:
        x: Time string, or a missing value (``None`` or NaN, which is how
            pandas represents an empty CSV cell).

    Returns:
        A ``timedelta`` for a valid time string, or ``pd.NaT`` when ``x`` is
        missing, so that a mapped column stays usable for timedelta
        arithmetic.

    Raises:
        ValueError: If ``x`` is a string that does not match "%H:%M:%S"
            once the hours field has been filled in.
    """
    # NaN is the only value that compares unequal to itself. It must be
    # filtered out *before* parsing: calling string methods or strptime()
    # on a float NaN would raise instead of yielding a missing value.
    if x is None or x != x:
        return pd.NaT
    # A single colon means only minutes and seconds were recorded.
    if x.count(":") == 1:
        x = "00:" + x
    t = datetime.strptime(x, "%H:%M:%S")
    return timedelta(hours=t.hour, minutes=t.minute, seconds=t.second)
# Apply pandas' bundled matplotlib styling to every plot produced below.
# NOTE(review): the 'display.mpl_style' option appears to have been deprecated
# in pandas 0.18 and removed in later releases, where this call would raise --
# confirm against the installed pandas version. matplotlib's own
# plt.style.use('ggplot') is the usual replacement.
pd.set_option('display.mpl_style', 'default') # Make the graphs a bit prettier
def load_runners(race, csvpath, headers=["Group Place", "Name", "Bib", "Age", "Place", "Gender Place", "5K Split", "Clock Time", "Net Time", "Hometown"]):
    """Load every results CSV in a directory into one DataFrame of runners.

    Each ``*.csv`` file directly inside ``csvpath`` is read with its first
    two rows skipped and its columns named from ``headers``; the per-file
    frames are concatenated and indexed by the "Place" column. The
    "Clock Time" and "Net Time" columns are converted to timedeltas via
    ``str_to_time_delta``, and these columns are added:

    * "Start Time" -- "Clock Time" minus "Net Time".
    * "Net Num", "Clock Num", "Start Num" -- the three time columns
      expressed as float seconds, because timedeltas cannot be plotted
      directly.

    Args:
        race: Race identifier (e.g. 'half' or 'full'). Accepted for
            interface compatibility; no returned value depends on it.
        csvpath: Directory containing the result CSV files.
        headers: Column names to assign, in file order. Must include
            "Place", "Clock Time" and "Net Time". The default list is only
            read, never mutated.

    Returns:
        A DataFrame with one row per runner across all files.

    Raises:
        ValueError: If no CSV file is found in ``csvpath``.
    """
    # Search inside the requested directory rather than the current working
    # directory; sort so the row order does not depend on filesystem order.
    pattern = os.path.join(csvpath, "*.csv")
    frames = []
    for filename in sorted(glob.glob(pattern)):
        print('Parsing ' + filename)
        frames.append(
            pd.read_csv(filename, skiprows=[0, 1], names=headers, index_col="Place")
        )
    if not frames:
        raise ValueError("no CSV files found matching %s" % pattern)

    # One concat instead of repeated appends: linear rather than quadratic
    # copying, and independent of the DataFrame.append method.
    runners = pd.concat(frames)

    # Split columns are left as raw strings: their names differ from race to
    # race, so only the two columns common to every header list are parsed.
    # to_timedelta pins the dtype so the arithmetic below is well defined
    # even when some cells are missing.
    for column in ("Clock Time", "Net Time"):
        runners[column] = pd.to_timedelta(runners[column].map(str_to_time_delta))
    runners["Start Time"] = runners["Clock Time"] - runners["Net Time"]

    # Timedeltas cannot be plotted directly, so also expose each time column
    # as a plain number of seconds (missing values become NaN).
    one_second = np.timedelta64(1, 's')
    for source, target in (
        ("Net Time", "Net Num"),
        ("Clock Time", "Clock Num"),
        ("Start Time", "Start Num"),
    ):
        runners[target] = runners[source] / one_second
    return runners
# Load the full-marathon results from ./Full/. These files carry four
# intermediate split columns (5M, 10M, half, 20M) where the default header
# list has a single 5K split, so the column names are passed explicitly.
full = load_runners(
    race='full',
    csvpath="./Full/",
    headers=[
        "Group Place",
        "Name",
        "Bib",
        "Age",
        "Place",
        "Gender Place",
        "5M Split",
        "10M Split",
        "Half Split",
        "20M Split",
        "Clock Time",
        "Net Time",
        "Hometown",
    ],
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment