# Created January 14, 2015 01:48
# runner stats
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import glob
import os
#time data is provided as a string in the format of HH:MM:SS but hours are omitted in shorter races
def str_to_time_delta(x):
    """Convert a race time string into a ``datetime.timedelta``.

    Args:
        x: Time formatted as ``HH:MM:SS``, or ``MM:SS`` when the hours
            field is omitted. ``None`` and NaN are treated as a missing
            value.

    Returns:
        A ``timedelta`` for a valid string, or ``pd.NaT`` when ``x`` is
        missing.

    Raises:
        ValueError: If ``x`` is a string that does not match the expected
            time format.
    """
    # NaN is the only value that compares unequal to itself; a missing cell
    # cannot be parsed, so hand back the pandas "not a time" marker.
    if x is None or x != x:
        return pd.NaT
    x = x.strip()
    # A single colon means the hours field was left out; restore it so one
    # format string covers both layouts.
    if x.count(":") == 1:
        x = "00:" + x
    t = datetime.strptime(x, "%H:%M:%S")
    return timedelta(hours=t.hour, minutes=t.minute, seconds=t.second)
# Make the graphs a bit prettier. The pandas 'display.mpl_style' option was
# deprecated and later removed from pandas, so setting it fails on current
# releases; the pandas release notes point to matplotlib style sheets instead.
plt.style.use('ggplot')
def load_runners(race, csvpath, headers=None):
    """Load every results CSV found in ``csvpath`` into a single DataFrame.

    Each file is read with its first two rows skipped, the given column
    names applied and the "Place" column used as the index. The
    "Clock Time" and "Net Time" columns are converted to timedeltas, a
    "Start Time" column is derived as clock time minus net time, and
    "Net Num" / "Clock Num" / "Start Num" hold the same durations as float
    seconds so they can be plotted.

    Args:
        race: Race identifier (e.g. 'half' or 'full'). Currently unused by
            the loader; kept so existing callers continue to work.
        csvpath: Directory that is searched for ``*.csv`` files.
        headers: Column names to assign to each CSV. Must include "Place",
            "Clock Time" and "Net Time". Defaults to the single-split
            layout with a "5K Split" column.

    Returns:
        pandas.DataFrame with all rows from all files, indexed by "Place".

    Raises:
        ValueError: If no CSV file is found in ``csvpath``.
    """
    if headers is None:
        headers = ["Group Place", "Name", "Bib", "Age", "Place",
                   "Gender Place", "5K Split", "Clock Time", "Net Time",
                   "Hometown"]

    # Search the requested directory rather than the current one; sorting
    # keeps the row order independent of filesystem listing order.
    pattern = os.path.join(csvpath, "*.csv")
    frames = []
    for filename in sorted(glob.glob(pattern)):
        print('Parsing' + filename)
        frames.append(pd.read_csv(filename, skiprows=[0, 1], names=headers,
                                  index_col="Place"))
    if not frames:
        raise ValueError("No CSV files found matching " + pattern)
    # One concat instead of repeated DataFrame.append (quadratic, and
    # removed in pandas 2.0).
    runners = pd.concat(frames)

    # Split columns differ between header layouts and are left as read from
    # the CSV; only the two columns every layout shares are converted.
    for column in ("Clock Time", "Net Time"):
        runners[column] = pd.to_timedelta(
            runners[column].map(str_to_time_delta))
    runners["Start Time"] = runners["Clock Time"] - runners["Net Time"]

    # Plotting needs plain numbers, so expose each duration as float seconds.
    one_second = np.timedelta64(1, 's')
    for label in ("Net", "Clock", "Start"):
        runners[label + " Num"] = runners[label + " Time"] / one_second
    return runners
# Load the 'full' race results from ./Full/. This layout carries four
# intermediate split columns rather than a single 5K split.
full = load_runners(
    race='full',
    csvpath="./Full/",
    headers=[
        "Group Place",
        "Name",
        "Bib",
        "Age",
        "Place",
        "Gender Place",
        "5M Split",
        "10M Split",
        "Half Split",
        "20M Split",
        "Clock Time",
        "Net Time",
        "Hometown",
    ],
)
