Skip to content

Instantly share code, notes, and snippets.

@kellbot
Created January 14, 2015 01:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kellbot/1bab3ae83d7b80ee382a to your computer and use it in GitHub Desktop.
Save kellbot/1bab3ae83d7b80ee382a to your computer and use it in GitHub Desktop.
runner stats
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import glob
import os
# Time data is provided as a string in the format HH:MM:SS, but the hours
# field is omitted in shorter races (MM:SS).
def str_to_time_delta(x):
    """Convert a race-time string into a ``datetime.timedelta``.

    Accepts ``H:MM:SS`` / ``HH:MM:SS`` as well as the hour-less ``M:SS`` /
    ``MM:SS`` form; surrounding whitespace is ignored. Fields are not
    range-checked, so overflowing values are simply normalised by
    ``timedelta`` (e.g. ``"75:30"`` becomes 1:15:30).

    Args:
        x: the time text, or a missing value (``None`` / NaN).

    Returns:
        The elapsed time as a ``timedelta``, or ``None`` when ``x`` is missing.

    Raises:
        ValueError: if ``x`` is not two or three colon-separated integers.
    """
    # NaN is the only value that compares unequal to itself.
    if x is None or x != x:
        return None
    fields = str(x).strip().split(":")
    # Decide on the number of fields rather than the string length, so that
    # "9:45" is handled the same way as "09:45".
    if len(fields) == 2:
        fields.insert(0, "0")
    if len(fields) != 3:
        raise ValueError("unrecognised time format: %r" % (x,))
    hours, minutes, seconds = (int(field) for field in fields)
    return timedelta(hours=hours, minutes=minutes, seconds=seconds)
# Make the graphs a bit prettier by selecting pandas' 'default' matplotlib style preset.
# NOTE(review): the 'display.mpl_style' option appears to have been deprecated and
# later removed from pandas -- confirm against the pandas version in use, since
# newer releases may raise on this line.
pd.set_option('display.mpl_style', 'default')
def load_runners(race, csvpath, headers=None):
    """Load every results CSV in a directory into one DataFrame of runners.

    Each ``*.csv`` file directly inside ``csvpath`` is read with its first two
    rows skipped and ``headers`` applied as column names. The frames are
    combined and sorted by the "Place" index. "Clock Time" and "Net Time" are
    converted to timedeltas, "Start Time" is derived as clock minus net, and
    "Net Num" / "Clock Num" / "Start Num" hold the same durations as float
    seconds for plotting.

    Args:
        race: race label (e.g. 'half' or 'full'). Kept for interface
            compatibility; the returned data does not depend on it.
        csvpath: directory containing the CSV files.
        headers: column names for the CSV files. Must include "Place",
            "Clock Time" and "Net Time". Defaults to the single-split
            ("5K Split") layout.

    Returns:
        pandas.DataFrame indexed by "Place".

    Raises:
        FileNotFoundError: if ``csvpath`` contains no ``*.csv`` files.
    """
    if headers is None:
        headers = ["Group Place", "Name", "Bib", "Age", "Place", "Gender Place",
                   "5K Split", "Clock Time", "Net Time", "Hometown"]

    # Build paths from csvpath instead of os.chdir(): changing the working
    # directory would break later calls that pass another relative path.
    pattern = os.path.join(csvpath, "*.csv")
    frames = []
    # Sorted so the row order of tied/missing places is reproducible.
    for filename in sorted(glob.glob(pattern)):
        print('Parsing' + os.path.basename(filename))
        frames.append(pd.read_csv(filename, skiprows=[0, 1],
                                  names=list(headers), index_col="Place"))
    if not frames:
        raise FileNotFoundError("no CSV files found in %r" % (csvpath,))

    # One concat instead of repeated DataFrame.append (quadratic, and removed
    # in pandas 2.0).
    runners = pd.concat(frames).sort_index()

    # Split columns are left as raw strings; their names differ between races.
    for column in ("Clock Time", "Net Time"):
        # to_timedelta guarantees a timedelta64 dtype even if every value is
        # missing, so the subtraction and .dt accessor below always work.
        runners[column] = pd.to_timedelta(runners[column].map(str_to_time_delta))
    runners["Start Time"] = runners["Clock Time"] - runners["Net Time"]

    # Timedeltas cannot be plotted directly, so also expose them as seconds.
    for target, source in (("Net Num", "Net Time"),
                           ("Clock Num", "Clock Time"),
                           ("Start Num", "Start Time")):
        runners[target] = runners[source].dt.total_seconds()
    return runners
# Load the 'full' race results. Its CSVs report four intermediate splits
# (5M, 10M, Half, 20M) rather than a single 5K split, so the column names
# are passed explicitly.
full = load_runners(
    'full',
    "./Full/",
    [
        "Group Place",
        "Name",
        "Bib",
        "Age",
        "Place",
        "Gender Place",
        "5M Split",
        "10M Split",
        "Half Split",
        "20M Split",
        "Clock Time",
        "Net Time",
        "Hometown",
    ],
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment