# cjimmy/lavaman2015-pass.py

## lavaman2015-pass.py
from datetime import timedelta
from enum import IntEnum
import csv

# enumerate the columns for readability
class Col(IntEnum):
	"""Zero-based position of each field in a whitespace-split results row."""
	# placing and athlete identity
	place, divTot, bib, cat = 0, 1, 2, 3
	firstName, lastName, age, sex, div = 4, 5, 6, 7, 8
	# swim leg and first transition
	swimRank, swimTime, swimPace, t1Time = 9, 10, 11, 12
	# bike leg and second transition
	bikeRank, bikeTime, bikeSpeed = 13, 14, 15
	t2Rank, t2Time = 16, 17
	# run leg and finish
	runRank, runTime, runPace, totalTime = 18, 19, 20, 21

class StartTimes(IntEnum):
	"""Start offset of each wave (presumably minutes; the values match getStartTime)."""
	proElite, M044, M4454, M55 = 0, 5, 10, 15
	F039, F4049, F50 = 20, 25, 30
	# Novice = 35

# Division codes that make up each non-pro start wave (pros are handled via the cat column)
wave2 = 'M119 M2024 M2529 M3034 M3539 M4044'.split()
wave3 = 'M4549 M5054'.split()
wave4 = 'M5559 M6064 M6569 M7074 M75PL'.split()
wave5 = 'F119 F2024 F2529 F3034 F3539'.split()
wave6 = 'F4044 F4549'.split()
wave7 = 'F5054 F5559 F6064 F6569 F7074 F75PL'.split()


# Load the lavaman results (the individual results copied into results.txt):
# one athlete per line, each line split into whitespace-separated fields.
with open("results.txt") as resultsFile:
	r = [rawLine.split() for rawLine in resultsFile]

# Drop dirty rows, i.e. rows with fewer than 22 fields because times are clumped.
# Not ideal, sorry to those 51 athletes whose ankle bracelets didn't register.
# TODO "clean" data by getting a best estimate of their rank by giving them an average time
# of the 10 people ranked around them.
r = [fields for fields in r if len(fields) >= 22]


# Combine multi-word last names into one field so the column indices line up.
for line in r:
	if line[Col.age].isdigit():
		continue  # surname is a single token, nothing to repair
	# The surname spilled into the following columns; the real age is the first
	# all-digit field after it. (A row with no such field raises IndexError.)
	ageIdx = Col.age
	while not line[ageIdx].isdigit():
		ageIdx += 1
	# Replace the fragments by position. list.remove() deletes the first field with
	# an equal *value*, which is the wrong one if a fragment repeats an earlier field.
	line[Col.lastName:ageIdx] = [' '.join(line[Col.lastName:ageIdx])]

# Turn every "mm:ss" or "h:mm:ss" field into a timedelta, in place.
for line in r:
	for pos, field in enumerate(line):
		if ':' not in field:
			continue  # not a time
		pieces = field.split(':')
		if len(pieces) == 2:  # "12:34" has no hours part
			line[pos] = timedelta(minutes=int(pieces[0]), seconds=int(pieces[1]))
		elif len(pieces) == 3:
			line[pos] = timedelta(hours=int(pieces[0]), minutes=int(pieces[1]), seconds=int(pieces[2]))
		# any other number of pieces: the field is left as the original string


#-- adding together the split times and comparing it to the finish time, it looks like the finish time is determined independent of the split times (that's good)
#-- and the split times are rounded up from the milliseconds. Code to show that:
# for line in r:
# 	print (line[swimTime] + line[t1Time] + line[bikeTime] + line[t2Time] + line[runTime]) - line[totalTime]


# Append each athlete's start-wave label: '1' for pros, otherwise '2'-'7' by division.
for athlete in r:
	if athlete[Col.cat] == 'P':
		athlete.append('1')
		continue
	for waveNumber, divisions in enumerate((wave2, wave3, wave4, wave5, wave6, wave7), start=2):
		if athlete[Col.div] in divisions:
			athlete.append(str(waveNumber))
			break
	else:
		# nothing is appended in this case, so the row ends up one column short
		print("not a recognized division")

# wave start offset
def getStartTime(athlete):
	"""Return the athlete's wave start offset as a timedelta.

	Pros (cat == 'P') start at 0; each following wave starts 5 minutes after
	the previous one. Prints a message and returns None when the division
	belongs to no known wave.
	"""
	if athlete[Col.cat] == 'P':
		return timedelta(0)
	division = athlete[Col.div]
	offsets = ((5, wave2), (10, wave3), (15, wave4), (20, wave5), (25, wave6), (30, wave7))
	for minutes, divisions in offsets:
		if division in divisions:
			return timedelta(minutes=minutes)
	print("not a recognized division")
	return None


#-- find index of bib numbers in lists (i.e. rank-1) and find difference
def findIndexOfBibInLeg(targetBib, leg):
	"""Return the index of the first row of leg that contains the bib, or None.

	The bib is compared as a string against every field of each row.
	"""
	wanted = str(targetBib)
	return next(
		(pos for pos, row in enumerate(leg) if any(field == wanted for field in row)),
		None,
	)

# Cumulative times at the end of each leg, including the athlete's wave start offset.
# Each row is [bib, afterSwim, afterT1, afterBike, afterT2, afterRun].
timesByOverall = []
for athlete in r:
	elapsed = getStartTime(athlete)
	checkpoints = [athlete[Col.bib]]
	for leg in (Col.swimTime, Col.t1Time, Col.bikeTime, Col.t2Time, Col.runTime):
		elapsed = elapsed + athlete[leg]
		checkpoints.append(elapsed)
	timesByOverall.append(checkpoints)


# timesByOverallStartTogether: each athlete's cumulative time after every leg,
# ignoring waves, i.e. as if everybody had started at the same time.
# Each row is [bib, afterSwim, afterT1, afterBike, afterT2, afterRun].
timesByOverallStartTogether = []
for athlete in r:
	elapsed = athlete[Col.swimTime]
	checkpoints = [athlete[Col.bib], elapsed]
	for leg in (Col.t1Time, Col.bikeTime, Col.t2Time, Col.runTime):
		elapsed = elapsed + athlete[leg]
		checkpoints.append(elapsed)
	timesByOverallStartTogether.append(checkpoints)

# create different sorted lists
def _columnKey(position):
	"""Build a sort-key function that returns row[position]."""
	def key(row):
		return row[position]
	return key


# Sort keys for the cumulative-time rows [bib, swim, T1, bike, T2, run]
getKeySwim = _columnKey(1)
getKeyT1 = _columnKey(2)
getKeyBike = _columnKey(3)
getKeyT2 = _columnKey(4)
getKeyRun = _columnKey(5)

# Rankings after each leg, wave start offsets included
timesBySwim, timesByT1, timesByBike, timesByT2, timesByRun = [
	sorted(timesByOverall, key=legKey)
	for legKey in (getKeySwim, getKeyT1, getKeyBike, getKeyT2, getKeyRun)
]

# The same rankings as if everybody had started together
timesBySwimStartTogether, timesByT1StartTogether, timesByBikeStartTogether, timesByT2StartTogether, timesByRunStartTogether = [
	sorted(timesByOverallStartTogether, key=legKey)
	for legKey in (getKeySwim, getKeyT1, getKeyBike, getKeyT2, getKeyRun)
]

#-- returns number of how many people you passed IN EACH OF the legs T-1, Bike, T-2, and the run, given wave starts
#-- it does this by taking your rank before a leg, and substracting your rank after that leg.
#-- positive means you passed that many people, negative means that many people passed you
def numPplPassed(bib):
	"""Return [T1, bike, T2, run] net passes for the bib, given wave starts.

	Each entry is the position before the leg minus the position after it, so
	a positive value means places gained. Positions are list indices, where a
	lower index is a better place.
	"""
	legs = (timesBySwim, timesByT1, timesByBike, timesByT2, timesByRun)
	positions = [findIndexOfBibInLeg(bib, leg) for leg in legs]
	return [before - after for before, after in zip(positions, positions[1:])]

#-- returns number of how many people you passed IN EACH OF the legs T-1, Bike, T-2, and the run if everyone started together
def numPplPassedStartTogether(bib):
	"""Return [T1, bike, T2, run] net passes for the bib, ignoring wave starts.

	Each entry is the position before the leg minus the position after it,
	taken from the rankings computed as if everyone had started together.
	"""
	legs = (
		timesBySwimStartTogether,
		timesByT1StartTogether,
		timesByBikeStartTogether,
		timesByT2StartTogether,
		timesByRunStartTogether,
	)
	positions = [findIndexOfBibInLeg(bib, leg) for leg in legs]
	return [before - after for before, after in zip(positions, positions[1:])]


#-- returns aggregate number of people you passed SO FAR after each leg T-1, Bike, T-2, and the run
# def aggregatePplPassed(bib):
# 	nPassed = []
# 	nPassed.append(findIndexOfBibInLeg(bib, timesBySwim) - findIndexOfBibInLeg(bib, timesByT1))
# 	nPassed.append(findIndexOfBibInLeg(bib, timesBySwim) - findIndexOfBibInLeg(bib, timesByBike))
# 	nPassed.append(findIndexOfBibInLeg(bib, timesBySwim) - findIndexOfBibInLeg(bib, timesByT2))
# 	nPassed.append(findIndexOfBibInLeg(bib, timesBySwim) - findIndexOfBibInLeg(bib, timesByRun))
# 	return nPassed


#-- returns the number of people you passed in waves that started before you
#-- algorithm: find your swim time and division
#-- then find all people in waves before you who had longer times = number of people you passed
#-- find all people in waves after you (i.e. excluding your group and preceding gorups)
#--  who had shorter swim times + wave start time = number of people passed you
#-- messy brute force way
def numPplPassedInSwim(bib):
	"""Return the net number of athletes from other waves passed in the swim.

	Swim exit time is the wave start offset plus the swim time. You passed an
	athlete who started in an earlier wave but exited later than you; an
	athlete passed you if they started in a later wave but exited earlier.
	Athletes in your own wave are ignored.

	Returns youPassed - passedYou (negative when more people passed you), or
	0 when the target's division belongs to no known wave.
	Raises ValueError when no athlete in r has the given bib.
	"""
	wanted = str(bib)
	targetAthlete = next((athlete for athlete in r if athlete[Col.bib] == wanted), None)
	if targetAthlete is None:
		raise ValueError("no athlete with bib %s" % bib)

	targetStart = getStartTime(targetAthlete)
	if targetStart is None:
		return 0
	targetExit = targetStart + targetAthlete[Col.swimTime]

	youPassed = 0
	passedYou = 0
	for athlete in r:
		# Start offsets grow with the wave number, so comparing offsets orders the waves.
		start = getStartTime(athlete)
		if start is None or start == targetStart:
			continue  # unknown division, or same wave as the target
		# Both sides use the absolute exit time; this includes the other athlete's own offset.
		exitTime = start + athlete[Col.swimTime]
		if start < targetStart and exitTime > targetExit:
			youPassed += 1
		elif start > targetStart and exitTime < targetExit:
			passedYou += 1
	return youPassed - passedYou


#---------------------------------------------------
# append the calculated numbers onto the athlete object to be written to the file
for athlete in r:
	bib = athlete[Col.bib]
	# swim is a special case: only athletes from other waves can be passed
	athlete.append(numPplPassedInSwim(bib))
	# people passed in T1, bike, T2 and run, given wave starts
	athlete.extend(numPplPassed(bib))
	# the same four numbers, ignoring wave starts
	athlete.extend(numPplPassedStartTogether(bib))
	# estimated rank after T1 (rank = index + 1), shown in the percentiles
	athlete.append(findIndexOfBibInLeg(bib, timesByT1) + 1)


# Replace every timedelta by whole seconds so d3 can use the numbers more easily
for line in r:
	line[:] = [
		int(field.total_seconds()) if type(field) is timedelta else field
		for field in line
	]


# Running this script overwrites results.csv with the calculated numbers.
header = [
	'place', 'divTot', 'bib', 'cat', 'firstName', 'lastName', 'age', 'sex', 'div',
	'swimRank', 'swimTime', 'swimPace', 't1Time',
	'bikeRank', 'bikeTime', 'bikeSpeed', 't2Rank', 't2Time',
	'runRank', 'runTime', 'runPace', 'totalTime',
	'wave', 'wavePassedSwim', 'wavePassedT1', 'wavePassedBike', 'wavePassedT2', 'wavePassedRun',
	'startPassedT1', 'startPassedBike', 'startPassedT2', 'startPassedRun',
	'estimatedT1Rank',
]
with open('results.csv', 'w', newline='') as csvfile:
	writer = csv.writer(csvfile)
	writer.writerow(header)
	writer.writerows(r)


#---------------------------------------------------


#-- number of people "passed" is aggregate rank after leg_1 - aggregate rank after leg_2
#-- if negative, people passed you
# print('In the swim, you passed %s people in the waves ahead of you' % numPplPassedInSwim(122))
# print(numPplPassed(122))
# print(aggregatePplPassed(122))


# passed = []
# for i in range(0, 1020):
# 	try:
# 		passed.append(findIndexOfBibInLeg(i, timesBySwim) - findIndexOfBibInLeg(i, timesByRun))
# 	except (RuntimeError, TypeError, NameError):
# 		pass
# print(sorted(passed)) #-- to see the distribution of overall number of people passed

#convert to dictionary (key = bib num, value = list of aggregate times)


# algorithm
# given bib number, output number and list of people who you passed or passed you in a given leg
#----------- if everyone started at the same time
# compute aggregated time for each leg
# sort based on aggregated time (i.e. get overall rank at each leg finish)
# passed those with higher rank in previous leg

#----------- given wave start
# first wave starts at 00:00:00
# second wave starts at 00:05:00 (5 minutes later)
# third wave starts at 00:10:00, etc
# rank is not given until after the athlete has completed the swim
	from datetime import timedelta
	from enum import IntEnum
	import csv

	# enumerate the columns for readability
	class Col(IntEnum):
		"""Zero-based position of each field in a whitespace-split results row."""
		# placing and athlete identity
		place, divTot, bib, cat = 0, 1, 2, 3
		firstName, lastName, age, sex, div = 4, 5, 6, 7, 8
		# swim leg and first transition
		swimRank, swimTime, swimPace, t1Time = 9, 10, 11, 12
		# bike leg and second transition
		bikeRank, bikeTime, bikeSpeed = 13, 14, 15
		t2Rank, t2Time = 16, 17
		# run leg and finish
		runRank, runTime, runPace, totalTime = 18, 19, 20, 21

	class StartTimes(IntEnum):
		"""Start offset of each wave (presumably minutes; the values match getStartTime)."""
		proElite, M044, M4454, M55 = 0, 5, 10, 15
		F039, F4049, F50 = 20, 25, 30
		# Novice = 35

	# Division codes that make up each non-pro start wave (pros are handled via the cat column)
	wave2 = 'M119 M2024 M2529 M3034 M3539 M4044'.split()
	wave3 = 'M4549 M5054'.split()
	wave4 = 'M5559 M6064 M6569 M7074 M75PL'.split()
	wave5 = 'F119 F2024 F2529 F3034 F3539'.split()
	wave6 = 'F4044 F4549'.split()
	wave7 = 'F5054 F5559 F6064 F6569 F7074 F75PL'.split()




	# Load the lavaman results (the individual results copied into results.txt):
	# one athlete per line, each line split into whitespace-separated fields.
	with open("results.txt") as resultsFile:
		r = [rawLine.split() for rawLine in resultsFile]

	# Drop dirty rows, i.e. rows with fewer than 22 fields because times are clumped.
	# Not ideal, sorry to those 51 athletes whose ankle bracelets didn't register.
	# TODO "clean" data by getting a best estimate of their rank by giving them an average time
	# of the 10 people ranked around them.
	r = [fields for fields in r if len(fields) >= 22]


	# Combine multi-word last names into one field so the column indices line up.
	for line in r:
		if line[Col.age].isdigit():
			continue  # surname is a single token, nothing to repair
		# The surname spilled into the following columns; the real age is the first
		# all-digit field after it. (A row with no such field raises IndexError.)
		ageIdx = Col.age
		while not line[ageIdx].isdigit():
			ageIdx += 1
		# Replace the fragments by position. list.remove() deletes the first field with
		# an equal *value*, which is the wrong one if a fragment repeats an earlier field.
		line[Col.lastName:ageIdx] = [' '.join(line[Col.lastName:ageIdx])]

	# Turn every "mm:ss" or "h:mm:ss" field into a timedelta, in place.
	for line in r:
		for pos, field in enumerate(line):
			if ':' not in field:
				continue  # not a time
			pieces = field.split(':')
			if len(pieces) == 2:  # "12:34" has no hours part
				line[pos] = timedelta(minutes=int(pieces[0]), seconds=int(pieces[1]))
			elif len(pieces) == 3:
				line[pos] = timedelta(hours=int(pieces[0]), minutes=int(pieces[1]), seconds=int(pieces[2]))
			# any other number of pieces: the field is left as the original string



	#-- adding together the split times and comparing it to the finish time, it looks like the finish time is determined independent of the split times (that's good)
	#-- and the split times are rounded up from the milliseconds. Code to show that:
	# for line in r:
	# print (line[swimTime] + line[t1Time] + line[bikeTime] + line[t2Time] + line[runTime]) - line[totalTime]



	# Append each athlete's start-wave label: '1' for pros, otherwise '2'-'7' by division.
	for athlete in r:
		if athlete[Col.cat] == 'P':
			athlete.append('1')
			continue
		for waveNumber, divisions in enumerate((wave2, wave3, wave4, wave5, wave6, wave7), start=2):
			if athlete[Col.div] in divisions:
				athlete.append(str(waveNumber))
				break
		else:
			# nothing is appended in this case, so the row ends up one column short
			print("not a recognized division")

	# wave start offset
	def getStartTime(athlete):
		"""Return the athlete's wave start offset as a timedelta.

		Pros (cat == 'P') start at 0; each following wave starts 5 minutes after
		the previous one. Prints a message and returns None when the division
		belongs to no known wave.
		"""
		if athlete[Col.cat] == 'P':
			return timedelta(0)
		division = athlete[Col.div]
		offsets = ((5, wave2), (10, wave3), (15, wave4), (20, wave5), (25, wave6), (30, wave7))
		for minutes, divisions in offsets:
			if division in divisions:
				return timedelta(minutes=minutes)
		print("not a recognized division")
		return None



	#-- find index of bib numbers in lists (i.e. rank-1) and find difference
	def findIndexOfBibInLeg(targetBib, leg):
		"""Return the index of the first row of leg that contains the bib, or None.

		The bib is compared as a string against every field of each row.
		"""
		wanted = str(targetBib)
		return next(
			(pos for pos, row in enumerate(leg) if any(field == wanted for field in row)),
			None,
		)

	# Cumulative times at the end of each leg, including the athlete's wave start offset.
	# Each row is [bib, afterSwim, afterT1, afterBike, afterT2, afterRun].
	timesByOverall = []
	for athlete in r:
		elapsed = getStartTime(athlete)
		checkpoints = [athlete[Col.bib]]
		for leg in (Col.swimTime, Col.t1Time, Col.bikeTime, Col.t2Time, Col.runTime):
			elapsed = elapsed + athlete[leg]
			checkpoints.append(elapsed)
		timesByOverall.append(checkpoints)


	# timesByOverallStartTogether: each athlete's cumulative time after every leg,
	# ignoring waves, i.e. as if everybody had started at the same time.
	# Each row is [bib, afterSwim, afterT1, afterBike, afterT2, afterRun].
	timesByOverallStartTogether = []
	for athlete in r:
		elapsed = athlete[Col.swimTime]
		checkpoints = [athlete[Col.bib], elapsed]
		for leg in (Col.t1Time, Col.bikeTime, Col.t2Time, Col.runTime):
			elapsed = elapsed + athlete[leg]
			checkpoints.append(elapsed)
		timesByOverallStartTogether.append(checkpoints)

	# create different sorted lists
	def _columnKey(position):
		"""Build a sort-key function that returns row[position]."""
		def key(row):
			return row[position]
		return key


	# Sort keys for the cumulative-time rows [bib, swim, T1, bike, T2, run]
	getKeySwim = _columnKey(1)
	getKeyT1 = _columnKey(2)
	getKeyBike = _columnKey(3)
	getKeyT2 = _columnKey(4)
	getKeyRun = _columnKey(5)

	# Rankings after each leg, wave start offsets included
	timesBySwim, timesByT1, timesByBike, timesByT2, timesByRun = [
		sorted(timesByOverall, key=legKey)
		for legKey in (getKeySwim, getKeyT1, getKeyBike, getKeyT2, getKeyRun)
	]

	# The same rankings as if everybody had started together
	timesBySwimStartTogether, timesByT1StartTogether, timesByBikeStartTogether, timesByT2StartTogether, timesByRunStartTogether = [
		sorted(timesByOverallStartTogether, key=legKey)
		for legKey in (getKeySwim, getKeyT1, getKeyBike, getKeyT2, getKeyRun)
	]

	#-- returns number of how many people you passed IN EACH OF the legs T-1, Bike, T-2, and the run, given wave starts
	#-- it does this by taking your rank before a leg, and substracting your rank after that leg.
	#-- positive means you passed that many people, negative means that many people passed you
	def numPplPassed(bib):
		"""Return [T1, bike, T2, run] net passes for the bib, given wave starts.

		Each entry is the position before the leg minus the position after it, so
		a positive value means places gained. Positions are list indices, where a
		lower index is a better place.
		"""
		legs = (timesBySwim, timesByT1, timesByBike, timesByT2, timesByRun)
		positions = [findIndexOfBibInLeg(bib, leg) for leg in legs]
		return [before - after for before, after in zip(positions, positions[1:])]

	#-- returns number of how many people you passed IN EACH OF the legs T-1, Bike, T-2, and the run if everyone started together
	def numPplPassedStartTogether(bib):
		"""Return [T1, bike, T2, run] net passes for the bib, ignoring wave starts.

		Each entry is the position before the leg minus the position after it,
		taken from the rankings computed as if everyone had started together.
		"""
		legs = (
			timesBySwimStartTogether,
			timesByT1StartTogether,
			timesByBikeStartTogether,
			timesByT2StartTogether,
			timesByRunStartTogether,
		)
		positions = [findIndexOfBibInLeg(bib, leg) for leg in legs]
		return [before - after for before, after in zip(positions, positions[1:])]


	#-- returns aggregate number of people you passed SO FAR after each leg T-1, Bike, T-2, and the run
	# def aggregatePplPassed(bib):
	# nPassed = []
	# nPassed.append(findIndexOfBibInLeg(bib, timesBySwim) - findIndexOfBibInLeg(bib, timesByT1))
	# nPassed.append(findIndexOfBibInLeg(bib, timesBySwim) - findIndexOfBibInLeg(bib, timesByBike))
	# nPassed.append(findIndexOfBibInLeg(bib, timesBySwim) - findIndexOfBibInLeg(bib, timesByT2))
	# nPassed.append(findIndexOfBibInLeg(bib, timesBySwim) - findIndexOfBibInLeg(bib, timesByRun))
	# return nPassed


	#-- returns the number of people you passed in waves that started before you
	#-- algorithm: find your swim time and division
	#-- then find all people in waves before you who had longer times = number of people you passed
	#-- find all people in waves after you (i.e. excluding your group and preceding gorups)
	#-- who had shorter swim times + wave start time = number of people passed you
	#-- messy brute force way
	def numPplPassedInSwim(bib):
		"""Return the net number of athletes from other waves passed in the swim.

		Swim exit time is the wave start offset plus the swim time. You passed an
		athlete who started in an earlier wave but exited later than you; an
		athlete passed you if they started in a later wave but exited earlier.
		Athletes in your own wave are ignored.

		Returns youPassed - passedYou (negative when more people passed you), or
		0 when the target's division belongs to no known wave.
		Raises ValueError when no athlete in r has the given bib.
		"""
		wanted = str(bib)
		targetAthlete = next((athlete for athlete in r if athlete[Col.bib] == wanted), None)
		if targetAthlete is None:
			raise ValueError("no athlete with bib %s" % bib)

		targetStart = getStartTime(targetAthlete)
		if targetStart is None:
			return 0
		targetExit = targetStart + targetAthlete[Col.swimTime]

		youPassed = 0
		passedYou = 0
		for athlete in r:
			# Start offsets grow with the wave number, so comparing offsets orders the waves.
			start = getStartTime(athlete)
			if start is None or start == targetStart:
				continue  # unknown division, or same wave as the target
			# Both sides use the absolute exit time; this includes the other athlete's own offset.
			exitTime = start + athlete[Col.swimTime]
			if start < targetStart and exitTime > targetExit:
				youPassed += 1
			elif start > targetStart and exitTime < targetExit:
				passedYou += 1
		return youPassed - passedYou



	#---------------------------------------------------
	# append the calculated numbers onto the athlete object to be written to the file
	for athlete in r:
		bib = athlete[Col.bib]
		# swim is a special case: only athletes from other waves can be passed
		athlete.append(numPplPassedInSwim(bib))
		# people passed in T1, bike, T2 and run, given wave starts
		athlete.extend(numPplPassed(bib))
		# the same four numbers, ignoring wave starts
		athlete.extend(numPplPassedStartTogether(bib))
		# estimated rank after T1 (rank = index + 1), shown in the percentiles
		athlete.append(findIndexOfBibInLeg(bib, timesByT1) + 1)



	# Replace every timedelta by whole seconds so d3 can use the numbers more easily
	for line in r:
		line[:] = [
			int(field.total_seconds()) if type(field) is timedelta else field
			for field in line
		]


	# Running this script overwrites results.csv with the calculated numbers.
	header = [
		'place', 'divTot', 'bib', 'cat', 'firstName', 'lastName', 'age', 'sex', 'div',
		'swimRank', 'swimTime', 'swimPace', 't1Time',
		'bikeRank', 'bikeTime', 'bikeSpeed', 't2Rank', 't2Time',
		'runRank', 'runTime', 'runPace', 'totalTime',
		'wave', 'wavePassedSwim', 'wavePassedT1', 'wavePassedBike', 'wavePassedT2', 'wavePassedRun',
		'startPassedT1', 'startPassedBike', 'startPassedT2', 'startPassedRun',
		'estimatedT1Rank',
	]
	with open('results.csv', 'w', newline='') as csvfile:
		writer = csv.writer(csvfile)
		writer.writerow(header)
		writer.writerows(r)


	#---------------------------------------------------


	#-- number of people "passed" is aggregate rank after leg_1 - aggregate rank after leg_2
	#-- if negative, people passed you
	# print('In the swim, you passed %s people in the waves ahead of you' % numPplPassedInSwim(122))
	# print(numPplPassed(122))
	# print(aggregatePplPassed(122))


	# passed = []
	# for i in range(0, 1020):
	# try:
	# passed.append(findIndexOfBibInLeg(i, timesBySwim) - findIndexOfBibInLeg(i, timesByRun))
	# except (RuntimeError, TypeError, NameError):
	# pass
	# print(sorted(passed)) #-- to see the distribution of overall number of people passed

	#convert to dictionary (key = bib num, value = list of aggregate times)


	# algorithm
	# given bib number, output number and list of people who you passed or passed you in a given leg
	#----------- if everyone started at the same time
	# compute aggregated time for each leg
	# sort based on aggregated time (i.e. get overall rank at each leg finish)
	# passed those with higher rank in previous leg

	#----------- given wave start
	# first wave starts at 00:00:00
	# second wave starts at 00:05:00 (5 minutes later)
	# third wave starts at 00:10:00, etc
	# rank is not given until after the athlete has completed the swim