Skip to content

Instantly share code, notes, and snippets.

@mcohen01
Created November 22, 2018 01:07
Show Gist options
  • Save mcohen01/9df22e5819dca2d0ba5b2a6eb87b4732 to your computer and use it in GitHub Desktop.
Save mcohen01/9df22e5819dca2d0ba5b2a6eb87b4732 to your computer and use it in GitHub Desktop.
import re
import PyPDF2
from collections import defaultdict
import matplotlib.pyplot as plt
import warnings
import seaborn as sns
# Apply seaborn's default styling to the matplotlib figures created below.
sns.set()
# Silence every warning from this point on. This runs after sns.set(), so any
# warning raised by that call itself is not suppressed.
warnings.filterwarnings('ignore')
# Upper-case rider surnames that the parser looks for among the space-separated
# tokens of the extracted PDF text. Spellings are kept exactly as written
# (e.g. 'MORBIDEL', 'BAGNAI' -- presumably how the names appear in the PDF;
# confirm). 'ESPARGARO' is listed twice, which is harmless for membership tests.
riders = [
    'MARQUEZ',
    'ROSSI',
    'DOVIZIOSO',
    'BRADL',
    'MORBIDEL',
    'ESPARGARO',
    'PETRUCCI',
    'ESPARGARO',
    'PIRRO',
    'BAGNAI',
    'RINS',
    'NAKAGAMI',
    'MILLER',
    'MIR',
    'RABAT',
    'ZARCO',
    'LORENZO',
    'IANNONE',
    'ABRAHAM',
    'SMITH',
    'FOLGER',
    'QUARTARARO',
    'SYAHRIN',
    'OLIVEIRA',
    'VIÑALES',
]
# ---------------------------------------------------------------------------
# Parse 'laps.pdf' into `laps`: a mapping of rider surname -> list of lap-time
# strings such as "1'32.123", in the order they appear in the extracted text.
#
# A rider surname token makes that rider "current"; every lap time found
# afterwards is appended to the current rider until another surname appears.
# ---------------------------------------------------------------------------
laps = defaultdict(list)

# Lap time of the form 1'SS.mmm with SS in 20-39. Raw string so `\.` is a regex
# escape rather than a string escape; `[23]` (not `[2,3]`) so that a comma is
# not accepted as the tens digit.
lap_time_pattern = r"1'[23][0-9]\.[0-9]{3}"
lap_time_re = re.compile(lap_time_pattern)
# Some lines carry a lap time glued to a leading 'dT4' marker; capture only the
# lap time itself.
funky_lap_time_re = re.compile('dT4(' + lap_time_pattern + ')')

# One specific extracted line that names VIÑALES without starting his lap list
# (presumably a header/record entry -- confirm against the PDF). The surname on
# this line must not change the current rider, and the next lap time found on
# the same page is discarded.
skip_marker_line = ', 2018Maverick VIÑALES'

# List that receives lap times for the current rider; None until the first
# surname is seen, so lap times that precede any rider are dropped instead of
# raising NameError.
rider = None

# NOTE(review): PdfFileReader / numPages / getPage / extractText are the legacy
# PyPDF2 names; PyPDF2 3.x replaced them with PdfReader / len(reader.pages) /
# reader.pages[i] / extract_text(). Kept as-is to match the installed version
# this script was written for -- confirm before upgrading.
with open('laps.pdf', 'rb') as file:
    # Pages are read lazily from the open handle, so all extraction has to
    # happen inside this `with` block.
    fileReader = PyPDF2.PdfFileReader(file)
    for i in range(fileReader.numPages):
        lines = fileReader.getPage(i).extractText().split('\n')
        # The skip flag never carries over from one page to the next.
        skip_next = False
        for line in lines:
            if line == skip_marker_line:
                skip_next = True
                continue

            # Checked once per line (it only depends on the line), so a line
            # with several tokens no longer records the same lap repeatedly.
            funky_match = funky_lap_time_re.match(line)
            if funky_match and rider is not None:
                rider.append(funky_match.group(1))

            for token in line.split(' '):
                if token in riders:
                    rider = laps[token]
                match = lap_time_re.match(token)
                if not match:
                    continue
                if skip_next:
                    # Discard the first lap time that follows the marker line.
                    skip_next = False
                elif rider is not None:
                    rider.append(match.group(0))
# ---------------------------------------------------------------------------
# Plot the fastest laps of the first few riders found in `laps` and save the
# chart as a PNG.
# ---------------------------------------------------------------------------
max_riders = 9            # number of riders drawn
max_laps_per_rider = 45   # fastest laps kept per rider

fig, ax = plt.subplots(figsize=(13, 9))

cnt = 0
for rider, raw_laps in laps.items():
    # 'ESPARGARO' is never plotted (presumably because two riders share the
    # surname, so their laps land under one key -- confirm).
    if rider == 'ESPARGARO':
        continue
    if cnt == max_riders:
        break

    # Lap strings look like "1'32.123": drop the leading "1'" and add back the
    # 60 seconds it stands for.
    times = [round(float(lap[2:]) + 60, 3) for lap in raw_laps]
    # Sorted ascending, so the x position is the rank of the lap among the
    # rider's times, not the lap number in which it was set.
    times.sort()
    plt.plot(times[:max_laps_per_rider], label=rider)
    cnt += 1

ax.set_xlabel('laps')
ax.set_ylabel('lap time')
plt.legend()
plt.savefig('lap_times_11.20.2018.png')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment