Skip to content

Instantly share code, notes, and snippets.

Created November 22, 2018 01:07
Show Gist options
  • Save mcohen01/9df22e5819dca2d0ba5b2a6eb87b4732 to your computer and use it in GitHub Desktop.
Save mcohen01/9df22e5819dca2d0ba5b2a6eb87b4732 to your computer and use it in GitHub Desktop.
import re
import PyPDF2
from collections import defaultdict
import matplotlib.pyplot as plt
import warnings
import seaborn as sns
# Scan every page of 'laps.pdf' and gather lap-time strings per rider into
# the `laps` dict (rider token -> list).
# NOTE(review): all leading indentation was lost in this copy of the script,
# so the loop/if nesting below cannot be read from the text — restore it from
# the original before running.
# NOTE(review): the `riders` list literal has no visible entries and no
# closing bracket; its contents appear to have been dropped — TODO restore.
riders = [
# Opened in binary mode; nothing in the visible code closes this handle.
file = open('laps.pdf', 'rb')
# NOTE(review): PdfFileReader / numPages / getPage / extractText look like the
# legacy PyPDF2 API — confirm against the installed PyPDF2 version.
fileReader = PyPDF2.PdfFileReader(file)
# Missing keys start out as an empty list on first access.
laps = defaultdict(list)
# Lap-time shape: "1'", one character from the class [2,3], one digit, a
# literal dot, three digits. The comma inside [2,3] is itself a member of the
# class, so "1',5.123" would match too. The literal is not a raw string, so
# "\." relies on Python passing the unknown escape through unchanged.
lap_time_pattern = "1'[2,3][0-9]\.[0-9]{3}"
for i in range(fileReader.numPages):
# Page text is split on newlines, then each line on single spaces.
lines = fileReader.getPage(i).extractText().split('\n')
skip_next = False
for line in lines:
for token in line.split(' '):
# A token found in `riders` selects that rider's list as the current target,
# except on the one special-cased line below.
if token in riders and line != ', 2018Maverick VIÑALES':
rider = laps[token]
# That exact line arms `skip_next` instead of selecting a rider.
if line == ', 2018Maverick VIÑALES':
skip_next = True
# re.match anchors at the start: `match` tests the token, `funky_match` tests
# the whole line for a lap time immediately preceded by the text 'dT4'.
match = re.match(lap_time_pattern, token)
funky_match = re.match('dT4' + lap_time_pattern, line)
if funky_match:
# NOTE(review): this line is truncated (unbalanced ')'); presumably it
# appended the matched text with 'dT4' stripped via .replace('dT4', '') —
# verify against the original script.
rider.append('dT4', ''))
if match:
# A matching token clears `skip_next` when it is set.
# NOTE(review): no visible branch stores an ordinary `match` into `rider`;
# that code appears to be missing from this copy — TODO restore.
if skip_next:
skip_next = False
# Draw one line per rider showing that rider's lap times in ascending order.
# Relies on `laps` (rider -> list of lap-time strings) and `plt` defined
# earlier in this script.
fig, ax = plt.subplots(figsize=(13, 9))
max_riders = 9  # stop once this many riders have been drawn
max_laps_per_rider = 45  # keep only each rider's N smallest lap times
cnt = 0  # riders drawn so far; skipped riders do not count
for rider in laps:
    if rider == 'ESPARGARO':
        # This key is excluded from the plot.
        continue
    if cnt == max_riders:
        break
    # Drop the first two characters of each lap string and add 60 to the
    # remaining number. Assumes every entry has the form 1'SS.mmm, as
    # described by lap_time_pattern, so the result is the lap in seconds.
    times = sorted(
        round(float(lap[2:]) + 60, 3) for lap in laps[rider]
    )[:max_laps_per_rider]
    # Plot on the axes created above rather than the implicit current axes.
    ax.plot(times, label=rider)
    cnt += 1
ax.set_ylabel('lap time')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment