Skip to content

Instantly share code, notes, and snippets.

@mcohen01
Created February 25, 2017 23:20
Show Gist options
  • Save mcohen01/13df19b7f4184ec1e7298aa4cbd0bcb0 to your computer and use it in GitHub Desktop.
Save mcohen01/13df19b7f4184ec1e7298aa4cbd0bcb0 to your computer and use it in GitHub Desktop.
library(dplyr)
library(readr)
library(reshape2)
library(stringr)
library(ggplot2)
library(pdftools)
setwd('/Users/mcohen/Downloads/Scratch/codeplay/R/motogp/lap_times')
pattern <- "[0-9]{1}\\:[0-9]{2}\\.[0-9]{3}"
rider_name_pattern <- '\\([0-9]+\\)\\s[a-zA-Z]+\\s[A-Z]+'
rider_name_no_pattern <- '\\([0-9]+\\)\\s[a-zA-Z]+\\s[A-Z\u00f1\u00d1]+'
rider_no_pattern <- '\\([0-9]+\\)'
all_lines <- function(pdf, pages) {
unlist(
sapply(1:pages, function(index) {
strsplit(pdf[index], '\n')[[1]]
})
)
}
rider_from_line <- function(line, is_left) {
pattern <- if (is_left) {
str_c('^', rider_name_no_pattern)
} else {
str_c(rider_name_no_pattern, '$')
}
match <- gregexpr(pattern, line)[[1]]
start <- match[1]
end <- attr(match, 'match.length') + match[1]
full_name <- substr(line, start, end)
full_name <- strsplit(full_name, ' ')[[1]]
if (full_name[2] == 'Andrea') {
str_c(full_name[2], substr(full_name[3], 1, 1))
} else {
full_name[2]
}
}
lap_time <- function(s) {
m <- strsplit(as.character(s), '\\:')
s <- strsplit(m[[1]][2], '\\.')
(as.numeric(m[[1]][1]) * 60) +
as.numeric(s[[1]][1]) +
(as.numeric(s[[1]][2])/1000)
}
parse.times <- function(line) {
pattern <- "\\s{1}[0-9]{1}\\:[0-9]{2}\\.[0-9]{3}"
matches <- gregexpr(pattern, line)[[1]]
exists <- length(matches) > 1 || matches[[1]] > -1
matches <- if (exists) unlist(matches) else 0
if (length(matches) > 0 && matches[1] > 0) {
left <- matches[matches < 30]
right <- matches[matches < 80 & matches > 60]
f <- function(x) substr(line, x + 1, x + 8)
list(sapply(left, f), sapply(right, f))
}
}
column_laps <- function(lines, rider, is_left, lap_no) {
pattern <- if (is_left) {
str_c('^', rider_name_no_pattern)
} else {
str_c(rider_name_no_pattern, '$')
}
rider_line <- grep(pattern, lines)
laps <- data.frame(rider=NA, time=NA, lap=NA)
for (i in seq_along(lines)) {
if (length(rider_line) > 0 && i == rider_line) {
rider <- rider_from_line(lines[i], is_left)
lap_no <- 1
}
column <- if (is_left) 1 else 2
time <- unlist(parse.times(lines[i])[[column]])
if (! is.null(time)) {
laps <- add_row(laps, rider = rider,
time = lap_time(time),
lap = lap_no)
lap_no <- lap_no + 1
}
}
laps[-1, ]
}
page_laps <- function(page, rider, lap_no) {
lines <- strsplit(page, '\n')[[1]]
left <- column_laps(lines, rider, T, lap_no)
rider <- tail(left$rider, 1)
lap_no <- tail(left$lap, 1)
right <- column_laps(lines, rider, F, lap_no)
rbind(left, right)
}
all_laps <- function(pdf, pages) {
lines <- strsplit(pdf[1], '\n')[[1]]
line <- grep(str_c('^', rider_name_no_pattern), lines)
rider <- rider_from_line(lines[line], T)
laps <- page_laps(pdf[1], rider, 1)
for (i in seq(2, pages)) {
rider <- tail(laps$rider, 1)
lap_no <- tail(laps$lap, 1)
page <- page_laps(pdf[i], rider, lap_no)
laps <- rbind(laps, page)
}
laps
}
plot <- function(data, show_points=F) {
g <- ggplot(data)
if (length(colnames(data)) == 3) {
g <- g + geom_line(aes(x=lap, y = time, color=rider))
} else {
g <- g + geom_line(aes(x=lap, y = time), color='steelblue')
}
if (show_points) {
g <- g + geom_point(aes(x=lap, y = time, color=rider))
}
print(g)
}
plot_laps <- function(laps, topK_riders) {
filter(laps, time < max_time) %>%
filter(time > min_time) %>%
filter(rider %in% unique(laps$rider)[1:topK_riders]) %>%
arrange(rider, time) %>%
group_by(rider) %>%
mutate(lap = row_number()) %>%
plot()
}
main <- function(topK_riders) {
pdf <- pdf_text(paste(data_dir, '/', analysis, sep = ''))
pages <- pdf_info(paste(data_dir, '/', analysis, sep = ''))$pages
all_laps(pdf, pages) %>%
plot_laps(topK_riders)
}
data_dir <- '20170217'
analysis <- 'analysis.pdf'
min_time <- 85
max_time <- 95
main(10)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment