Created
February 25, 2017 23:20
-
-
Save mcohen01/13df19b7f4184ec1e7298aa4cbd0bcb0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(dplyr) | |
library(readr) | |
library(reshape2) | |
library(stringr) | |
library(ggplot2) | |
library(pdftools) | |
setwd('/Users/mcohen/Downloads/Scratch/codeplay/R/motogp/lap_times') | |
pattern <- "[0-9]{1}\\:[0-9]{2}\\.[0-9]{3}" | |
rider_name_pattern <- '\\([0-9]+\\)\\s[a-zA-Z]+\\s[A-Z]+' | |
rider_name_no_pattern <- '\\([0-9]+\\)\\s[a-zA-Z]+\\s[A-Z\u00f1\u00d1]+' | |
rider_no_pattern <- '\\([0-9]+\\)' | |
all_lines <- function(pdf, pages) { | |
unlist( | |
sapply(1:pages, function(index) { | |
strsplit(pdf[index], '\n')[[1]] | |
}) | |
) | |
} | |
rider_from_line <- function(line, is_left) { | |
pattern <- if (is_left) { | |
str_c('^', rider_name_no_pattern) | |
} else { | |
str_c(rider_name_no_pattern, '$') | |
} | |
match <- gregexpr(pattern, line)[[1]] | |
start <- match[1] | |
end <- attr(match, 'match.length') + match[1] | |
full_name <- substr(line, start, end) | |
full_name <- strsplit(full_name, ' ')[[1]] | |
if (full_name[2] == 'Andrea') { | |
str_c(full_name[2], substr(full_name[3], 1, 1)) | |
} else { | |
full_name[2] | |
} | |
} | |
lap_time <- function(s) { | |
m <- strsplit(as.character(s), '\\:') | |
s <- strsplit(m[[1]][2], '\\.') | |
(as.numeric(m[[1]][1]) * 60) + | |
as.numeric(s[[1]][1]) + | |
(as.numeric(s[[1]][2])/1000) | |
} | |
parse.times <- function(line) { | |
pattern <- "\\s{1}[0-9]{1}\\:[0-9]{2}\\.[0-9]{3}" | |
matches <- gregexpr(pattern, line)[[1]] | |
exists <- length(matches) > 1 || matches[[1]] > -1 | |
matches <- if (exists) unlist(matches) else 0 | |
if (length(matches) > 0 && matches[1] > 0) { | |
left <- matches[matches < 30] | |
right <- matches[matches < 80 & matches > 60] | |
f <- function(x) substr(line, x + 1, x + 8) | |
list(sapply(left, f), sapply(right, f)) | |
} | |
} | |
column_laps <- function(lines, rider, is_left, lap_no) { | |
pattern <- if (is_left) { | |
str_c('^', rider_name_no_pattern) | |
} else { | |
str_c(rider_name_no_pattern, '$') | |
} | |
rider_line <- grep(pattern, lines) | |
laps <- data.frame(rider=NA, time=NA, lap=NA) | |
for (i in seq_along(lines)) { | |
if (length(rider_line) > 0 && i == rider_line) { | |
rider <- rider_from_line(lines[i], is_left) | |
lap_no <- 1 | |
} | |
column <- if (is_left) 1 else 2 | |
time <- unlist(parse.times(lines[i])[[column]]) | |
if (! is.null(time)) { | |
laps <- add_row(laps, rider = rider, | |
time = lap_time(time), | |
lap = lap_no) | |
lap_no <- lap_no + 1 | |
} | |
} | |
laps[-1, ] | |
} | |
page_laps <- function(page, rider, lap_no) { | |
lines <- strsplit(page, '\n')[[1]] | |
left <- column_laps(lines, rider, T, lap_no) | |
rider <- tail(left$rider, 1) | |
lap_no <- tail(left$lap, 1) | |
right <- column_laps(lines, rider, F, lap_no) | |
rbind(left, right) | |
} | |
all_laps <- function(pdf, pages) { | |
lines <- strsplit(pdf[1], '\n')[[1]] | |
line <- grep(str_c('^', rider_name_no_pattern), lines) | |
rider <- rider_from_line(lines[line], T) | |
laps <- page_laps(pdf[1], rider, 1) | |
for (i in seq(2, pages)) { | |
rider <- tail(laps$rider, 1) | |
lap_no <- tail(laps$lap, 1) | |
page <- page_laps(pdf[i], rider, lap_no) | |
laps <- rbind(laps, page) | |
} | |
laps | |
} | |
plot <- function(data, show_points=F) { | |
g <- ggplot(data) | |
if (length(colnames(data)) == 3) { | |
g <- g + geom_line(aes(x=lap, y = time, color=rider)) | |
} else { | |
g <- g + geom_line(aes(x=lap, y = time), color='steelblue') | |
} | |
if (show_points) { | |
g <- g + geom_point(aes(x=lap, y = time, color=rider)) | |
} | |
print(g) | |
} | |
plot_laps <- function(laps, topK_riders) { | |
filter(laps, time < max_time) %>% | |
filter(time > min_time) %>% | |
filter(rider %in% unique(laps$rider)[1:topK_riders]) %>% | |
arrange(rider, time) %>% | |
group_by(rider) %>% | |
mutate(lap = row_number()) %>% | |
plot() | |
} | |
main <- function(topK_riders) { | |
pdf <- pdf_text(paste(data_dir, '/', analysis, sep = '')) | |
pages <- pdf_info(paste(data_dir, '/', analysis, sep = ''))$pages | |
all_laps(pdf, pages) %>% | |
plot_laps(topK_riders) | |
} | |
data_dir <- '20170217' | |
analysis <- 'analysis.pdf' | |
min_time <- 85 | |
max_time <- 95 | |
main(10) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment