Last active
August 29, 2015 14:04
-
-
Save durtal/b94d32cd3e1bafd1d426 to your computer and use it in GitHub Desktop.
Helper function to convert Turftrax PDF times to neat dataframe
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SHORT SCRIPT TO GET TURFTRAX TIMES FROM A PDF INTO R
# example PDF: http://www.turftrax.co.uk/tracking/Glorious2014/GWD290714R1_1M2F.pdf
# 1. highlight the horses' names and times in the table in the Turftrax PDF using the mouse
#    (don't highlight the table headers)
# 2. copy the selection to the clipboard using "Ctrl-C"
# 3. in R (once the PDF table is copied to the clipboard), run:
# Read the copied rows from the clipboard. The rows contain no tab characters,
# so each one arrives as a single string in column V1.
# NOTE: the "clipboard" connection is platform-dependent (it is the Windows
# spelling); on macOS read from pipe("pbpaste") instead.
df <- read.delim("clipboard", header = FALSE, stringsAsFactors = FALSE)
# for example PDF (above)
dim(df)
## [1] 18 1
head(df)
# V1
# 3 Sennockian Star 5 13.69 11.86 12.19 12.71 14.14 11.91 11.15 11.59 11.71 11.89
# 14 Ajman Bridge 18 13.93 11.95 12.36 12.87 14.30 11.68 11.16 11.08 11.47 12.14
# 6 Salutation 16 13.60 11.66 12.27 12.87 14.07 11.85 11.06 11.35 11.70 12.60
# 15 Busatto 12 13.68 11.55 12.03 12.81 14.07 11.97 11.21 11.39 11.72 12.74
# 16 Charles Camoin 8 14.04 12.15 12.31 13.04 14.09 11.61 11.04 11.38 11.68 12.19
# 4 Blue Surf 1 13.93 11.87 12.01 12.80 14.08 11.82 11.17 11.33 11.92 12.73
#' Split copied Turftrax table rows into horse names and sectional times
#'
#' Uses base R only, so calling it does not attach any package to the
#' caller's search path.
#'
#' @param df A data frame whose column `V1` holds one copied table row per
#'   element, e.g. "3 Sennockian Star 5 13.69 11.86 12.19".
#' @param as_numeric If `TRUE`, convert the sectional times to numeric. The
#'   default `FALSE` keeps them as the character strings found in the text
#'   (so "13.60" keeps its trailing zero).
#' @return A data frame with a character `horse` column followed by one column
#'   per sectional time (`V1`, `V2`, ...). Rows holding fewer times than the
#'   longest row are padded with `NA`.
turftrax <- function(df, as_numeric = FALSE) {
  if (!is.data.frame(df) || is.null(df[["V1"]])) {
    stop("`df` must be a data frame with a column named `V1`", call. = FALSE)
  }
  rows <- as.character(df[["V1"]])

  # horse name: drop every digit and punctuation mark (apostrophes and hyphens
  # inside a name are removed as well), then squeeze and trim whitespace
  horse <- gsub("[[:digit:][:punct:]]", "", rows)
  horse <- trimws(gsub("\\s+", " ", horse))

  # sectional times: every decimal number in the row, in order of appearance;
  # integers such as the saddle-cloth and draw numbers have no "." and are
  # therefore skipped
  times <- regmatches(rows, gregexpr("[[:digit:]]+\\.[[:digit:]]+", rows))

  # build one column per sectional; indexing past the end of a short row
  # yields NA, so ragged input still forms a rectangular table
  n_times <- max(lengths(times), 0L)
  time_cols <- lapply(seq_len(n_times), function(j) {
    vapply(times, function(x) x[j], character(1), USE.NAMES = FALSE)
  })
  names(time_cols) <- paste0("V", seq_len(n_times))

  if (isTRUE(as_numeric)) {
    time_cols <- lapply(time_cols, as.numeric)
  }

  # combine horses' names and times
  as.data.frame(c(list(horse = horse), time_cols), stringsAsFactors = FALSE)
}
# run the helper on the clipboard data and inspect the first rows
newdf <- turftrax(df)
head(newdf)
# horse V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
# Sennockian Star 13.69 11.86 12.19 12.71 14.14 11.91 11.15 11.59 11.71 11.89
# Ajman Bridge 13.93 11.95 12.36 12.87 14.30 11.68 11.16 11.08 11.47 12.14
# Salutation 13.60 11.66 12.27 12.87 14.07 11.85 11.06 11.35 11.70 12.60
# Busatto 13.68 11.55 12.03 12.81 14.07 11.97 11.21 11.39 11.72 12.74
# Charles Camoin 14.04 12.15 12.31 13.04 14.09 11.61 11.04 11.38 11.68 12.19
# Blue Surf 13.93 11.87 12.01 12.80 14.08 11.82 11.17 11.33 11.92 12.73
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment