Skip to content

Instantly share code, notes, and snippets.

@flodel
Created July 12, 2015 13:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save flodel/1a33fd79386b2b1b4b60 to your computer and use it in GitHub Desktop.
Save flodel/1a33fd79386b2b1b4b60 to your computer and use it in GitHub Desktop.
Script to scrape the 2015 AJC Peachtree Road Race results
#############################################################################
## DOWNLOAD and PARSE data
# Results endpoint; a division is selected through the query string built below.
base_url <- "http://trackshackresults.com/peachtree/results/2015/ptresults.php"
# Lookup table of divisions: Ind and Div are the values substituted into the
# query string, Division is the human-readable label.
# na.strings = "" matters: without it read.table would turn the division
# code "NA" (row 18) into a missing value instead of keeping the literal code.
divisions_map <- read.table(
  header = TRUE,
  na.strings = "",
  text = '
Ind Div Division
1 1B "****MEN -- OPEN****"
2 1G "****WOMEN -- OPEN****"
3 1M "****MEN -- MASTERS****"
4 1N "****WOMEN -- MASTERS****"
5 C "MEN -- 14 AND UNDER"
6 D "MEN -- 15 THROUGH 19"
7 E "MEN -- 20 THROUGH 24"
8 F "MEN -- 25 THROUGH 29"
9 G "MEN -- 30 THROUGH 34"
10 H "MEN -- 35 THROUGH 39"
11 I "MEN -- 40 THROUGH 44"
12 J "MEN -- 45 THROUGH 49"
13 K "MEN -- 50 THROUGH 54"
14 L "MEN -- 55 THROUGH 59"
15 M "MEN -- 60 THROUGH 64"
16 MA "MEN -- 65 THROUGH 69"
17 N "MEN -- 70 THOURGH 74"
18 NA "MEN -- 75 THROUGH 79"
19 NB "MEN -- 80 AND OVER"
20 Q "WOMEN -- 14 AND UNDER"
21 R "WOMEN -- 15 THROUGH 19"
22 S "WOMEN -- 20 THROUGH 24"
23 SA "WOMEN -- 25 THROUGH 29"
24 U "WOMEN -- 30 THROUGH 34"
25 V "WOMEN -- 35 THROUGH 39"
26 W "WOMEN -- 40 THROUGH 44"
27 X "WOMEN -- 45 THROUGH 49"
28 Y "WOMEN -- 50 THROUGH 54"
29 Z "WOMEN -- 55 THROUGH 59"
30 ZA "WOMEN -- 60 THROUGH 64"
31 ZB "WOMEN -- 65 THROUGH 69"
32 ZC "WOMEN -- 70 THROUGH 74"
33 ZD "WOMEN -- 75 THROUGH 79"
34 ZE "WOMEN -- 80 AND OVER"
')
# One URL per division. The vector is named by division label so that the
# labels are available as names(all_urls) to whatever iterates over it.
all_urls <- sprintf("%s?Link=10&Type=2&Div=%s&Ind=%s",
                    base_url,
                    divisions_map[["Div"]],
                    divisions_map[["Ind"]])
names(all_urls) <- divisions_map[["Division"]]
# pull data from website into a list of data.frames (one per Division)
library(XML)
# which = 6 selects the sixth HTML table on each page and skip.rows = 1 drops
# its first row (presumably a header/banner row -- confirm against the site).
data <- lapply(all_urls, readHTMLTable, which = 6, skip.rows = 1)
# append Division column
# The label is repeated once per row rather than passed as a single value:
# cbind() on a data.frame refuses to recycle a length-one value against a
# zero-row table ("arguments imply differing number of rows"), so a division
# without any results would otherwise abort the whole scrape.
data <- Map(function(results, division) {
  if (!is.data.frame(results)) {
    stop("no results table found for division: ", division, call. = FALSE)
  }
  cbind(results, Division = rep(division, nrow(results)))
}, data, names(data))
# collapse everything into one data.frame
data <- do.call(rbind, data)
rownames(data) <- NULL
# rename columns
# The rename is positional, so it is only valid for the expected layout of
# nine scraped columns plus Division; fail loudly if the page layout differs
# instead of silently mislabelling columns.
stopifnot(ncol(data) == 10L)
colnames(data) <- c("DivisionPlace", "FullName", "BibNumber", "Age",
                    "Place", "GenderPlace", "ClockTime", "NetTime",
                    "Hometown", "Division")
#############################################################################
## Data Manipulation
# Convert race-time strings to numeric minutes.
#
# tim: vector of times written either as "H:MM:SS" or, for times under one
#      hour, as "MM:SS" (one or two digits per field).
# Returns a numeric vector of the same length holding the elapsed time in
# minutes; entries that cannot be parsed as %H:%M:%S come back as NA.
TimeInMinutes <- function(tim) {
  # A bare "MM:SS" value has no hour field, so give it an explicit "0:" hour
  # to let every entry be parsed with the same three-field format.
  with_hours <- sub("^(\\d{1,2}:\\d{1,2})$", "0:\\1", tim)
  elapsed <- as.difftime(with_hours, format = "%H:%M:%S", units = "mins")
  as.numeric(elapsed)
}
# Derive the analysis columns. Statement order inside within() is significant:
# PrintTime has to copy NetTime while it still holds the scraped time string,
# and StartTime needs both times already converted to minutes.
data <- within(data, {
  # net time exactly as it appeared in the results table
  PrintTime <- NetTime
  # both times as numeric minutes
  ClockTime <- TimeInMinutes(ClockTime)
  NetTime <- TimeInMinutes(NetTime)
  # clock time minus net time, in minutes (presumably how long after the
  # clock started the runner began -- confirm against the race's definitions)
  StartTime <- ClockTime - NetTime
  # test for "WOMEN" rather than "MEN": every women's division label also
  # contains the substring "MEN"
  Gender <- ifelse(grepl("WOMEN", Division), "F", "M")
})
#############################################################################
## Export
# Assemble the columns to publish, in output order. NetTime and StartTime are
# rendered as text with three decimals; PrintTime carries the net time in its
# original string form.
export <- data.frame(
  Place = data[["Place"]],
  FullName = data[["FullName"]],
  Age = data[["Age"]],
  Gender = data[["Gender"]],
  Hometown = data[["Hometown"]],
  NetTime = sprintf("%.3f", data[["NetTime"]]),
  StartTime = sprintf("%.3f", data[["StartTime"]]),
  PrintTime = data[["PrintTime"]],
  stringsAsFactors = FALSE
)
# Written to the current working directory, without the row-name column.
write.csv(export, file = "2015.csv", row.names = FALSE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment