Skip to content

Instantly share code, notes, and snippets.

@erikgregorywebb
Created February 21, 2022 06:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save erikgregorywebb/4d26aa3c1f130368088eb2988f3cd467 to your computer and use it in GitHub Desktop.
Save erikgregorywebb/4d26aa3c1f130368088eb2988f3cd467 to your computer and use it in GitHub Desktop.
library(tidyverse)
library(rvest)
### EXTRACT
# Download a results page and return one of the HTML tables found on it.
#
# url:         address of the results page to read.
# table_index: position of the wanted table among all tables parsed from the
#              page; this script uses the second table as the results list.
#
# Returns the table as parsed by html_table(). Stops with an explicit message
# when the page holds fewer tables than expected, instead of handing a missing
# value on to the cleaning step.
read_results_table <- function(url, table_index = 2) {
  tables <- html_table(read_html(url))
  if (length(tables) < table_index) {
    stop(
      'Expected at least ', table_index, ' tables at ', url,
      ' but found ', length(tables),
      call. = FALSE
    )
  }
  tables[[table_index]]
}

# marathon
# NOTE(review): the size_167031=100000 query parameter presumably requests all
# finishers on a single page -- confirm against the site.
marathon_url <- 'https://mesamarathon.com/results?sort=&race=166724&date=&event=Marathon&gender=&division=&search=&page_167031=1&size_167031=100000&page_167067=1&size_167067=25'
raw_marathon <- read_results_table(marathon_url)

# half marathon
half_marathon_url <- 'https://mesamarathon.com/results?sort=&race=166724&date=&event=Half+Marathon&gender=&division=&search=&page_167032=1&size_167032=100000&page_167068=1&size_167068=25'
raw_half_marathon <- read_results_table(half_marathon_url)
### CLEAN
# Build `hm` from the raw half-marathon table: rename the columns to
# snake_case, drop rows without a chip time, convert the chip time to decimal
# minutes, add an age-bucket index, and sort by chip time.
hm <- raw_half_marathon %>%
  rename(
    bib_no = `Bib #`, sex = Sex, age = Age,
    place_overall = `Place Overall`, place_gender = `Place Gender`,
    place_division = `Place Div`,
    chip_time_chr = `Chip Time`, gun_time_chr = `Gun Time`,
    pace_chr = `Pace (min/miles)`
  ) %>%
  select(
    bib_no, sex, age, place_overall, place_gender, place_division,
    chip_time_chr, gun_time_chr, pace_chr
  ) %>%
  filter(chip_time_chr != '') %>% # remove runners without a finish time
  # split the chip time on ':' into three pieces, read as hours, minutes, seconds
  separate(
    chip_time_chr,
    sep = ':',
    into = c('chip_time_chr_h', 'chip_time_chr_m', 'chip_time_chr_s'),
    remove = FALSE
  ) %>%
  mutate(
    chip_time_minutes = (as.numeric(chip_time_chr_h) * 60) +
      as.numeric(chip_time_chr_m) +
      (as.numeric(chip_time_chr_s) / 60)
  ) %>%
  # the split pieces were only needed to compute chip_time_minutes
  select(-chip_time_chr_h, -chip_time_chr_m, -chip_time_chr_s) %>%
  # Bucket on the pipeline's own `age` column. Referring to `hm$age` here would
  # read the object this pipeline is still creating: an error on a first run,
  # and stale, differently ordered rows on a re-run.
  # right = FALSE makes the intervals [0, 20), [20, 30), ... and labels = FALSE
  # stores the bucket as an integer code rather than a factor label.
  mutate(
    age_bucket = cut(
      age,
      breaks = c(0, 20, 30, 40, 50, 60, 100),
      right = FALSE,
      labels = FALSE
    )
  ) %>%
  arrange(chip_time_minutes)
#### PLOT
# jittered scatter of chip time (minutes) against age, coloured by sex
ggplot(data = hm, mapping = aes(x = age, y = chip_time_minutes, colour = sex)) +
  geom_jitter()

# the same scatter, one panel per age bucket, each panel with its own axes
ggplot(data = hm, mapping = aes(x = age, y = chip_time_minutes, colour = sex)) +
  geom_jitter() +
  facet_wrap(vars(age_bucket), scales = 'free')

# chip-time density, overlaid for rows whose sex is 'M' or 'F'
ggplot(
  data = filter(hm, sex %in% c('M', 'F')),
  mapping = aes(x = chip_time_minutes, fill = sex)
) +
  geom_density(alpha = .4)

# chip time as a line over overall finishing place
ggplot(data = hm, mapping = aes(x = place_overall, y = chip_time_minutes)) +
  geom_line()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment