Skip to content

Instantly share code, notes, and snippets.

@hadley
Created May 30, 2019 21:11
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hadley/49e23f8761f0630f18dccd840f8e72e1 to your computer and use it in GitHub Desktop.
Save hadley/49e23f8761f0630f18dccd840f8e72e1 to your computer and use it in GitHub Desktop.
library(tidyverse)
library(rvest)
# Download the Wikipedia article for series 3, take the second table with
# class "wikitable" on the page, and collect all of its <tr> nodes.
url <- "https://en.wikipedia.org/wiki/The_Great_British_Bake_Off_(series_3)"
page <- read_html(url)
table <- html_nodes(page, "table.wikitable")[[2]]
rows <- html_nodes(table, "tr")
# Parse one table row (<tr>) into one tibble row per week.
#
# `row` is an rvest/xml2 node for a single <tr>. The text of its first <td> is
# used as the baker's name; every following <td> is repeated `colspan` times
# (1 when the attribute is absent) so that each week gets its own entry, and
# the cell's CSS `background` value is reported as `colour`.
#
# Returns a tibble with columns `baker`, `colour` and `week` (1, 2, ... in
# cell order). A row without any <td> cells yields a zero-row tibble with the
# same columns instead of failing on `[[1]]`.
baker_elimination <- function(row) {
  cells <- row %>% html_nodes("td")
  if (length(cells) == 0) {
    return(tibble(baker = character(), colour = character(), week = integer()))
  }

  baker <- cells %>% .[[1]] %>% html_text()
  results <- cells %>% .[-1]

  # A missing colspan attribute comes back as NA; treat it as a single week.
  colspan <- results %>%
    html_attr("colspan") %>%
    as.numeric() %>%
    coalesce(1)

  # Capture everything after "background:" up to the next ";" or the end of
  # the style string, so a final declaration without a trailing semicolon and
  # whitespace around the value ("background: plum") are both accepted.
  colour <- results %>%
    html_attr("style") %>%
    str_match("background:\\s*([^;]*)") %>%
    .[, 2] %>%
    str_trim()

  tibble(
    baker = baker,
    colour = rep(colour, colspan),
    week = seq_len(sum(colspan))
  )
}
# Apply baker_elimination() to every row except the first two and stack the
# per-row tibbles into a single data frame.
results <- map_dfr(rows[-(1:2)], baker_elimination)
# Lookup table from lower-case cell background colour to result label.
# "silver" maps to NA so that those cells are removed by the NA filter below.
status <- setNames(
  c("ok", "star", "bottom", "top", "winner", "runner-up", "out", NA),
  c(
    "lightblue", "lemonchiffon", "plum", "cornflowerblue",
    "yellow", "limegreen", "orangered", "silver"
  )
)
# Translate each cell colour into its status label (case-insensitively), drop
# the raw colour column, and keep only the weeks that have a known status.
results %>%
  mutate(status = unname(status[tolower(colour)])) %>%
  select(-colour) %>%
  filter(!is.na(status))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment