Skip to content

Instantly share code, notes, and snippets.

@dbrby
Last active August 3, 2021 13:50
Show Gist options
  • Save dbrby/06c3358fcf885c720536cdc6e99529e6 to your computer and use it in GitHub Desktop.
Save dbrby/06c3358fcf885c720536cdc6e99529e6 to your computer and use it in GitHub Desktop.
Scrape voting records from Senedd Records
require(rvest)
require(tidyverse)
require(readr)
require(data.table)
require(XML)
require(lubridate)
require(pbapply)
# Collect download links from the Senedd record's XML export pages.
# Example results page:
# https://record.senedd.wales/XMLExport/?start=2016-05-05&end=2017-06-05
front <- "https://record.senedd.wales/XMLExport/?start="
mid <- "&end="
# One start date per month, from 2016-05-05 up to the day the script runs
dates <- seq(as.Date("2016/5/5"), Sys.Date(), by = "month")
# Each results-page URL pairs a start date with that date plus months(1)
links <- paste0(front, dates, mid, dates + months(1))
# Fetch every results page (pblapply shows a progress bar while it runs)
results_html <- pblapply(links, read_html)
# Take the href attribute of every <a> element on each page
links_xml_recs <- pblapply(results_html, function(page) {
  html_attr(html_elements(page, "a"), "href")
})
# Flatten to one character vector and keep hrefs matching "Download?".
# NOTE(review): the pattern is a regex, so "?" makes the final "d" optional
# instead of matching a literal question mark -- confirm this is intended.
links_xml_recs <- str_subset(unlist(links_xml_recs), pattern = "Download?")
# Of those, keep the hrefs containing "Votes" and prefix the site host
votes <- paste0(
  "https://record.senedd.wales",
  str_subset(links_xml_recs, pattern = "Votes")
)
# Local raw-data directory; the vote XML files are read from its "votes/"
# subdirectory.
path <- "/Users/danbraby/Dropbox/parlCymru/raw_data/"
vote_files <- list.files(path = paste0(path, "votes/"), full.names = TRUE)
# Parse each file and convert the parsed XML document into a nested R list
votes_xml <- pblapply(vote_files, function(xml_file) {
  xmlToList(xmlParse(xml_file))
})
# Combine the parsed vote files into a single table.
#
# Each element of `votes_xml` is the list produced for one XML file. The
# last entry of every such list is dropped before the remaining entries are
# row-bound; columns missing from some entries are filled with NA
# (fill = TRUE).
# NOTE(review): the dropped trailing entry is presumably the attribute block
# that xmlToList() appends for the root node -- confirm against the data.
#
# Fixes relative to the previous loop:
#   * it iterated over `vote_xml`, an object that is never defined; the
#     parsed files live in `votes_xml`
#   * `1:length(x) - 1` parses as `(1:length(x)) - 1` and only worked because
#     a zero index is ignored; head(x, -1) states the intent directly
#   * lapply() copes with an empty `votes_xml`, where `1:length(x)` would
#     iterate over c(1, 0) and fail
vote_data <- lapply(votes_xml, function(vote_records) {
  rbindlist(head(vote_records, -1), fill = TRUE)
})
# Stack the per-file tables into one data frame
vote_data <- bind_rows(vote_data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment