Skip to content

Instantly share code, notes, and snippets.

@dbrby
Last active September 14, 2021 19:54
Show Gist options
  • Save dbrby/8c778a584a559b62003c36676a7037e3 to your computer and use it in GitHub Desktop.
Save dbrby/8c778a584a559b62003c36676a7037e3 to your computer and use it in GitHub Desktop.
Parse XML vote records from record.senedd.wales, match members to Wikidata, and estimate ideal points (IPEs)
## Estimate Ideal Points for Members of the Fifth Senedd from RCVs
# One-time setup: uncomment if the helper packages are not installed yet.
#install.packages("pacman")
#install.packages('remotes')
# Install dwnominate from GitHub only when it is not already available, so
# that re-running the script does not trigger an install attempt (and a
# network round-trip) every time.
if (!requireNamespace("dwnominate", quietly = TRUE)) {
  remotes::install_github('wmay/dwnominate')
}
# Load (installing if necessary) every package the script relies on.
pacman::p_load(rvest, lubridate, tidyverse, readr, data.table,
               XML, pbapply, tidywikidatar, reshape2, pscl,
               wnominate)
# Build one export-listing URL per monthly window, fetch every listing page,
# and keep the absolute URLs of the vote-record downloads.
front <- "https://record.senedd.wales/XMLExport/?start="
mid <- "&end="
# Window start dates: the 5th of each month from May 2016 to May 2020.
dates <- seq(from = as.Date("2016/5/5"), to = as.Date("2020/5/5"), by = "month")
# Each window ends one calendar month after it starts.
links <- paste0(
  front, dates,
  mid, dates + months(1)
)
results_html <- pblapply(links, read_html)
# Pull the href of every anchor on each listing page.
links_xml_recs <- pblapply(results_html, function(page) {
  html_attr(html_elements(page, "a"), "href")
})
# The pattern is a regular expression: the trailing "?" makes the final "d"
# optional, so any href containing "Downloa" is kept.
links_xml_recs <- str_subset(unlist(links_xml_recs), pattern = "Download?")
# Of those, keep the links mentioning "Votes" and make them absolute.
votes <- paste0(
  "https://record.senedd.wales",
  str_subset(links_xml_recs, pattern = "Votes")
)
path <- "/Users/danbraby/Dropbox/cymru/gist/" #Where we store the raw XML files
# Save every vote export as <path><index>.xml. seq_along() yields an empty
# sequence when `votes` is empty, whereas 1:length(votes) would iterate over
# 1 and 0 and attempt a download from an NA URL.
for (i in seq_along(votes)) {
  download.file(url = votes[i], destfile = paste0(path, i, ".xml"))
}
# Only pick up the .xml files, so unrelated files that happen to live in the
# same directory are not handed to the XML parser.
votes_files <- list.files(path = path, pattern = "\\.xml$", full.names = TRUE)
# Parse each file and convert the document tree into a nested R list.
votes_xml <- pblapply(votes_files, function(xml_file) {
  xml_file %>% xmlParse() %>% xmlToList()
})
# Turn every parsed XML document into a table of vote rows and stack them.
#
# The last top-level element of each document is dropped before binding
# (presumably the XML attribute entry added by xmlToList() -- confirm).
# head(doc, -1) states this directly; the earlier `1:length(doc) - 1` only
# worked because `:` binds tighter than `-`, producing the index 0:(n - 1),
# whose leading 0 is silently ignored.
# lapply() also copes with an empty `votes_xml`, where 1:length() would try to
# read element 1 of an empty list.
votes_data <- lapply(votes_xml, function(doc) {
  rbindlist(head(doc, -1), fill = TRUE)
})
# Combine the per-file tables into one data set.
votes_data <- votes_data %>% bind_rows()
# Legislator metadata comes from the Wikidata "every politician" project page
# for the 5th Assembly: read the page, grab its wikitable node(s), and convert
# them into a data frame.
base_url <- "https://www.wikidata.org/wiki/Wikidata:WikiProject_every_politician/Wales/data/Assembly/5th_Assembly"
pg <- read_html(base_url)
# Keep the raw table nodes as well; they are searched for links further down.
data <- html_elements(pg, ".wikitable")
df <- as.data.frame(html_table(data))
#' Find the href of the first link whose text equals a given label
#'
#' @param html_table An rvest/xml2 node or node set used as the starting point
#'   for the search. The XPath begins with `//`, so anchors are matched
#'   anywhere in the document that owns these nodes.
#' @param class The exact link text to look for (a single string).
#' @return The `href` attribute of the first matching `<a>` element.
#' @details Stops with an informative error when no link has that text.
get_link_table <- function(html_table, class){
  # XPath 1.0 has no escape sequence for quotes inside a string literal, so a
  # label containing an apostrophe (e.g. "O'Neill") must be assembled with
  # concat() from single-quoted pieces joined by a double-quoted apostrophe.
  if (grepl("'", class, fixed = TRUE)) {
    # Appending an apostrophe before splitting keeps a trailing empty piece,
    # which guarantees concat() always receives at least two arguments.
    pieces <- strsplit(paste0(class, "'"), "'", fixed = TRUE)[[1]]
    literal <- paste0("concat('", paste(pieces, collapse = "', \"'\", '"), "')")
  } else {
    literal <- paste0("'", class, "'")
  }
  matches <- html_table %>%
    html_elements(xpath = paste0("//a[text()=", literal, "]"))
  if (length(matches) == 0) {
    stop("No link found with text: ", class, call. = FALSE)
  }
  # Several anchors may share the same text; the first one is used.
  matches[[1]] %>% html_attr("href")
}
# Look up the link behind every person's name in the scraped table nodes.
# vapply() guarantees a plain character vector with one entry per person,
# whereas sapply() changes its return type on empty or ragged results.
df$wikidataid <- vapply(df$Person, function(x) get_link_table(data, x),
                        character(1), USE.NAMES = FALSE)
# Strip the "/wiki/" prefix so only the Wikidata item id remains.
df$wikidataid <- gsub("/wiki/", "", df$wikidataid)
# Fetch Wikidata property P4651 for every item; its value is used below as
# the member id that is matched against the vote records.
welsh_ids <- tw_get_property(id = df$wikidataid,
                             p = "P4651",
                             language = "en")
# Keep a single property value per Wikidata item.
welsh_ids <- welsh_ids %>% distinct(id, .keep_all = TRUE)
df <- left_join(df, welsh_ids, by = c("wikidataid" = "id"))
df <- df %>% select(-property)
df <- df %>% rename(member_id = value)
# One row per person, even if the table listed them more than once.
df <- df %>% distinct(wikidataid, .keep_all = TRUE)
# Account for (2) missing members
# One row per member id that appears in the roll-call data.
members_from_rcv <- votes_data %>% select(Member_Id, Member_name_English) %>%
  distinct(Member_Id, .keep_all = TRUE)
# TRUE when the member has NO matching id in the Wikidata frame. The flag was
# previously the raw %in% result, i.e. TRUE for members that were found, which
# contradicted its name.
members_from_rcv$wiki_missing <- !(members_from_rcv$Member_Id %in% df$member_id)
# Missing Speaker and singular NA case, drop from data
# NOTE(review): Member_Id presumably arrives as character from the XML, in
# which case this comparison is lexicographic rather than numeric -- confirm
# that it removes exactly the intended rows.
votes_data <- votes_data %>% filter(Member_Id > 1)
# Generate Matrix
# Keep the ballot id, member id and recorded result under shorter names, then
# recode the result labels: For -> 1, Against -> 0, Abstain -> NA,
# DidNotVote -> 9. The codes stay character values because the column they
# replace holds text; any other label is passed through unchanged.
votes_mat <- votes_data %>%
  select(vote_id = Contribution_ID,
         member_id = Member_Id,
         vote = Results_Result) %>%
  mutate(vote = case_when(
    vote == "For" ~ "1",
    vote == "Against" ~ "0",
    vote == "Abstain" ~ NA_character_,
    vote == "DidNotVote" ~ "9",
    TRUE ~ vote
  ))
# Sort the long table by member id.
votes_mat <- votes_mat[order(votes_mat$member_id), ]
# Reshape to wide form: one row per member, one column per vote.
rcv_mat <- acast(votes_mat, member_id ~ vote_id, value.var = "vote")
dim(rcv_mat)
# Align the legislator metadata with the rows of the vote matrix by member id.
# Matching on the key guarantees that row i of `df` describes row i of
# `rcv_mat`; sorting both objects independently only achieves that when they
# contain exactly the same set of ids.
member_rows <- match(rownames(rcv_mat), df$member_id)
if (anyNA(member_rows)) {
  stop("No legislator metadata for member id(s): ",
       paste(rownames(rcv_mat)[is.na(member_rows)], collapse = ", "),
       call. = FALSE)
}
df <- df[member_rows, ]
names <- df$Person
# Bundle the votes, the legislator names and their metadata into a pscl
# rollcall object.
rcv_ob <- rollcall(rcv_mat, legis.data = df,
                   legis.names = names)
# Fit NOMINATE to Rollcall Object
# NOTE(review): polarity = c(1, 2) presumably uses the legislators in rows 1
# and 2 to orient the two dimensions -- confirm these are suitable anchors.
wnom_points <- wnominate::wnominate(
  rcObject = rcv_ob,
  polarity = c(1, 2)
)
# Plot the estimated coordinates, grouping points by the "Group" column.
wnominate::plot.coords(x = wnom_points, plotBy = "Group")
# Fit pscl's ideal() to the same rollcall object and display the result.
pscl_points <- ideal(object = rcv_ob)
pscl_points
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment