Last active
September 14, 2021 19:54
-
-
Save dbrby/8c778a584a559b62003c36676a7037e3 to your computer and use it in GitHub Desktop.
Parse XML vote records from record.senedd.wales, match members to Wikidata, and estimate ideal points (IPEs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Estimate Ideal Points for Members of the Fifth Senedd from RCVs
# install.packages("pacman")
# install.packages("remotes")
# Install dwnominate from GitHub only when it is not already available, so
# that re-running the script does not contact GitHub on every execution.
if (!requireNamespace("dwnominate", quietly = TRUE)) {
  remotes::install_github("wmay/dwnominate")
}
# Load (installing if necessary) every package the script relies on.
pacman::p_load(rvest, lubridate, tidyverse, readr, data.table,
               XML, pbapply, tidywikidatar, reshape2, pscl,
               wnominate)
# Generate URLs and download XML files pertaining to Votes
front <- "https://record.senedd.wales/XMLExport/?start="
mid <- "&end="
# One query window per month: each window runs from a start date to the same
# day of the following month.
dates <- seq(from = as.Date("2016/5/5"), to = as.Date("2020/5/5"), "month")
links <- paste0(front, dates, mid, dates + months(1))
results_html <- pblapply(links, read_html)
# Collect the href of every anchor on each export page.
links_xml_recs <- pblapply(results_html, function(get_links) {
  get_links %>% html_elements("a") %>% html_attr("href")
})
# NOTE(review): the pattern is a regular expression, so the trailing `?` makes
# the final "d" optional rather than matching a literal question mark. Kept
# as-is to preserve which links are selected; confirm the intended match.
links_xml_recs <- links_xml_recs %>% unlist() %>% str_subset(pattern = "Download?")
# Adjacent windows share a boundary date, so the same link can be listed on
# two pages; de-duplicate so that no file is downloaded twice.
vote_hrefs <- links_xml_recs %>% str_subset(pattern = "Votes") %>% unique()
# paste0() would turn a zero-length vector into a single bogus URL, so only
# prefix the host when there is something to prefix.
votes <- if (length(vote_hrefs) > 0) {
  paste0("https://record.senedd.wales", vote_hrefs)
} else {
  character(0)
}
path <- "/Users/danbraby/Dropbox/cymru/gist/" #Where we store the raw XML files
dir.create(path, showWarnings = FALSE, recursive = TRUE)
# seq_along() yields an empty loop when no links were found (1:length() would
# iterate over c(1, 0)); mode = "wb" keeps the files byte-exact on Windows.
for (i in seq_along(votes)) {
  download.file(url = votes[i], destfile = paste0(path, i, ".xml"), mode = "wb")
}
# Read every downloaded XML file back in and convert it to a nested R list.
votes_files <- list.files(path = path, pattern = "\\.xml$", full.names = TRUE)
votes_xml <- pblapply(votes_files, function(xml_file) {
  xml_file %>% xmlParse() %>% xmlToList()
})
# For each file, drop the last top-level element and stack the remaining
# records into one table. head(x, -1) states the "all but the last" intent
# directly; the previous `1:length(x) - 1` relied on index 0 being ignored.
# NOTE(review): the dropped trailing element is presumably not a vote record
# (e.g. document-level attributes) -- confirm against a sample file.
votes_data <- lapply(votes_xml, function(records) {
  rbindlist(head(records, -1), fill = TRUE)
})
# Combine the per-file tables into a single frame of vote records.
votes_data <- votes_data %>% bind_rows()
# Build the legislator metadata frame from the Wikidata "every politician"
# project page for the 5th Assembly
base_url <- "https://www.wikidata.org/wiki/Wikidata:WikiProject_every_politician/Wales/data/Assembly/5th_Assembly"
pg <- read_html(base_url)
# Keep the wikitable node(s): they are reused later to look up member links.
data <- html_elements(pg, ".wikitable")
# Parse the table markup into a plain data frame.
df <- as.data.frame(html_table(data))
#' Find the href of the first link whose text equals a given label.
#'
#' @param html_table Parsed HTML node(s) to search with rvest.
#' @param class The exact link text to look for (a single string).
#' @return The `href` attribute of the first matching `<a>` element, or
#'   `NA_character_` when `class` is NA or no link matches.
get_link_table <- function(html_table, class){
  if (is.na(class)) {
    return(NA_character_)
  }
  # XPath 1.0 string literals have no escape character, so a label that
  # contains an apostrophe has to be assembled from pieces with concat().
  if (grepl("'", class, fixed = TRUE)) {
    label <- paste0(
      "concat('",
      gsub("'", "', \"'\", '", class, fixed = TRUE),
      "')"
    )
  } else {
    label <- paste0("'", class, "'")
  }
  # NOTE(review): the leading `//` is an absolute path, so this presumably
  # searches the whole document rather than only inside `html_table`.
  matches <- html_elements(
    html_table,
    xpath = paste0("//a[text()=", label, "]")
  )
  # Return NA instead of failing with "subscript out of bounds" when the
  # label is not found, so one missing name does not abort a whole lookup.
  if (length(matches) == 0) {
    return(NA_character_)
  }
  html_attr(matches[[1]], "href")
}
# Look up each person's wiki link. vapply() guarantees a plain character
# vector (one string per person) whatever the input length, unlike sapply().
df$wikidataid <- vapply(
  df$Person,
  function(person) get_link_table(data, person),
  character(1),
  USE.NAMES = FALSE
)
# Strip the "/wiki/" prefix so that only the Wikidata item id remains.
df$wikidataid <- gsub("/wiki/", "", df$wikidataid, fixed = TRUE)
# Fetch property P4651 for every item id that was found.
# NOTE(review): P4651 is presumably the Senedd member identifier -- confirm.
known_ids <- df$wikidataid[!is.na(df$wikidataid)]
welsh_ids <- tw_get_property(id = known_ids,
                             p = "P4651",
                             language = "en")
# Keep one property row per item, then attach it to the metadata frame as
# `member_id`, with one row per Wikidata item.
welsh_ids <- welsh_ids %>% distinct(id, .keep_all = TRUE)
df <- df %>%
  left_join(welsh_ids, by = c("wikidataid" = "id")) %>%
  select(-property) %>%
  rename(member_id = value) %>%
  distinct(wikidataid, .keep_all = TRUE)
# Account for (2) missing members
# One row per member that appears in the roll-call records.
members_from_rcv <- votes_data %>%
  select(Member_Id, Member_name_English) %>%
  distinct(Member_Id, .keep_all = TRUE)
# TRUE when the member has NO matching id in the Wikidata frame. (The flag was
# previously the un-negated `%in%` result, i.e. TRUE for members that were
# found, which contradicted the column name.)
members_from_rcv$wiki_missing <-
  !(members_from_rcv$Member_Id %in% df$member_id)
# Missing Speaker and singular NA case, drop from data
# NOTE(review): if Member_Id is stored as text this comparison is done on
# strings, not numbers -- confirm it removes exactly the intended rows.
votes_data <- votes_data %>% filter(Member_Id > 1)
# Generate Matrix
# Numeric codes for each recorded result. Abstentions become NA and
# DidNotVote becomes 9; any result not listed here also becomes NA.
vote_codes <- c(For = 1, Against = 0, Abstain = NA, DidNotVote = 9)
votes_mat <- votes_data %>%
  select(Contribution_ID, Member_Id, Results_Result) %>%
  rename(vote_id = Contribution_ID,
         member_id = Member_Id,
         vote = Results_Result)
# Recode through a lookup so the column is numeric. Assigning 1/0/9 into the
# original text column left it as character, producing a character matrix.
votes_mat$vote <- unname(vote_codes[as.character(votes_mat$vote)])
# Keep a single row per member and vote: acast() falls back to counting rows
# when a (member, vote) pair occurs more than once, which would replace the
# vote codes with counts.
votes_mat <- votes_mat %>% distinct(vote_id, member_id, .keep_all = TRUE)
votes_mat <- votes_mat[order(votes_mat$member_id), ]
# Reshape to a members-by-votes matrix whose row names are member ids.
rcv_mat <- acast(votes_mat, member_id ~ vote_id, value.var = "vote")
dim(rcv_mat)
# Align the legislator metadata with the rows of the vote matrix by member id.
# Sorting both objects independently only lines them up when they contain
# exactly the same members in the same sort order; match() on the matrix row
# names makes the pairing explicit and keeps one metadata row per matrix row.
df <- df[match(rownames(rcv_mat), as.character(df$member_id)), ]
names <- df$Person
# Members with no metadata row are labelled with their id instead of NA.
no_metadata <- is.na(names)
if (any(no_metadata)) {
  warning(sum(no_metadata),
          " member(s) in the vote matrix have no matching metadata row",
          call. = FALSE)
  names[no_metadata] <- rownames(rcv_mat)[no_metadata]
}
rcv_ob <- rollcall(rcv_mat, legis.data = df,
                   legis.names = names)
# Estimate W-NOMINATE coordinates from the rollcall object.
# NOTE(review): polarity = c(1, 2) presumably names the legislators (by row)
# used to orient each dimension -- confirm these are the intended anchors.
wnom_points <- wnominate::wnominate(
  rcv_ob,
  polarity = c(1, 2)
)
# Plot the estimated coordinates, grouped by the "Group" column.
wnominate::plot.coords(wnom_points, plotBy = "Group")
# Fit ideal() to the same rollcall object with its default settings and
# display the result.
pscl_points <- ideal(rcv_ob)
pscl_points
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment