Skip to content

Instantly share code, notes, and snippets.

@sebastianbarfort
Last active December 7, 2021 07:15
Show Gist options
  • Save sebastianbarfort/7561952 to your computer and use it in GitHub Desktop.
Save sebastianbarfort/7561952 to your computer and use it in GitHub Desktop.
Download kommunalvalgsdata fra kmdvalg.dk.
library(XML)
# Address of the landing page whose links are collected below
url <- "http://kmdvalg.dk/main"
# Download the page source as a character vector, one element per line
url.data <- readLines(url)
# Parse the downloaded HTML into an internal-node document so that XPath
# queries can be run against it
doc <- htmlParse(url.data)
# Collect the href attribute of every <a> that sits inside a
# <div class="LetterGroup"> and flatten the per-node results into one vector.
# NOTE(review): these values are later handed unchanged to readLines(), so
# they are presumably absolute URLs -- confirm against the live page.
urls <- unlist(
  xpathApply(
    doc,
    "//div[@class='LetterGroup']//a[@href]",
    function(node) xmlGetAttr(node, "href")
  )
)
# Scrape the result cells from a single page.
#
# Args:
#   url: address of the page to download, passed to readLines().
#
# Returns:
#   A character matrix with the four columns navn, st.nu, st.sidst and kom.
#   The matrix always has exactly four columns, and has zero rows when the
#   page contains none of the expected cells, so that the results of several
#   calls can be stacked with rbind().
#
# Side effects:
#   Prints "url: <url>" without a trailing newline, which lets the caller
#   append a status message on the same line.
get.data <- function(url){
  cat("url:", url)
  url.data <- readLines(url)
  tables <- htmlTreeParse(url.data, useInternalNodes = TRUE, encoding = "UTF-8")

  # Text content of every node matching `path`, always as a character vector
  # with exactly one entry per node. xpathSApply() would return an empty
  # list() when nothing matches, which later turns the combined matrix into
  # a list-mode matrix; vapply() keeps the type stable instead.
  node.text <- function(path) {
    vapply(
      xpathApply(tables, path, xmlValue),
      function(value) {
        if (length(value) == 1L) as.character(value) else NA_character_
      },
      character(1)
    )
  }

  st.nu <- node.text("//td[@class='StemmerNu']")
  st.sidst <- node.text("//td[@class='StemmerSidst']")
  navn <- node.text("//td[@class='kandidat']")
  kom <- node.text("//title")

  # Keep every other match: the odd-numbered vote cells and the even-numbered
  # name cells (the logical index is recycled along the vector).
  # NOTE(review): presumably the page repeats each cell class twice per
  # row -- confirm against the live markup.
  st.nu <- st.nu[c(TRUE, FALSE)]
  st.sidst <- st.sidst[c(TRUE, FALSE)]
  navn <- navn[c(FALSE, TRUE)]

  # Strip the dots from both vote columns, and additionally the parentheses
  # from the second one
  st.nu <- gsub("\\.", "", st.nu)
  st.sidst <- gsub("[.()]", "", st.sidst)

  # cbind() silently recycles shorter columns and drops zero-length ones;
  # pad with NA instead so the rows stay aligned and the matrix keeps its
  # four columns.
  n.rows <- max(length(navn), length(st.nu), length(st.sidst))
  if (length(navn) != n.rows || length(st.nu) != n.rows ||
      length(st.sidst) != n.rows) {
    warning("Column lengths differ for ", url, "; padding with NA",
            call. = FALSE)
    length(navn) <- n.rows
    length(st.nu) <- n.rows
    length(st.sidst) <- n.rows
  }

  # Use only the first <title> (a page may contain several, or none) and
  # repeat it once per row
  kom <- if (length(kom) > 0L) {
    gsub("Kommunalvalg ", "", kom[1L])
  } else {
    NA_character_
  }
  kom <- rep(kom, n.rows)

  cbind(navn, st.nu, st.sidst, kom)
}
# Scrape every page listed in `urls` and stack the results into one data frame.
# The list is preallocated so it is not regrown on every iteration.
data <- vector("list", length(urls))
for (i in seq_along(urls)){
  # A page that cannot be fetched or parsed is reported and skipped, so one
  # bad URL does not throw away everything scraped so far. Assigning through
  # `data[i] <- list(...)` stores NULL in place; `data[[i]] <- NULL` would
  # delete the element and shift the remaining indices.
  data[i] <- list(tryCatch(
    get.data(urls[i]),
    error = function(e) {
      warning("Skipping ", urls[i], ": ", conditionMessage(e), call. = FALSE)
      NULL
    }
  ))
  # waiting one second between hits
  Sys.sleep(1)
  # get.data() printed the URL without a newline; finish that line here
  cat(if (is.null(data[[i]])) " failed!\n" else " done!\n")
}
# rbind() ignores the NULL entries left by skipped pages
data <- do.call(rbind, data)
if (is.null(data)) {
  # Nothing was scraped at all: fall back to an empty four-column matrix so
  # that the column names below can still be assigned
  data <- matrix(character(0), nrow = 0L, ncol = 4L)
}
data <- data.frame(data, stringsAsFactors = FALSE)
colnames(data) <- c("kandidatlister","stemmetal","ændring","kommune")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment