Skip to content

Instantly share code, notes, and snippets.

@elidourado
Created April 8, 2015 15:12
Show Gist options
  • Save elidourado/f72933d103fcb8949b82 to your computer and use it in GitHub Desktop.
Save elidourado/f72933d103fcb8949b82 to your computer and use it in GitHub Desktop.
# R code to scrape the IGM Economic Experts Panel website.
# The XML package must be installed.
require(XML)
require(utils)
# Strip leading and trailing whitespace from every element of x.
# Interior whitespace is left untouched.
trim <- function(x) {
  without_leading <- sub("^\\s+", "", x)
  sub("\\s+$", "", without_leading)
}
# TRUE when x has no elements at all (e.g. NULL, character(0), list()).
# An empty string "" or an NA still counts as one element, so gives FALSE.
isempty <- function(x) {
  length(x) < 1L
}
# Fetch the panel index page and pick out the first link under each
# survey-question heading.
doc <- htmlTreeParse("http://www.igmchicago.org/igm-economic-experts-panel")
linknodes <- xpathApply(xmlRoot(doc), "//h3[@class='surveyQuestion']/a[@href][1]")

# One href per matched link, as a list. De-duplicate once after collecting
# instead of growing the list and calling unique() on every iteration.
links <- unique(lapply(linknodes, xmlGetAttr, "href"))

# Empty result table with the seven output columns. Declaring the columns as
# zero-length character vectors avoids the "create a dummy row, then delete
# it" trick and keeps the columns as plain text on every R version.
output <- data.frame(
  Date = character(0),
  Question = character(0),
  Name = character(0),
  Institution = character(0),
  Answer = character(0),
  Confidence = character(0),
  Comment = character(0),
  stringsAsFactors = FALSE
)
# Text of one table cell, trimmed. Returns "" when the cell yields a
# zero-length value: the original script applied this guard to Confidence
# only, but data.frame() cannot combine a zero-length column with length-one
# columns, so every field needs it.
cell_text <- function(cell) {
  value <- trim(xmlValue(cell))
  if (length(value) == 0) "" else value
}

# Visit every survey page found on the index and collect one row per expert
# response: Date, Question, Name, Institution, Answer, Confidence, Comment.
# Rows are gathered in a list and bound to `output` once at the end, rather
# than calling rbind() inside the innermost loop (quadratic copying).
#
# NOTE(review): the "//..." XPath expressions applied to a single table or row
# are kept exactly as written. They appear to rely on htmlTreeParse() handing
# back R-level nodes, so each query is scoped to the node passed in; confirm
# before switching to internal nodes.
scraped <- list()
for (url in links) {
  page <- htmlTreeParse(paste0("http://www.igmchicago.org", url))
  page_root <- xmlRoot(page)

  # Question headings: evaluated once and reused below.
  question_nodes <- xpathApply(page_root, "//h3[@class='surveyQuestion']")
  numquestions <- length(question_nodes)

  if (numquestions == 1) {
    qtext <- xmlValue(question_nodes[[1]])
    if (isempty(qtext)) {
      # The heading itself carries no text: take the question text from the
      # <p> elements that follow it instead.
      Questions <- lapply(
        xpathApply(page_root, "//h3[@class='surveyQuestion']/following-sibling::p"),
        xmlValue
      )
      # As before, empty paragraphs are dropped only when the first one is
      # empty. The length check stops Questions[[1]] from failing when no
      # paragraph was found at all.
      if (length(Questions) > 0 && isempty(Questions[[1]])) {
        Questions <- Questions[!vapply(Questions, isempty, logical(1))]
      }
    } else {
      Questions <- list(qtext)
    }
  } else {
    Questions <- lapply(question_nodes, xmlValue)
  }

  # Report pages where the number of question texts does not match the
  # number of headings; scraping continues regardless.
  if (length(Questions) != numquestions) {
    print("error on")
    print(url)
    print(Questions)
  }

  Date <- xmlValue(xpathApply(page_root, "//h6")[[1]])
  tables <- xpathApply(page_root, "//table[@class='responseDetail']")

  # seq_along() yields nothing for a page without response tables, where
  # 1:length(tables) would iterate over 1 and 0 and fail on tables[[1]].
  for (i in seq_along(tables)) {
    # The i-th table is paired with the i-th question. If that question is
    # missing or empty (the mismatch reported above), record NA instead of
    # aborting the whole scrape.
    has_question <- i <= length(Questions) && !isempty(Questions[[i]])
    Question <- if (has_question) Questions[[i]] else NA_character_

    rows <- xpathApply(tables[[i]], "//tr[@class='parent-row']")
    table_rows <- lapply(rows, function(row) {
      cells <- xpathApply(row, "//td")
      data.frame(
        Date = Date,
        Question = Question,
        Name = cell_text(cells[[1]]),
        Institution = cell_text(cells[[2]]),
        Answer = cell_text(cells[[3]]),
        Confidence = cell_text(cells[[4]]),
        Comment = cell_text(cells[[5]]),
        stringsAsFactors = FALSE
      )
    })
    scraped <- c(scraped, table_rows)
  }
}

# Single bind of everything collected; with nothing scraped this leaves
# `output` as it was.
output <- do.call(rbind, c(list(output), scraped))

# NOTE: the destination path is hard-coded to the original author's machine.
write.csv(output, file = "/Users/eli/Documents/Research/IGM/igm.csv", row.names = FALSE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment