Skip to content
Create a gist now

Instantly share code, notes, and snippets.

Embed URL


Subversion checkout URL

You can clone with
Download ZIP
RealClearPolitics XML Scraping
# Scrape a RealClearPolitics polling-average XML file into a tidy CSV.
#
# The XML is expected to hold one <series> node (the dates) followed by
# <graph> nodes (one per candidate). Every child of those nodes carries an
# "xid" attribute that ties a poll value to its date.

doInstall <- TRUE # Change to FALSE if you don't want packages installed.
toInstall <- c("XML", "ggplot2", "lubridate", "reshape2", "scales")
if (doInstall) {
  # Only fetch what is missing, and use a real mirror: an empty `repos`
  # string does not resolve to any repository.
  missingPkgs <- setdiff(toInstall, rownames(installed.packages()))
  if (length(missingPkgs) > 0) {
    install.packages(missingPkgs, repos = "https://cloud.r-project.org")
  }
}
# invisible() keeps the list returned by lapply() from being printed.
invisible(lapply(toInstall, library, character.only = TRUE))

# Set URL to the address of the XML file you want to scrape.
# NOTE(review): the original comment pointed at a list of available files,
# but the link is missing from this copy -- restore it if known.
URL <- ""
if (!nzchar(URL)) {
  stop("Set `URL` to the address of the XML file before running.",
       call. = FALSE)
}

# Return the text of every child of `node` as a character vector named by
# the child's "xid" attribute. Children with no text yield NA; children
# without an "xid" (which could never be matched to a date) are dropped.
childValuesByXid <- function(node) {
  kids <- xmlChildren(node)
  values <- vapply(
    kids,
    function(kid) {
      value <- xmlValue(kid)
      if (length(value) == 1L) value else NA_character_
    },
    character(1),
    USE.NAMES = FALSE
  )
  xids <- vapply(
    kids, xmlGetAttr, character(1),
    name = "xid", default = NA_character_,
    USE.NAMES = FALSE
  )
  keep <- !is.na(xids)
  setNames(values[keep], xids[keep])
}

parsedXML <- xmlParse(URL) # First pass

# Check the structure of the XML. <series> is the first child node.
dateSeries <- xpathSApply(parsedXML, path = "//series")
# <graph> is the second major child node.
graphSeries <- xpathSApply(parsedXML, path = "//graph")
if (length(dateSeries) < 1L || length(graphSeries) < 2L) {
  stop("Expected at least one <series> node and two <graph> nodes in the XML.",
       call. = FALSE)
}

Date <- childValuesByXid(dateSeries[[1]])
obamaSeries <- childValuesByXid(graphSeries[[1]]) # First graph line: "Obama"
romneySeries <- childValuesByXid(graphSeries[[2]]) # Second: "Romney"

# Put all of these series into a data.frame, aligned on xid.
rcpData <- data.frame(xid = names(Date), stringsAsFactors = FALSE)
rcpData$Date <- mdy(Date[rcpData$xid], tz = "EST") # lubridate!
rcpData$Romney <- as.numeric(romneySeries[rcpData$xid])
rcpData$Obama <- as.numeric(obamaSeries[rcpData$xid])
write.csv(rcpData, "RealClearPolitics Polling Average.csv",
          row.names = FALSE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.