Skip to content

Instantly share code, notes, and snippets.

@reuning
Last active August 29, 2015 14:25
Show Gist options
  • Save reuning/aad9c84bffd6b67700f7 to your computer and use it in GitHub Desktop.
Save reuning/aad9c84bffd6b67700f7 to your computer and use it in GitHub Desktop.
poll.conf <- function(url, inc.und=T){
library(rvest)
library(magrittr)
library(stringr)
poll.page <- html(url)
poll.r <- poll.page %>% html_node("table[class='poll-results-table']") %>% html_table()
poll.n <- poll.page %>% html_node("div[class='subpop-description']") %>% html_text()
tmp <- gregexpr("\\d", poll.n)[[1]]
st <- tmp[[1]]
en <- tmp[[1]] + sum(attr(tmp, "match.length")) - 1
N <- as.integer(substr(poll.n, start=st, stop=en))
c.names <- gsub(" \\(R\\)","", poll.r$X1)
per <- as.integer(gsub("%","", poll.r$X2))
tot <- round(per * N/100)
poll.clean <- data.frame(N=c.names, P=per, Tot=tot)
if(!inc.und){
poll.clean <- subset(poll.clean, N!="Undecided")
c.names <- as.character(poll.clean$N)
per <- poll.clean$P
tot <- poll.clean$Tot
}
full.data <- character()
for(ii in 1:nrow(poll.clean)){
full.data <- append(full.data, rep(as.character(poll.clean[ii, "N"]), poll.clean[ii, "Tot"]))
}
sim.n <- 1000
sim <- matrix(NA, sim.n, length(c.names))
colnames(sim) <- c.names
sim.winner <- character(sim.n)
for(ii in 1:sim.n){
tmp.votes <- sample(full.data, N, replace=T)
tmp.table <- table(tmp.votes)/N
for(jj in 1:length(c.names)){
tmp.name <- c.names[jj]
if(tmp.name %in% names(tmp.table)){
sim[ii, tmp.name] <- tmp.table[tmp.name]
} else {
sim[ii, tmp.name] <- 0
}
}
sim.winner[ii] <- names(which.max(tmp.table))
}
sim.table <- table(sim.winner)/sim.n
par(mar=c(2,8,1,1), mfrow=c(2,1))
max.x <- ((max(sim)*100) %/% 5 + 1) *5
plot.new()
plot.window(ylim=c(1,length(c.names)+.2), xlim=c(0,max.x/100))
od <- rev(order(apply(sim, 2, median)))
for(ii in 1:length(c.names)){
lines(y=c(ii,ii), x=quantile(sim[,od[ii]], c(0.025,0.975)), lwd=3)
points(y=ii, x=per[od[ii]]/100, cex=1.5)
dens <- density(sim[,od[ii]])
lines(y=dens$y/50+ii, x=dens$x, lty=2)
}
legend("right", legend=c("95% Confidence Int", "Density", "Reported Percent"), lty=c(1,2, NA),
lwd=c(3,1,NA), pch=(NA,NA,1))
xax <- seq(0, max.x, by=5)
xlab <- paste(xax, "%", sep="")
axis(1, at=xax/100, labels=xlab)
axis(2, at=1:length(c.names), c.names[od], las=1)
title(main="Bootstrapped Confidence Intervals")
barplot(sim.table, horiz=T, las=1, main="Freq of Simulated Wins")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment