reuning/poll.conf

## poll.conf
poll.conf <- function(url, inc.und=T){
  library(rvest)
  library(magrittr)
  library(stringr)

  poll.page <- html(url)

  poll.r <- poll.page %>% html_node("table[class='poll-results-table']") %>% html_table()
  poll.n <- poll.page %>% html_node("div[class='subpop-description']") %>% html_text()
  tmp <- gregexpr("\\d", poll.n)[[1]]
  st <- tmp[[1]]
  en <- tmp[[1]] + sum(attr(tmp, "match.length")) - 1

  N <- as.integer(substr(poll.n, start=st, stop=en))

  c.names <- gsub(" \\(R\\)","", poll.r$X1)
  per <- as.integer(gsub("%","", poll.r$X2))

  tot <- round(per * N/100)

  poll.clean <- data.frame(N=c.names, P=per, Tot=tot)
  if(!inc.und){
    poll.clean <- subset(poll.clean, N!="Undecided")
    c.names <- as.character(poll.clean$N)
    per <- poll.clean$P
    tot <- poll.clean$Tot
  }
  full.data <- character()
  for(ii in 1:nrow(poll.clean)){
    full.data <- append(full.data, rep(as.character(poll.clean[ii, "N"]), poll.clean[ii, "Tot"]))
  }

  sim.n <- 1000
  sim <- matrix(NA, sim.n, length(c.names))
  colnames(sim) <- c.names
  sim.winner <- character(sim.n)


  for(ii in 1:sim.n){
    tmp.votes <- sample(full.data, N, replace=T)
    tmp.table <- table(tmp.votes)/N
    for(jj in 1:length(c.names)){
      tmp.name <- c.names[jj]
      if(tmp.name %in% names(tmp.table)){
        sim[ii, tmp.name] <- tmp.table[tmp.name]
      } else {
        sim[ii, tmp.name] <- 0
      }
    }
    sim.winner[ii] <- names(which.max(tmp.table))
  }

  sim.table <- table(sim.winner)/sim.n

  par(mar=c(2,8,1,1), mfrow=c(2,1))
  max.x <- ((max(sim)*100) %/% 5 + 1) *5
  plot.new()
  plot.window(ylim=c(1,length(c.names)+.2), xlim=c(0,max.x/100))
  od <- rev(order(apply(sim, 2, median)))
  for(ii in 1:length(c.names)){
    lines(y=c(ii,ii), x=quantile(sim[,od[ii]], c(0.025,0.975)), lwd=3)
    points(y=ii, x=per[od[ii]]/100, cex=1.5)
    dens <- density(sim[,od[ii]])
    lines(y=dens$y/50+ii, x=dens$x, lty=2)
  }
  legend("right", legend=c("95% Confidence Int", "Density", "Reported Percent"), lty=c(1,2, NA),
         lwd=c(3,1,NA), pch=(NA,NA,1))

  xax <- seq(0, max.x, by=5)
  xlab <- paste(xax, "%", sep="")
  axis(1, at=xax/100, labels=xlab)
  axis(2, at=1:length(c.names), c.names[od], las=1)
  title(main="Bootstrapped Confidence Intervals")

  barplot(sim.table, horiz=T, las=1, main="Freq of Simulated Wins")

}
	poll.conf <- function(url, inc.und=T){
	library(rvest)
	library(magrittr)
	library(stringr)

	poll.page <- html(url)

	poll.r <- poll.page %>% html_node("table[class='poll-results-table']") %>% html_table()
	poll.n <- poll.page %>% html_node("div[class='subpop-description']") %>% html_text()
	tmp <- gregexpr("\\d", poll.n)[[1]]
	st <- tmp[[1]]
	en <- tmp[[1]] + sum(attr(tmp, "match.length")) - 1

	N <- as.integer(substr(poll.n, start=st, stop=en))

	c.names <- gsub(" \\(R\\)","", poll.r$X1)
	per <- as.integer(gsub("%","", poll.r$X2))

	tot <- round(per * N/100)

	poll.clean <- data.frame(N=c.names, P=per, Tot=tot)
	if(!inc.und){
	poll.clean <- subset(poll.clean, N!="Undecided")
	c.names <- as.character(poll.clean$N)
	per <- poll.clean$P
	tot <- poll.clean$Tot
	}
	full.data <- character()
	for(ii in 1:nrow(poll.clean)){
	full.data <- append(full.data, rep(as.character(poll.clean[ii, "N"]), poll.clean[ii, "Tot"]))
	}

	sim.n <- 1000
	sim <- matrix(NA, sim.n, length(c.names))
	colnames(sim) <- c.names
	sim.winner <- character(sim.n)


	for(ii in 1:sim.n){
	tmp.votes <- sample(full.data, N, replace=T)
	tmp.table <- table(tmp.votes)/N
	for(jj in 1:length(c.names)){
	tmp.name <- c.names[jj]
	if(tmp.name %in% names(tmp.table)){
	sim[ii, tmp.name] <- tmp.table[tmp.name]
	} else {
	sim[ii, tmp.name] <- 0
	}
	}
	sim.winner[ii] <- names(which.max(tmp.table))
	}

	sim.table <- table(sim.winner)/sim.n

	par(mar=c(2,8,1,1), mfrow=c(2,1))
	max.x <- ((max(sim)100) %/% 5 + 1) 5
	plot.new()
	plot.window(ylim=c(1,length(c.names)+.2), xlim=c(0,max.x/100))
	od <- rev(order(apply(sim, 2, median)))
	for(ii in 1:length(c.names)){
	lines(y=c(ii,ii), x=quantile(sim[,od[ii]], c(0.025,0.975)), lwd=3)
	points(y=ii, x=per[od[ii]]/100, cex=1.5)
	dens <- density(sim[,od[ii]])
	lines(y=dens$y/50+ii, x=dens$x, lty=2)
	}
	legend("right", legend=c("95% Confidence Int", "Density", "Reported Percent"), lty=c(1,2, NA),
	lwd=c(3,1,NA), pch=(NA,NA,1))

	xax <- seq(0, max.x, by=5)
	xlab <- paste(xax, "%", sep="")
	axis(1, at=xax/100, labels=xlab)
	axis(2, at=1:length(c.names), c.names[od], las=1)
	title(main="Bootstrapped Confidence Intervals")

	barplot(sim.table, horiz=T, las=1, main="Freq of Simulated Wins")

	}