Skip to content

Instantly share code, notes, and snippets.

@chrishanretty
Last active August 29, 2015 14:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chrishanretty/f3b7165563bc1669cdc8 to your computer and use it in GitHub Desktop.
Save chrishanretty/f3b7165563bc1669cdc8 to your computer and use it in GitHub Desktop.
On the night projections of #indyref results
## ----Setup, include=FALSE, results="hide", warning=FALSE, eval = TRUE----
## This chunk is for setting nice options for the output. Notice how
## we create both png and pdf files by default, so that we can easily process
## to both HTML and LaTeX/PDF files later.
opts_chunk$set(fig.path='figures/paper-', cache.path='cache/report-', dev=c("png","pdf"), fig.width=14, fig.height=7, fig.show='hold', fig.lp="fig:", cache=FALSE, par=TRUE, echo=FALSE, results="hide", message=FALSE, warning=FALSE, dpi = 300)
knit_hooks$set(par=function(before, options, envir){
if (before && options$fig.show!='none') par(mar=c(4,4,2,.1),cex.lab=.95,cex.axis=.9,mgp=c(2,.7,0),tcl=-.3)
}, crop=hook_pdfcrop)
## ----loadlibs, echo = FALSE, message = FALSE, warning = FALSE------------
### Libraries
library(rjags)
library(R2WinBUGS)
library(ascii)
library(ggplot2)
my.xlims <- c(0.25,0.75)
set.seed(18092014)
## ----priors, echo = FALSE------------------------------------------------
national.prior.mean <- 0.49
national.prior.var <- 0.003
## ----priormeanfig, echo = FALSE, fig = TRUE, fig.cap = "Simulated referendums using priors",fig.width=7,fig.height=4----
simulated.referendums <- rnorm(10000,
mean = national.prior.mean,
sd = sqrt(national.prior.var))
prior.prob <- mean(simulated.referendums > 0.5)
hist(simulated.referendums,
main = paste0("Prior probability Yes vote: ",round(prior.prob,2)),
breaks = 25,
col = "#999999",
border = "#FFFFFF",
xlim = my.xlims,
xlab = "Yes %")
## ----lauthpriors, echo = FALSE, message = FALSE, warning = FALSE, results = 'asis',tab.cap = "Local area priors"----
offset.priors <- read.csv("../preds.csv", header = TRUE)
bump.factor <- 2
offset.priors$var <- offset.priors$var * bump.factor
## ----bookies, echo = FALSE, results = 'hide', message = FALSE, warning = FALSE----
coral.breaks <- c(0,55,60,65,70,75,80,85,90,95,100) / 100
coral.odds <- c(101, 51, 26, 17, 5.5, 3.25, 3, 4.5, 11, 34)
coral.impliedprobs <- 1 / coral.odds
overround <- sum(coral.impliedprobs)
coral.impliedprobs <- coral.impliedprobs / overround
coral.midpoints <- coral.breaks[-length(coral.breaks)] + diff(coral.breaks)/2
### get the distribution
coral.implied.mean <- sum(coral.midpoints * coral.impliedprobs)
coral.implied.var <- sd(rep(coral.midpoints , round(1000*coral.impliedprobs,0)))
skybet.breaks <- c(60,65, 70, 75, 80, 85, 90, 100) / 100
skybet.odds <- c(26, 15, 6, 3.75, 2.88, 4, 9)
skybet.impliedprobs <- 1 / skybet.odds
overround <- sum(skybet.impliedprobs)
skybet.impliedprobs <- skybet.impliedprobs / overround
skybet.midpoints <- skybet.breaks[-length(skybet.breaks)] + diff(skybet.breaks)/2
skybet.implied.mean <- sum(skybet.midpoints * skybet.impliedprobs)
skybet.implied.var <- sd(rep(skybet.midpoints , round(1000*skybet.impliedprobs,0)))
alpha.mean <- mean(c(skybet.implied.mean,coral.implied.mean))
alpha.sd <- mean(c(skybet.implied.var,coral.implied.var))
alpha.var <- alpha.sd^2
alpha.var <- alpha.var * 32
alpha.prec <- 1 / alpha.sd ^2
## ----readinresults, echo = FALSE, message = FALSE, warning = FALSE, results = 'hide'----
res <- read.csv("partial_results.csv", header = T)
res$ExpectedDeclaration <- as.POSIXct(paste0("2014-09-19 ",res$ExpectedDeclaration),format="%Y-%m-%d %H:%M:%S")
obs <- res$YesVotes / (res$YesVotes + res$NoVotes)
names(obs) <- res$Authority
res$CouncilTurnout.sc <- res$CouncilTurnout - mean(res$CouncilTurnout,na.rm = TRUE)
obsturnout <- (res$YesVotes + res$NoVotes) / res$Electorate
res <- merge(res, offset.priors,
by.x= "Authority", by.y = "profile_oslaua",
all = TRUE)
res$Weights <- res$Electorate / sum(res$Electorate)
## ----datatable, echo = FALSE, results= 'asis', message = FALSE, warning = FALSE----
print(ascii(res[,c("Authority","Weights","CouncilTurnout","xbar")],
include.rownames = FALSE,
colnames = c("Area","Weight","Previous Turnout","Offset"),
digits = 4),
type = "pandoc")
## ----jagsmod, echo = TRUE------------------------------------------------
mod <- function() {
for (i in 1:nLauths) {
## Yes vote model
y[i] ~ dnorm(mu[i],est.prec)%_%T(0,1)
mu[i] <- national.pct + betaoffset * lauth.prior.mean[i]
## Turnout model
tout[i] ~ dnorm(eta[i],big.prec)%_%T(0,1)
eta[i] <- alpha + theta[i]
theta[i] ~ dnorm(kappa[i],tau)
kappa[i] <- alpha0 + beta * tstar[i]
}
### Make sure weights sum to one
for (i in 1:nLauths) {
wtstar[i] <- (tout[i] * elecshare[i])
wt[i] <- wtstar[i] / sum(wtstar[1:nLauths])
}
national <- inprod(y[1:32],wt[1:32])
national.pct ~ dnorm(national.prior.mean, national.prior.prec)
big.prec <- 100000
est.prec <- pow(est.sigma,-2)
est.sigma ~ dunif(0,0.25)
alpha ~ dnorm(turnout.prior.mean, turnout.prior.prec)
beta ~ dunif(0,1)
betaoffset ~ dnorm(1,pow(0.5,-2))
tau <- pow(sigma,-2)
sigma ~ dunif(0,0.25)
alpha0 ~ dnorm(0,.001)
}
## ----workedexample, echo = FALSE, message = FALSE, warning = FALSE, results = 'hide', eval = TRUE----
write.model(mod, "predmodel.bug")
clackman <- which(res$Authority == "Clackmannanshire")
res$YesVotes[clackman] <- res$Electorate[clackman] * 0.7 * 0.52
res$NoVotes[clackman] <- res$Electorate[clackman] * 0.7 * 0.48
res$SpoiledVotes[clackman] <- 0
res$tout <- (res$YesVotes + res$NoVotes)
res$y <- res$YesVotes / res$tout
res$tout <- res$tout / res$Electorate
initfunc <- function() {
#if (exists("out")) {
# alpha = mean(out[[1]][,"alpha"])
# beta = mean(out[[1]][,"beta"])
#} else {
alpha = rnorm(1,0.8,.003)
beta = runif(1,0,.25)
#}
return(list(alpha=alpha,beta=beta))
}
forJags <- list(lauth.prior.mean = res$xbar,
national.prior.mean = national.prior.mean,
national.prior.prec = 1/ national.prior.var,
turnout.prior.mean = alpha.mean,
turnout.prior.prec = alpha.prec,
elecshare = res$Weights,
nLauths = nrow(res),
y = res$y,
tout = res$tout,
tstar = res$CouncilTurnout.sc)
my.iter <- 50000
my.burnin <- 25000
my.thin <- my.iter / 1e3
my.debug <- FALSE
model.sim <- jags.model("predmodel.bug",
data=forJags,
inits = initfunc,
n.chains = 3)
update(model.sim, n.iter = my.burnin)
out <- coda.samples(model.sim, c("national", "national.pct","mu","tout","alpha","beta","betaoffset","wtstar","est.sigma","sigma"),
n.iter = my.iter,
thin = my.thin)
holder <- summary(out)
geweke.diag(out)
## What does the national figure look like?
holder$statistics["national",]
### How does this compare to our prior, visually?
post.prob <- mean(out[[1]][,"national"]>0.5)
## ----workedexamplefig, echo = FALSE, fig= TRUE, fig.cap = "Change in projection following a hypothetical Clackmannanshire vote",fig.width=7,fig.height=9----
par(mfrow = c(2,1))
hist(simulated.referendums,
main = paste0("Prior probability of Yes vote: ",round(prior.prob,2)),
breaks = 25,
xlim = my.xlims,
col = "#999999",
border = "#FFFFFF",
xlab = "Yes %")
hist(out[[1]][,"national"],
main = paste0("Posterior probability of Yes vote: ",round(post.prob,2)),
xlim = my.xlims,
breaks = 25,
col = "#999999",
border = "#FFFFFF",
xlab = "Yes %")
## ----tweets, echo = FALSE, eval = TRUE-----------------------------------
uea.url <- "http://www.ueapolitics.org/"
mean.yes <- mean(out[[1]][,"national"])
lo.yes <- quantile(out[[1]][,"national"],0.025)
hi.yes <- quantile(out[[1]][,"national"],0.975)
turnouts <- apply(out[[1]][,grep("wtstar",colnames(out[[1]]))],1,sum)
mean.turnout <- mean(turnouts)
lo.turnout <- quantile(turnouts,0.025)
hi.turnout <- quantile(turnouts,0.975)
sorted.areas <- res$y[order(res$Authority)]
names(sorted.areas) <- res$Authority
sorted.areas <- sorted.areas[!is.na(sorted.areas)]
tweet1 <- paste0("Current probability of Yes vote: ",round(100*post.prob,2),"%, up from ", round(100*(prior.prob),2),"% at the start of the night #indyref")
tweet2 <- paste0("Current predicted Yes %: ",round(100*mean.yes,2),"% (95% Bayesian CI: ",round(100*lo.yes,2),"% - ",round(100*hi.yes,2),"%) #indyref")
tweet3 <- paste0("Areas reporting: ",paste0(names(sorted.areas)," (",round(100*sorted.areas,2),"%)",collapse="; "))
tweet3.truncated <- paste0(substr(tweet3,0,140),"...")
tweet4 <- paste0("Current predicted turnout %: ",round(100*mean.turnout,2),"% (95% Bayesian CI: ",round(100*lo.turnout,2),"% - ",round(100*hi.turnout,2),"%) #indyref")
### Histogram for outcome?
### Histograms for turnout?
### Plot of prediction history?
## ----dataout, echo = FALSE, eval = FALSE---------------------------------
## res$hjust <- res$xpos <- NULL
## write.csv(res,file="onthenight_data.csv",row.names= FALSE)
## ----bytheclock, echo = FALSE, eval = TRUE, message = FALSE, warning = FALSE, cache = FALSE----
### Reset the clackmannanshire data
res$YesVotes[clackman] <- NA
res$NoVotes[clackman] <- NA
res$SpoiledVotes[clackman] <- NA
### Order res by expected declaration time
res$DeclarationTime <- res$ExpectedDeclaration + rnorm(32,0,30*60)
res <- res[order(res$DeclarationTime),]
### Set up big loop
for (i in 1:nrow(res)) {
### Establish the turnout in this region
turnout <- rnorm(1,alpha.mean, sqrt(1 /alpha.prec)) + runif(1,0,1) * res$CouncilTurnout.sc[i]
turnout <- ifelse(turnout >= 1,0.99,turnout)
### Establish the Yes share in this region
yes.share <- rnorm(1,national.prior.mean,sqrt(national.prior.var)) + rnorm(1,res$xbar[i],sqrt(res$var[i]))
res$YesVotes[i] <- res$Electorate[i] * turnout * yes.share
res$NoVotes[i] <- res$Electorate[i] * turnout * (1 - yes.share)
res$y <- res$YesVotes / (res$YesVotes + res$NoVotes)
res$tout[i] <- turnout
## Feed this to jags
forJags <- list(lauth.prior.mean = res$xbar,
national.prior.mean = national.prior.mean,
national.prior.prec = 1/ national.prior.var,
turnout.prior.mean = alpha.mean,
turnout.prior.prec = alpha.prec,
elecshare = res$Weights,
nLauths = nrow(res),
y = res$y,
tout = res$tout,
tstar = res$CouncilTurnout.sc)
model.sim <- jags.model("predmodel.bug",
data=forJags,
inits = initfunc,
n.chains = 2)
update(model.sim, n.iter = my.burnin/8)
out <- coda.samples(model.sim, c("national", "national.pct","mu","tout","alpha","beta","wtstar","est.sigma","sigma"),
n.iter = my.iter / 8,
thin = my.thin / 8)
## Get predictions, put them in
res$PosteriorProbability[i] <- mean(out[[1]][,"national"]>0.5)
res$PosteriorYes[i] <- mean(out[[1]][,"national"])
res$PosteriorYesLo[i] <- quantile(out[[1]][,"national"],0.025)
res$PosteriorYesHi[i] <- quantile(out[[1]][,"national"],0.975)
turnouts <- apply(out[[1]][,grep("wtstar",colnames(out[[1]]))],1,sum)
res$PosteriorTurnout[i] <- mean(turnouts)
res$PosteriorTurnoutLo[i] <- quantile(turnouts,0.025)
res$PosteriorTurnoutHi[i] <- quantile(turnouts,0.975)
}
## ----clockfig, echo = FALSE, fig= TRUE, fig.cap = "Change in predictions",fig.width=7,fig.height=9----
### Now plot
res$true.yes <- sum(res$YesVotes) / (sum(res$YesVotes)+sum(res$NoVotes))
res$true.turnout <- (sum(res$YesVotes)+sum(res$NoVotes)) / sum(res$Electorate)
res$maxy <- max(res$PosteriorYesHi)
evolution.plot <- ggplot(res, aes(x= DeclarationTime, y = PosteriorYes, ymin = PosteriorYesLo, ymax = PosteriorYesHi)) +
geom_pointrange() +
geom_hline(aes(yintercept = true.yes)) +
geom_text(aes(label = Authority, y = maxy), angle = -90, size = 3, hjust = 0) +
scale_y_continuous("Predicted Yes share of vote") +
scale_x_datetime("") +
theme_bw()
evolution.plot2 <- ggplot(res, aes(x= DeclarationTime, y = PosteriorTurnout, ymin = PosteriorTurnoutLo, ymax = PosteriorTurnoutHi)) +
geom_pointrange() +
geom_hline(aes(yintercept = true.turnout)) +
scale_y_continuous("Predicted turnout") +
scale_x_datetime("") +
theme_bw()
print(evolution.plot)
## ----clockcumsum, echo = FALSE, eval = FALSE-----------------------------
## res$csYes <- cumsum(res$YesVotes)
## res$csVotes <- cumsum(res$YesVotes+res$NoVotes)
## plot(res$csYes/res$csVotes,res$PosteriorYes, type = "n",
## xlab = "Yes share of the vote thus far",
## ylab = "Predicted yes vote share at end of night")
## lines(res$csYes/res$csVotes, res$PosteriorYes,lty = 2)
## texts <- gsub("2014-09-19 ","",as.character(res$DeclarationTime))
## texts <- paste0(texts, " (",res$Authority,")")
## colour <- heat.colors(n=32)[order(-1*as.numeric(res$DeclarationTime))]
##
## text(res$csYes/res$csVotes, res$PosteriorYes, texts,col = colour, cex = 1)
## text(res$csYes/res$csVotes, res$PosteriorYes, texts,col = "#999999", cex = 0.8)
##
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment