Skip to content

Instantly share code, notes, and snippets.

@dmarcelinobr
Last active August 29, 2015 14:05
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 3 You must be signed in to fork a gist
  • Save dmarcelinobr/cde80bf3c7c0788a7e8b to your computer and use it in GitHub Desktop.
Save dmarcelinobr/cde80bf3c7c0788a7e8b to your computer and use it in GitHub Desktop.
library(MCMCpack)
## Coping with polls: the more recent, the better they are.
##http://en.wikipedia.org/wiki/Opinion_polling_for_the_Scottish_independence_referendum,_2014
polls = NULL
polls <- data.frame( rbind(
Survation = c(37, 50, 1010),
YouGov = c(35, 55, 1142),
TNSBMRB = c(32, 45, 1003),
IpsosMORI = c(40, 54, 1006),
Survation = c(40, 46, 1000),
Panelbase = c(41, 48, 1041)
))
# set up for decimals
polls[, 1:2] <- polls[, 1:2]/100
# weighting polls
wtd.polls <- rbind(polls, c(apply(polls[,1:2],2, weighted.mean, polls[,3]), sum(polls[,3])))
names(wtd.polls) <- c("Yes","No","n")
wtd.polls$Others = 1-wtd.polls$Yes-wtd.polls$No
# Adjusting for the undecideds for the final results
wtd.polls.others <- data.frame(wtd.polls[7,1:2] + wtd.polls[7,1:2]/ sum(wtd.polls[7,1:2])*wtd.polls[7,4],n=wtd.polls[7,3],Others=0)
data = rbind(wtd.polls, wtd.polls.others)
##################################################
## If we have more than two categories (Yes, No, Uncertain), Beta distribution won't work properly.
## The following is the main function, which I use to randomly draw data from a dirichlet distribution.
##################################################
win.probs = function(j=8,export=1){
p=rdirichlet(100000,
data$n[j]*
c(data$Yes[j], data$No[j], 1-data$Yes[j]-data$No[j])+1
)
if(export==1){
mean(p[,2]>p[,1])
} else {
return(p)
}
}
( No.win.probs = sapply(1:nrow(data),win.probs) )
## set simulated data for determining parameters and graphing
sim.dist = win.probs(8,export=2)
## Get the marginal distribution. Since we have two classes the Dirichlet is distributed as a Beta.
sim.dist = cbind(sim.dist, sim.dist[,2]-sim.dist[,1])
## Draw a histogram of simulated data
hist(sim.dist[,4], col='gray90', nclass=50, main="Histogram of the Yes/No Differences", xlab="Yes/No Difference")
#abline(v=0, col=c('red'), lwd=2, lty=c(1))
## In this case I ran the function a few time to set the start value close to what I believe be the output.
## So, you have to adjust the shape parameters (shape1 and shape2) manually; it's much of a trial and error exercise,
## but you can use the function beta.par from SciencesPo package to estimate these parameters using \mu and \sigma^2.
fit.dist.1 = fitdistr(sim.dist[,1], "beta",
start=list(shape1=3,shape2=3))
fit.dist.2 = fitdistr(sim.dist[,2], "beta",
start=list(shape1=3,shape2=3))
## We can also fit a curve of the margins
fit.dist.margin = fitdistr(sim.dist[,4], "beta",
start=list(shape1=5,shape2=5))
## Draw densities of simulated data
curve(dbeta(x,fit.dist.1$estimate[1],fit.dist.1$estimate[2]), ylim=c(0,80), xlim=c(.35,.65), col='NA', lty=1, lwd=3, ylab="Density", xlab="theta", main="Distribution of the Yes/No Preference Referendum 2014", sub=paste("Probability that NO wins: ", round(NO.win.probs[8],2) ) ) ## yes 1
curve(dbeta(x,fit.dist.1$estimate[1],fit.dist.1$estimate[2]), xlim=c(.35,.50), col='red', lty=1, add=TRUE, lwd=3) ## yes 1
curve(dbeta(x,fit.dist.2$estimate[1],fit.dist.2$estimate[2]), add=TRUE, col='blue', xlim=c(.50,.65), lty=1, lwd=3) ## No 2
abline(v=c(median(sim.dist[,1]), median(sim.dist[,2])), col=c('red','blue'), lwd=2, lty=c(2,2))
legend("topright",c("Yes","No"), lwd=2, col=c('red','blue'), lty=c(1,1))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment