dmarcelinobr/Scotland2.R

## Scotland2.R
# Data latest polls
polls = NULL
polls <- data.frame( rbind(
  Opinium = c(43, 47, 1156),
  Survation = c(44, 48, 1000),
  ICM =  c(41, 45, 1175)
))

# set up for decimals
polls[, 1:2] <- polls[, 1:2]/100

# weighting polls
wtd.polls <- rbind(polls, c(apply(polls[,1:2],2, weighted.mean, polls[,3]), sum(polls[,3])))

names(wtd.polls) <- c("Yes","No","n")

wtd.polls$Others = 1-wtd.polls$Yes-wtd.polls$No

# Adjusting for the undecideds for the final results
wtd.polls[5,] <- data.frame(wtd.polls[4,1:2] + wtd.polls[4,1:2]/ sum(wtd.polls[4,1:2])*wtd.polls[4,4],n=wtd.polls[4,3],Others=0)

##################################################
## If we have more than two categories (Yes, No, and DK), Beta distribution won't work properly.
## The following is a function which I used to draw from a dirichlet distribution.
##The Dirichlet distribution is the multidimensional generalization of the beta distribution. It's the canonical Bayesian distribution for the parameter estimates of a multinomial distribution.
## The Dirichlet function returns a matrix with n rows, each containing a single Dirichlet random deviate.
##################################################
row= 5
prob2win = function(row,export=1){
  p=rdirichlet(100000,
  wtd.polls$n[row] *
  c(wtd.polls$Yes[row], wtd.polls$No[row], 1-wtd.polls$Yes[row]-wtd.polls$No[row])+1
  )
  if(export==1){
    mean(p[,1]<p[,2]) ## No exceeds Yes?
  } else {
    return(p)
  }
}


(No.win.probs = sapply(1:nrow(wtd.polls),prob2win))


## set simulated data for determining parameters and graphing
p = prob2win(row= 5, export=0)

## Get the marginal distribution. Since we have two classes the Dirichlet is distributed as a Beta.
p = cbind(p, p[,2]-p[,1])

## Draw a histogram of simulated data
hist(p[,5], col="gray", nclass=50, main="Histogram of the Yes/No Differences", xlab="Yes/No Difference")
abline(v=0, col=c('red'), lwd=2, lty=c(1))

## In this case I ran the function a few time to set the start value close to what I believe be the output.
## So, you have to adjust the shape parameters (shape1 and shape2) manually; it's much of a trial and error exercise,
## but you can use the function beta.par from SciencesPo package to estimate these parameters using \mu and \sigma^2.
library(MCMCpack)
fit1 = fitdistr(p[,1], "beta",
                  start=list(shape1=455,shape2=555))

fit2 = fitdistr(p[,2], "beta",
                    start=list(shape1=545,shape2=455))

library(png)
#Replace the directory and file information with your info
png <- readPNG(file.choose())

#Get the plot information so the image will fill the plot box, and draw it
lim <- par()
rasterImage(png, lim$usr[1], lim$usr[3], lim$usr[2], lim$usr[4])
grid()

## Draw densities of simulated data
curve(dbeta(x,fit1$estimate[1],fit1$estimate[2]), ylim=c(0,50), xlim=c(.40,.60), col='NA', lty=1, lwd=3, ylab="Density", xlab=expression("Posterior "*pi(theta)*""), main="Distribution of the Yes/No Preference Referendum 2014", sub=paste("Probability that NO wins: ", round(No.win.probs[5],3) ) ) ## yes 1

curve(dbeta(x,fit1$estimate[1],fit1$estimate[2]), xlim=c(.43,.52), col='green', lty=1, add=TRUE, lwd=8) ## yes 1

curve(dbeta(x,fit2$estimate[1],fit2$estimate[2]), add=TRUE, col='magenta', xlim=c(.49,.56), lty=1, lwd=8) ## No 2

abline(v=c(median(p[,1]), median(p[,2])), col=c('green','magenta'), lwd=2, lty=c(2,2))
legend("topright",c("Yes","No"), lwd=2, col=c('green','magenta'), lty=c(1,1))
	# Data latest polls
	polls = NULL
	polls <- data.frame( rbind(
	Opinium = c(43, 47, 1156),
	Survation = c(44, 48, 1000),
	ICM = c(41, 45, 1175)
	))

	# set up for decimals
	polls[, 1:2] <- polls[, 1:2]/100

	# weighting polls
	wtd.polls <- rbind(polls, c(apply(polls[,1:2],2, weighted.mean, polls[,3]), sum(polls[,3])))

	names(wtd.polls) <- c("Yes","No","n")

	wtd.polls$Others = 1-wtd.polls$Yes-wtd.polls$No

	# Adjusting for the undecideds for the final results
	wtd.polls[5,] <- data.frame(wtd.polls[4,1:2] + wtd.polls[4,1:2]/ sum(wtd.polls[4,1:2])*wtd.polls[4,4],n=wtd.polls[4,3],Others=0)

	##################################################
	## If we have more than two categories (Yes, No, and DK), Beta distribution won't work properly.
	## The following is a function which I used to draw from a dirichlet distribution.
	##The Dirichlet distribution is the multidimensional generalization of the beta distribution. It's the canonical Bayesian distribution for the parameter estimates of a multinomial distribution.
	## The Dirichlet function returns a matrix with n rows, each containing a single Dirichlet random deviate.
	##################################################
	row= 5
	prob2win = function(row,export=1){
	p=rdirichlet(100000,
	wtd.polls$n[row] *
	c(wtd.polls$Yes[row], wtd.polls$No[row], 1-wtd.polls$Yes[row]-wtd.polls$No[row])+1
	)
	if(export==1){
	mean(p[,1]<p[,2]) ## No exceeds Yes?
	} else {
	return(p)
	}
	}


	(No.win.probs = sapply(1:nrow(wtd.polls),prob2win))



	## set simulated data for determining parameters and graphing
	p = prob2win(row= 5, export=0)

	## Get the marginal distribution. Since we have two classes the Dirichlet is distributed as a Beta.
	p = cbind(p, p[,2]-p[,1])

	## Draw a histogram of simulated data
	hist(p[,5], col="gray", nclass=50, main="Histogram of the Yes/No Differences", xlab="Yes/No Difference")
	abline(v=0, col=c('red'), lwd=2, lty=c(1))

	## In this case I ran the function a few time to set the start value close to what I believe be the output.
	## So, you have to adjust the shape parameters (shape1 and shape2) manually; it's much of a trial and error exercise,
	## but you can use the function beta.par from SciencesPo package to estimate these parameters using \mu and \sigma^2.
	library(MCMCpack)
	fit1 = fitdistr(p[,1], "beta",
	start=list(shape1=455,shape2=555))

	fit2 = fitdistr(p[,2], "beta",
	start=list(shape1=545,shape2=455))

	library(png)
	#Replace the directory and file information with your info
	png <- readPNG(file.choose())

	#Get the plot information so the image will fill the plot box, and draw it
	lim <- par()
	rasterImage(png, lim$usr[1], lim$usr[3], lim$usr[2], lim$usr[4])
	grid()

	## Draw densities of simulated data
	curve(dbeta(x,fit1$estimate[1],fit1$estimate[2]), ylim=c(0,50), xlim=c(.40,.60), col='NA', lty=1, lwd=3, ylab="Density", xlab=expression("Posterior "pi(theta)""), main="Distribution of the Yes/No Preference Referendum 2014", sub=paste("Probability that NO wins: ", round(No.win.probs[5],3) ) ) ## yes 1

	curve(dbeta(x,fit1$estimate[1],fit1$estimate[2]), xlim=c(.43,.52), col='green', lty=1, add=TRUE, lwd=8) ## yes 1

	curve(dbeta(x,fit2$estimate[1],fit2$estimate[2]), add=TRUE, col='magenta', xlim=c(.49,.56), lty=1, lwd=8) ## No 2

	abline(v=c(median(p[,1]), median(p[,2])), col=c('green','magenta'), lwd=2, lty=c(2,2))
	legend("topright",c("Yes","No"), lwd=2, col=c('green','magenta'), lty=c(1,1))