dmarcelinobr/Scotland.R

## Scotland.R
library(MCMCpack)
## Coping with polls: the more recent, the better they are.
##http://en.wikipedia.org/wiki/Opinion_polling_for_the_Scottish_independence_referendum,_2014

polls = NULL
polls <- data.frame( rbind(
  Survation = c(37, 50, 1010),
  YouGov  =  c(35, 55, 1142),
  TNSBMRB =  c(32, 45, 1003),
  IpsosMORI = c(40, 54, 1006),
  Survation = c(40, 46, 1000),
  Panelbase =  c(41, 48, 1041)
  ))

# set up for decimals
polls[, 1:2] <- polls[, 1:2]/100

# weighting polls
wtd.polls <- rbind(polls, c(apply(polls[,1:2],2, weighted.mean, polls[,3]), sum(polls[,3])))

names(wtd.polls) <- c("Yes","No","n")

wtd.polls$Others = 1-wtd.polls$Yes-wtd.polls$No

# Adjusting for the undecideds for the final results
wtd.polls.others <- data.frame(wtd.polls[7,1:2] + wtd.polls[7,1:2]/ sum(wtd.polls[7,1:2])*wtd.polls[7,4],n=wtd.polls[7,3],Others=0)

data = rbind(wtd.polls, wtd.polls.others)

##################################################
## If we have more than two categories (Yes, No, Uncertain), Beta distribution won't work properly.
## The following is the main function, which I use to randomly draw data from a dirichlet distribution.
##################################################
win.probs = function(j=8,export=1){
  p=rdirichlet(100000,
               data$n[j]*
               c(data$Yes[j], data$No[j], 1-data$Yes[j]-data$No[j])+1
  )
  if(export==1){
    mean(p[,2]>p[,1])
  } else {
    return(p)
  }
}

 ( No.win.probs = sapply(1:nrow(data),win.probs) )

## set simulated data for determining parameters and graphing
sim.dist = win.probs(8,export=2)

## Get the marginal distribution. Since we have two classes the Dirichlet is distributed as a Beta.
sim.dist = cbind(sim.dist, sim.dist[,2]-sim.dist[,1])

## Draw a histogram of simulated data
hist(sim.dist[,4], col='gray90', nclass=50, main="Histogram of the Yes/No Differences", xlab="Yes/No Difference")
#abline(v=0, col=c('red'), lwd=2, lty=c(1))

## In this case I ran the function a few time to set the start value close to what I believe be the output.
## So, you have to adjust the shape parameters (shape1 and shape2) manually; it's much of a trial and error exercise,
## but you can use the function beta.par from SciencesPo package to estimate these parameters using \mu and \sigma^2.

fit.dist.1 = fitdistr(sim.dist[,1], "beta",
        start=list(shape1=3,shape2=3))

fit.dist.2 = fitdistr(sim.dist[,2], "beta",
       start=list(shape1=3,shape2=3))

## We can also fit a curve of the margins
fit.dist.margin = fitdistr(sim.dist[,4], "beta",
      start=list(shape1=5,shape2=5))

## Draw densities of simulated data
curve(dbeta(x,fit.dist.1$estimate[1],fit.dist.1$estimate[2]), ylim=c(0,80), xlim=c(.35,.65), col='NA', lty=1, lwd=3, ylab="Density", xlab="theta", main="Distribution of the Yes/No Preference Referendum 2014", sub=paste("Probability that NO wins: ", round(NO.win.probs[8],2) ) ) ## yes 1

curve(dbeta(x,fit.dist.1$estimate[1],fit.dist.1$estimate[2]), xlim=c(.35,.50), col='red', lty=1, add=TRUE, lwd=3) ## yes 1

curve(dbeta(x,fit.dist.2$estimate[1],fit.dist.2$estimate[2]), add=TRUE, col='blue', xlim=c(.50,.65), lty=1, lwd=3) ## No 2

abline(v=c(median(sim.dist[,1]), median(sim.dist[,2])), col=c('red','blue'), lwd=2, lty=c(2,2))
legend("topright",c("Yes","No"), lwd=2, col=c('red','blue'), lty=c(1,1))
	library(MCMCpack)
	## Coping with polls: the more recent, the better they are.
	##http://en.wikipedia.org/wiki/Opinion_polling_for_the_Scottish_independence_referendum,_2014

	polls = NULL
	polls <- data.frame( rbind(
	Survation = c(37, 50, 1010),
	YouGov = c(35, 55, 1142),
	TNSBMRB = c(32, 45, 1003),
	IpsosMORI = c(40, 54, 1006),
	Survation = c(40, 46, 1000),
	Panelbase = c(41, 48, 1041)
	))

	# set up for decimals
	polls[, 1:2] <- polls[, 1:2]/100

	# weighting polls
	wtd.polls <- rbind(polls, c(apply(polls[,1:2],2, weighted.mean, polls[,3]), sum(polls[,3])))

	names(wtd.polls) <- c("Yes","No","n")

	wtd.polls$Others = 1-wtd.polls$Yes-wtd.polls$No

	# Adjusting for the undecideds for the final results
	wtd.polls.others <- data.frame(wtd.polls[7,1:2] + wtd.polls[7,1:2]/ sum(wtd.polls[7,1:2])*wtd.polls[7,4],n=wtd.polls[7,3],Others=0)

	data = rbind(wtd.polls, wtd.polls.others)

	##################################################
	## If we have more than two categories (Yes, No, Uncertain), Beta distribution won't work properly.
	## The following is the main function, which I use to randomly draw data from a dirichlet distribution.
	##################################################
	win.probs = function(j=8,export=1){
	p=rdirichlet(100000,
	data$n[j]*
	c(data$Yes[j], data$No[j], 1-data$Yes[j]-data$No[j])+1
	)
	if(export==1){
	mean(p[,2]>p[,1])
	} else {
	return(p)
	}
	}

	( No.win.probs = sapply(1:nrow(data),win.probs) )

	## set simulated data for determining parameters and graphing
	sim.dist = win.probs(8,export=2)

	## Get the marginal distribution. Since we have two classes the Dirichlet is distributed as a Beta.
	sim.dist = cbind(sim.dist, sim.dist[,2]-sim.dist[,1])

	## Draw a histogram of simulated data
	hist(sim.dist[,4], col='gray90', nclass=50, main="Histogram of the Yes/No Differences", xlab="Yes/No Difference")
	#abline(v=0, col=c('red'), lwd=2, lty=c(1))

	## In this case I ran the function a few time to set the start value close to what I believe be the output.
	## So, you have to adjust the shape parameters (shape1 and shape2) manually; it's much of a trial and error exercise,
	## but you can use the function beta.par from SciencesPo package to estimate these parameters using \mu and \sigma^2.

	fit.dist.1 = fitdistr(sim.dist[,1], "beta",
	start=list(shape1=3,shape2=3))

	fit.dist.2 = fitdistr(sim.dist[,2], "beta",
	start=list(shape1=3,shape2=3))

	## We can also fit a curve of the margins
	fit.dist.margin = fitdistr(sim.dist[,4], "beta",
	start=list(shape1=5,shape2=5))

	## Draw densities of simulated data
	curve(dbeta(x,fit.dist.1$estimate[1],fit.dist.1$estimate[2]), ylim=c(0,80), xlim=c(.35,.65), col='NA', lty=1, lwd=3, ylab="Density", xlab="theta", main="Distribution of the Yes/No Preference Referendum 2014", sub=paste("Probability that NO wins: ", round(NO.win.probs[8],2) ) ) ## yes 1

	curve(dbeta(x,fit.dist.1$estimate[1],fit.dist.1$estimate[2]), xlim=c(.35,.50), col='red', lty=1, add=TRUE, lwd=3) ## yes 1

	curve(dbeta(x,fit.dist.2$estimate[1],fit.dist.2$estimate[2]), add=TRUE, col='blue', xlim=c(.50,.65), lty=1, lwd=3) ## No 2

	abline(v=c(median(sim.dist[,1]), median(sim.dist[,2])), col=c('red','blue'), lwd=2, lty=c(2,2))
	legend("topright",c("Yes","No"), lwd=2, col=c('red','blue'), lty=c(1,1))