chrishanretty/eastminster_post.R

## eastminster_post.R

## ----loadlibs, echo = FALSE----------------------------------------------
library(foreign)
library(lme4)
library(maptools)
library(car)
library(R2WinBUGS)
library(spdep)
library(ggplot2)
library(scales)
library(grid)
library(png)
library(reshape)
library(plyr)


## ----loaddata, echo = FALSE----------------------------------------------
bes <- read.spss("BES2015_W2_Panel_v2.0.sav",
	to.data.frame = TRUE)


## ----subset, echo = FALSE------------------------------------------------
### Subset
### (1) to Scotland
bes <- subset(bes, bes$gor == "Scotland")
scots.respondents <- nrow(bes)
table(as.character(bes$profile_oslaua))

ew.lauths <- c("Bedford","Cardiff", "Mid Devon", "Mid Suffolk",
	"North Warwickshire","not used", "Redcar and Cleveland",
	"Sefton","Teignbridge")
bes <- subset(bes, !is.element(profile_oslaua, ew.lauths))
bes$profile_oslaua <- factor(as.character(bes$profile_oslaua))

### (2) to those who will vote in the referendum
table(as.character(bes$scotReferendumIntentionW2))
bes <- subset(bes, scotReferendumIntentionW2 %in% c("Will vote no","Will vote 'Yes'"))
bes$scotReferendumYes <- as.numeric(bes$scotReferendumIntentionW2 == "Will vote 'Yes'")


## ----shapefiles, echo = FALSE--------------------------------------------
### Next, get shapefiles, and from that, density
x <- readShapePoly("shapefiles/district_borough_unitary_region.shp")
### Get just Scottish local authorities
x <- subset(x, NAME %in% c("Angus","Clackmannanshire", "Dundee City",
	"East Ayrshire", "East Dunbartonshire","East Renfrewshire", "Falkirk",
	"Glasgow City", "Inverclyde","Midlothian", "North Lanarkshire",
	"Perth and Kinross", "Renfrewshire","Scottish Borders", "South Lanarkshire",
	"Stirling", "West Dunbartonshire","West Lothian", "Highland",
	"Moray", "Orkney Islands","Na h-Eileanan an Iar", "Argyll and Bute",
	"Aberdeenshire", "Fife","Aberdeen City", "City of Edinburgh",
	"East Lothian", "Shetland Islands","North Ayrshire", "Dumfries and Galloway",
	"South Ayrshire"))
x$NAME <- as.character(x$NAME)
x$NAME <- car:::recode(x$NAME,
"'Perth and Kinross'='Perth & Kinross';
 'Na h-Eileanan an Iar'='Eilean Siar';
 'Argyll and Bute'='Argyll & Bute';
 'City of Edinburgh'='Edinburgh, City of';
 'Dumfries and Galloway'='Dumfries & Galloway'")

### Make sure the names have the same factor levels
bes$profile_oslaua <- factor(bes$profile_oslaua)
x$NAME <- factor(x$NAME,levels = levels(bes$profile_oslaua))

### Convert both to numeric
bes$lauth <- as.numeric(bes$profile_oslaua)
x$lauth <- as.numeric(x$NAME)

# Create spatial polygons object
gpclibPermit()
sp.const <- unionSpatialPolygons(x, x$lauth)
# Lose detail
sp.const <- gSimplify(sp.const,100)

## Get district size
dist.size <- as.data.frame(cbind(x$HECTARES, x$lauth))
colnames(dist.size) <- c("area","lauth")
## Order is important
dist.size<-dist.size[order(dist.size$lauth),]
rownames(dist.size)<-dist.size$lauth


## ----auxinfo, echo = FALSE-----------------------------------------------
## Read in auxiliary info
aux <- read.csv("lauth_auxiliary.csv",header=T)
aux$lauth <- as.numeric(factor(aux$Authority,levels = levels(bes$profile_oslaua)))
aux.seat <- merge(dist.size, aux, by = "lauth", sort = FALSE)
aux.seat$density <- aux.seat$PollingList20140911 / aux.seat$area
aux.seat$density.sc <- as.vector(scale(aux.seat$density))
aux.seat$SNP.sc <- as.vector(scale(aux.seat$SNPFirstVote))


## ----neighbours, echo = FALSE--------------------------------------------
area.spdf <- SpatialPolygonsDataFrame(sp.const, dist.size)

## Reorder according to lauth
area.spdf <- area.spdf[order(area.spdf$lauth),]

# Create spatial weights
nb.districts <- poly2nb(area.spdf)

### There are some joins to add
### Specifically
## [1] "Eilean Siar" (13)      "Orkney Islands" (23)   "Shetland Islands" (27)
## Each of these can be joined to the highlands (17)
joins.to.add <- data.frame(A = c(13,23,27),
	B = c(17,17,17))

for (i in 1:nrow(joins.to.add)) {
 #   a <- as.integer(const.lookup$refno.num[which(const.lookup$refno==joins.to.add[i,1])])
  #  b <- as.integer(const.lookup$refno.num[which(const.lookup$refno==joins.to.add[i,2])])
	a <- as.integer(joins.to.add[i,1])
	b <- as.integer(joins.to.add[i,2])
    nb.districts[[a]] <- c(nb.districts[[a]],b)
    nb.districts[[a]] <-nb.districts[[a]][nb.districts[[a]]!=0]
    nb.districts[[b]] <- c(nb.districts[[b]],a)
    nb.districts[[b]] <-nb.districts[[b]][nb.districts[[b]]!=0]
}
nb <- unlist(nb.districts)
weight <- rep(1, times=length(nb))
num <- card(nb.districts)


spdata <- list("nb", "weight", "num")


## ----bugsestimation, echo = FALSE, cache = TRUE--------------------------
model <- function() {

  for(i in 1:nObs) { # loop over unit observations
    y[i] ~ dbern(p[i])
    logit(p[i]) <- alpha + beta[constindex[i]] + v[constindex[i]]
  }

  for (j in 1:nConst) { ## loop over constituencies
      beta[j] <- beta.density*density[j] + beta.snp*snp[j] + u[j]
      u[j] ~ dnorm(0, tauu) ## unstructured RE
  }

  v[1:nConst] ~ car.normal(nb[], weight[], num[], tauv) # impose CAR distribution on CAR-structured REs

  alpha ~ dflat()
  beta.density ~ dflat()
  beta.snp ~ dflat()

  tauu <- pow(sigmau, -2)
  sigmau ~ dunif(1e-16,2)
    tauv <- pow(sigmav, -2)
    sigmav ~ dunif(1e-16,2)
}

write.model(model, "model.bug")

y <- bes$scotReferendumYes
nObs <- length(y)
nConst <- max(bes$lauth)


density <- as.numeric(aux.seat$density.sc)
snp <- aux.seat$SNP.sc
#distsize <- dist.size$area
constindex <- bes$lauth
data <- list("nObs", "nConst", "y", "constindex","density","snp")

inits <- function() {list(alpha=rnorm(1),
  beta.density=rnorm(1),
  beta.snp=rnorm(1),
  sigmau=runif(1,0,1),
  v=rnorm(nConst,0,1),
  u=rnorm(nConst,0,1),
  sigmav=runif(1,0,1))
}

getBugsDir <- function() {
  if (.Platform$OS.type=="unix") {
    return("/home/chris/.wine/drive_c/Program Files (x86)/WinBUGS14")
  }
}

my.bugs.directory<- getBugsDir()
set.seed(180914)
my.iter <- 5000
my.burnin <- 1000
my.thin <- 5
my.debug <- FALSE
model.sim <- bugs(data=c(data, spdata),
 inits=inits,
 model.file="model.bug",
  parameters.to.save=c("beta.density", "beta.snp","beta", "alpha","u","v"),
  n.chains=3,
  n.iter=my.iter,
  n.burnin=my.burnin,
  n.thin=my.thin,
  bugs.directory=my.bugs.directory,
#  working.directory=".",
  debug=my.debug)

attach.bugs(model.sim)


## ----predsout, echo = FALSE----------------------------------------------
## Get mean predictions
meanpreds <- inv.logit(model.sim$mean$alpha + model.sim$mean$beta +  model.sim$mean$v)

pred.df <- inv.logit(model.sim$sims.list$alpha + model.sim$sims.list$beta + model.sim$sims.list$v)
pred.df <- melt(pred.df)
names(pred.df) <- c("Simulation","lauth","pred")
pred.df <- ddply(pred.df,.(lauth),function(df) {
	data.frame(xbar = mean(df$pred),
		xhi = quantile(df$pred,0.95),
		xlo = quantile(df$pred,0.05))
})

### Now subtract the sample average
sample.avg <- mean(bes$scotReferendumYes,na.rm=T)
pred.df$xbar <- pred.df$xbar - sample.avg
pred.df$xhi <- pred.df$xhi - sample.avg
pred.df$xlo <- pred.df$xlo - sample.avg
pred.df$profile_oslaua <- levels(bes$profile_oslaua)[pred.df$lauth]

### Sort it
pred.df <- pred.df[order(pred.df$xbar),]
pred.df$profile_oslaua <- factor(pred.df$profile_oslaua,
	levels = pred.df$profile_oslaua,
	ordered = TRUE)

### Set up the labels
pred.df$hjust <- ifelse(pred.df$xbar > 0, 1, 0)
pred.df$xpos <- ifelse(pred.df$xbar >0 ,pred.df$xlo, pred.df$xhi)
adj <- diff(range(pred.df$xbar)) * 0.025
pred.df$xpos <- pred.df$xpos + adj * ifelse(pred.df$xbar > 0, -1, 1)
pred.df$Type <- ifelse(pred.df$xbar > 0, "More favourable to independence","Less favourable to independence")
yeslogo <- readPNG("yesshadow.png")
nologo <- readPNG("nothanks.png")
yeslogo <- rasterGrob(yeslogo, interpolate = TRUE)
nologo <- rasterGrob(nologo, interpolate = TRUE)
### Plot this
difference.plot <- ggplot(pred.df,
	aes(y = xbar, ymin = xlo, ymax = xhi, x = profile_oslaua)) +
	geom_pointrange(aes(colour= Type)) +
	scale_x_discrete("") +
	scale_colour_discrete("",guide = FALSE) +
	scale_y_continuous("Difference to national Yes %",
		breaks = c(-.1,-0.05,-0.01,.1,0.05,0.01),
		minor_breaks = seq(-0.12,0.12,by = 0.01),
		labels = percent) +
	geom_text(aes(x = profile_oslaua, label = profile_oslaua, hjust = hjust, y = xpos)) +
	geom_hline(yintercept = 0, colour = 'darkgray') +
	annotation_custom(nologo, xmin=4, xmax=8, ymin=-0.25, ymax=0) +
	annotation_custom(yeslogo, xmin=25.5, xmax=30.5, ymin=0, ymax=0.25) +
	coord_flip() +
	theme_bw() +
	theme(axis.text.y = element_blank(), axis.ticks.y = element_blank(),
		panel.grid.minor.x = element_line(colour="#BBBBBB", size=0.25),
		panel.grid.minor.y = element_blank(),
		panel.grid.major.y = element_blank())


pdf(file="difference_plot.pdf",width = 8, height= 11)
print(difference.plot)
dev.off()

png(file="difference_plot.png",width = 8*72, height= 11*72)
print(difference.plot)
dev.off()

### Dump this as a CSV file
write.csv(pred.df,file="preds.csv",row.names= FALSE)

	## ----loadlibs, echo = FALSE----------------------------------------------
	library(foreign)
	library(lme4)
	library(maptools)
	library(car)
	library(R2WinBUGS)
	library(spdep)
	library(ggplot2)
	library(scales)
	library(grid)
	library(png)
	library(reshape)
	library(plyr)


	## ----loaddata, echo = FALSE----------------------------------------------
	bes <- read.spss("BES2015_W2_Panel_v2.0.sav",
	to.data.frame = TRUE)


	## ----subset, echo = FALSE------------------------------------------------
	### Subset
	### (1) to Scotland
	bes <- subset(bes, bes$gor == "Scotland")
	scots.respondents <- nrow(bes)
	table(as.character(bes$profile_oslaua))

	ew.lauths <- c("Bedford","Cardiff", "Mid Devon", "Mid Suffolk",
	"North Warwickshire","not used", "Redcar and Cleveland",
	"Sefton","Teignbridge")
	bes <- subset(bes, !is.element(profile_oslaua, ew.lauths))
	bes$profile_oslaua <- factor(as.character(bes$profile_oslaua))

	### (2) to those who will vote in the referendum
	table(as.character(bes$scotReferendumIntentionW2))
	bes <- subset(bes, scotReferendumIntentionW2 %in% c("Will vote no","Will vote 'Yes'"))
	bes$scotReferendumYes <- as.numeric(bes$scotReferendumIntentionW2 == "Will vote 'Yes'")


	## ----shapefiles, echo = FALSE--------------------------------------------
	### Next, get shapefiles, and from that, density
	x <- readShapePoly("shapefiles/district_borough_unitary_region.shp")
	### Get just Scottish local authorities
	x <- subset(x, NAME %in% c("Angus","Clackmannanshire", "Dundee City",
	"East Ayrshire", "East Dunbartonshire","East Renfrewshire", "Falkirk",
	"Glasgow City", "Inverclyde","Midlothian", "North Lanarkshire",
	"Perth and Kinross", "Renfrewshire","Scottish Borders", "South Lanarkshire",
	"Stirling", "West Dunbartonshire","West Lothian", "Highland",
	"Moray", "Orkney Islands","Na h-Eileanan an Iar", "Argyll and Bute",
	"Aberdeenshire", "Fife","Aberdeen City", "City of Edinburgh",
	"East Lothian", "Shetland Islands","North Ayrshire", "Dumfries and Galloway",
	"South Ayrshire"))
	x$NAME <- as.character(x$NAME)
	x$NAME <- car:::recode(x$NAME,
	"'Perth and Kinross'='Perth & Kinross';
	'Na h-Eileanan an Iar'='Eilean Siar';
	'Argyll and Bute'='Argyll & Bute';
	'City of Edinburgh'='Edinburgh, City of';
	'Dumfries and Galloway'='Dumfries & Galloway'")

	### Make sure the names have the same factor levels
	bes$profile_oslaua <- factor(bes$profile_oslaua)
	x$NAME <- factor(x$NAME,levels = levels(bes$profile_oslaua))

	### Convert both to numeric
	bes$lauth <- as.numeric(bes$profile_oslaua)
	x$lauth <- as.numeric(x$NAME)

	# Create spatial polygons object
	gpclibPermit()
	sp.const <- unionSpatialPolygons(x, x$lauth)
	# Lose detail
	sp.const <- gSimplify(sp.const,100)

	## Get district size
	dist.size <- as.data.frame(cbind(x$HECTARES, x$lauth))
	colnames(dist.size) <- c("area","lauth")
	## Order is important
	dist.size<-dist.size[order(dist.size$lauth),]
	rownames(dist.size)<-dist.size$lauth


	## ----auxinfo, echo = FALSE-----------------------------------------------
	## Read in auxiliary info
	aux <- read.csv("lauth_auxiliary.csv",header=T)
	aux$lauth <- as.numeric(factor(aux$Authority,levels = levels(bes$profile_oslaua)))
	aux.seat <- merge(dist.size, aux, by = "lauth", sort = FALSE)
	aux.seat$density <- aux.seat$PollingList20140911 / aux.seat$area
	aux.seat$density.sc <- as.vector(scale(aux.seat$density))
	aux.seat$SNP.sc <- as.vector(scale(aux.seat$SNPFirstVote))



	## ----neighbours, echo = FALSE--------------------------------------------
	area.spdf <- SpatialPolygonsDataFrame(sp.const, dist.size)

	## Reorder according to lauth
	area.spdf <- area.spdf[order(area.spdf$lauth),]

	# Create spatial weights
	nb.districts <- poly2nb(area.spdf)

	### There are some joins to add
	### Specifically
	## [1] "Eilean Siar" (13) "Orkney Islands" (23) "Shetland Islands" (27)
	## Each of these can be joined to the highlands (17)
	joins.to.add <- data.frame(A = c(13,23,27),
	B = c(17,17,17))

	for (i in 1:nrow(joins.to.add)) {
	# a <- as.integer(const.lookup$refno.num[which(const.lookup$refno==joins.to.add[i,1])])
	# b <- as.integer(const.lookup$refno.num[which(const.lookup$refno==joins.to.add[i,2])])
	a <- as.integer(joins.to.add[i,1])
	b <- as.integer(joins.to.add[i,2])
	nb.districts[[a]] <- c(nb.districts[[a]],b)
	nb.districts[[a]] <-nb.districts[[a]][nb.districts[[a]]!=0]
	nb.districts[[b]] <- c(nb.districts[[b]],a)
	nb.districts[[b]] <-nb.districts[[b]][nb.districts[[b]]!=0]
	}
	nb <- unlist(nb.districts)
	weight <- rep(1, times=length(nb))
	num <- card(nb.districts)



	spdata <- list("nb", "weight", "num")




	## ----bugsestimation, echo = FALSE, cache = TRUE--------------------------
	model <- function() {

	for(i in 1:nObs) { # loop over unit observations
	y[i] ~ dbern(p[i])
	logit(p[i]) <- alpha + beta[constindex[i]] + v[constindex[i]]
	}

	for (j in 1:nConst) { ## loop over constituencies
	beta[j] <- beta.densitydensity[j] + beta.snpsnp[j] + u[j]
	u[j] ~ dnorm(0, tauu) ## unstructured RE
	}

	v[1:nConst] ~ car.normal(nb[], weight[], num[], tauv) # impose CAR distribution on CAR-structured REs

	alpha ~ dflat()
	beta.density ~ dflat()
	beta.snp ~ dflat()

	tauu <- pow(sigmau, -2)
	sigmau ~ dunif(1e-16,2)
	tauv <- pow(sigmav, -2)
	sigmav ~ dunif(1e-16,2)
	}

	write.model(model, "model.bug")

	y <- bes$scotReferendumYes
	nObs <- length(y)
	nConst <- max(bes$lauth)


	density <- as.numeric(aux.seat$density.sc)
	snp <- aux.seat$SNP.sc
	#distsize <- dist.size$area
	constindex <- bes$lauth
	data <- list("nObs", "nConst", "y", "constindex","density","snp")

	inits <- function() {list(alpha=rnorm(1),
	beta.density=rnorm(1),
	beta.snp=rnorm(1),
	sigmau=runif(1,0,1),
	v=rnorm(nConst,0,1),
	u=rnorm(nConst,0,1),
	sigmav=runif(1,0,1))
	}

	getBugsDir <- function() {
	if (.Platform$OS.type=="unix") {
	return("/home/chris/.wine/drive_c/Program Files (x86)/WinBUGS14")
	}
	}

	my.bugs.directory<- getBugsDir()
	set.seed(180914)
	my.iter <- 5000
	my.burnin <- 1000
	my.thin <- 5
	my.debug <- FALSE
	model.sim <- bugs(data=c(data, spdata),
	inits=inits,
	model.file="model.bug",
	parameters.to.save=c("beta.density", "beta.snp","beta", "alpha","u","v"),
	n.chains=3,
	n.iter=my.iter,
	n.burnin=my.burnin,
	n.thin=my.thin,
	bugs.directory=my.bugs.directory,
	# working.directory=".",
	debug=my.debug)

	attach.bugs(model.sim)



	## ----predsout, echo = FALSE----------------------------------------------
	## Get mean predictions
	meanpreds <- inv.logit(model.sim$mean$alpha + model.sim$mean$beta + model.sim$mean$v)

	pred.df <- inv.logit(model.sim$sims.list$alpha + model.sim$sims.list$beta + model.sim$sims.list$v)
	pred.df <- melt(pred.df)
	names(pred.df) <- c("Simulation","lauth","pred")
	pred.df <- ddply(pred.df,.(lauth),function(df) {
	data.frame(xbar = mean(df$pred),
	xhi = quantile(df$pred,0.95),
	xlo = quantile(df$pred,0.05))
	})

	### Now subtract the sample average
	sample.avg <- mean(bes$scotReferendumYes,na.rm=T)
	pred.df$xbar <- pred.df$xbar - sample.avg
	pred.df$xhi <- pred.df$xhi - sample.avg
	pred.df$xlo <- pred.df$xlo - sample.avg
	pred.df$profile_oslaua <- levels(bes$profile_oslaua)[pred.df$lauth]

	### Sort it
	pred.df <- pred.df[order(pred.df$xbar),]
	pred.df$profile_oslaua <- factor(pred.df$profile_oslaua,
	levels = pred.df$profile_oslaua,
	ordered = TRUE)

	### Set up the labels
	pred.df$hjust <- ifelse(pred.df$xbar > 0, 1, 0)
	pred.df$xpos <- ifelse(pred.df$xbar >0 ,pred.df$xlo, pred.df$xhi)
	adj <- diff(range(pred.df$xbar)) * 0.025
	pred.df$xpos <- pred.df$xpos + adj * ifelse(pred.df$xbar > 0, -1, 1)
	pred.df$Type <- ifelse(pred.df$xbar > 0, "More favourable to independence","Less favourable to independence")
	yeslogo <- readPNG("yesshadow.png")
	nologo <- readPNG("nothanks.png")
	yeslogo <- rasterGrob(yeslogo, interpolate = TRUE)
	nologo <- rasterGrob(nologo, interpolate = TRUE)
	### Plot this
	difference.plot <- ggplot(pred.df,
	aes(y = xbar, ymin = xlo, ymax = xhi, x = profile_oslaua)) +
	geom_pointrange(aes(colour= Type)) +
	scale_x_discrete("") +
	scale_colour_discrete("",guide = FALSE) +
	scale_y_continuous("Difference to national Yes %",
	breaks = c(-.1,-0.05,-0.01,.1,0.05,0.01),
	minor_breaks = seq(-0.12,0.12,by = 0.01),
	labels = percent) +
	geom_text(aes(x = profile_oslaua, label = profile_oslaua, hjust = hjust, y = xpos)) +
	geom_hline(yintercept = 0, colour = 'darkgray') +
	annotation_custom(nologo, xmin=4, xmax=8, ymin=-0.25, ymax=0) +
	annotation_custom(yeslogo, xmin=25.5, xmax=30.5, ymin=0, ymax=0.25) +
	coord_flip() +
	theme_bw() +
	theme(axis.text.y = element_blank(), axis.ticks.y = element_blank(),
	panel.grid.minor.x = element_line(colour="#BBBBBB", size=0.25),
	panel.grid.minor.y = element_blank(),
	panel.grid.major.y = element_blank())


	pdf(file="difference_plot.pdf",width = 8, height= 11)
	print(difference.plot)
	dev.off()

	png(file="difference_plot.png",width = 872, height= 1172)
	print(difference.plot)
	dev.off()

	### Dump this as a CSV file
	write.csv(pred.df,file="preds.csv",row.names= FALSE)