Skip to content

Instantly share code, notes, and snippets.

@chrishanretty
Created September 12, 2014 14:52
Show Gist options
  • Save chrishanretty/cefe342c48f02ff9a763 to your computer and use it in GitHub Desktop.
Save chrishanretty/cefe342c48f02ff9a763 to your computer and use it in GitHub Desktop.
Small area estimates of Scottish independence referendum vote intention
## ----loadlibs, echo = FALSE----------------------------------------------
library(foreign)
library(lme4)
library(maptools)
library(car)
library(R2WinBUGS)
library(spdep)
library(ggplot2)
library(scales)
library(grid)
library(png)
library(reshape)
library(plyr)
## ----loaddata, echo = FALSE----------------------------------------------
bes <- read.spss("BES2015_W2_Panel_v2.0.sav",
to.data.frame = TRUE)
## ----subset, echo = FALSE------------------------------------------------
### Subset
### (1) to Scotland
bes <- subset(bes, bes$gor == "Scotland")
scots.respondents <- nrow(bes)
table(as.character(bes$profile_oslaua))
ew.lauths <- c("Bedford","Cardiff", "Mid Devon", "Mid Suffolk",
"North Warwickshire","not used", "Redcar and Cleveland",
"Sefton","Teignbridge")
bes <- subset(bes, !is.element(profile_oslaua, ew.lauths))
bes$profile_oslaua <- factor(as.character(bes$profile_oslaua))
### (2) to those who will vote in the referendum
table(as.character(bes$scotReferendumIntentionW2))
bes <- subset(bes, scotReferendumIntentionW2 %in% c("Will vote no","Will vote 'Yes'"))
bes$scotReferendumYes <- as.numeric(bes$scotReferendumIntentionW2 == "Will vote 'Yes'")
## ----shapefiles, echo = FALSE--------------------------------------------
### Next, get shapefiles, and from that, density
x <- readShapePoly("shapefiles/district_borough_unitary_region.shp")
### Get just Scottish local authorities
x <- subset(x, NAME %in% c("Angus","Clackmannanshire", "Dundee City",
"East Ayrshire", "East Dunbartonshire","East Renfrewshire", "Falkirk",
"Glasgow City", "Inverclyde","Midlothian", "North Lanarkshire",
"Perth and Kinross", "Renfrewshire","Scottish Borders", "South Lanarkshire",
"Stirling", "West Dunbartonshire","West Lothian", "Highland",
"Moray", "Orkney Islands","Na h-Eileanan an Iar", "Argyll and Bute",
"Aberdeenshire", "Fife","Aberdeen City", "City of Edinburgh",
"East Lothian", "Shetland Islands","North Ayrshire", "Dumfries and Galloway",
"South Ayrshire"))
x$NAME <- as.character(x$NAME)
x$NAME <- car:::recode(x$NAME,
"'Perth and Kinross'='Perth & Kinross';
'Na h-Eileanan an Iar'='Eilean Siar';
'Argyll and Bute'='Argyll & Bute';
'City of Edinburgh'='Edinburgh, City of';
'Dumfries and Galloway'='Dumfries & Galloway'")
### Make sure the names have the same factor levels
bes$profile_oslaua <- factor(bes$profile_oslaua)
x$NAME <- factor(x$NAME,levels = levels(bes$profile_oslaua))
### Convert both to numeric
bes$lauth <- as.numeric(bes$profile_oslaua)
x$lauth <- as.numeric(x$NAME)
# Create spatial polygons object
gpclibPermit()
sp.const <- unionSpatialPolygons(x, x$lauth)
# Lose detail
sp.const <- gSimplify(sp.const,100)
## Get district size
dist.size <- as.data.frame(cbind(x$HECTARES, x$lauth))
colnames(dist.size) <- c("area","lauth")
## Order is important
dist.size<-dist.size[order(dist.size$lauth),]
rownames(dist.size)<-dist.size$lauth
## ----auxinfo, echo = FALSE-----------------------------------------------
## Read in auxiliary info
aux <- read.csv("lauth_auxiliary.csv",header=T)
aux$lauth <- as.numeric(factor(aux$Authority,levels = levels(bes$profile_oslaua)))
aux.seat <- merge(dist.size, aux, by = "lauth", sort = FALSE)
aux.seat$density <- aux.seat$PollingList20140911 / aux.seat$area
aux.seat$density.sc <- as.vector(scale(aux.seat$density))
aux.seat$SNP.sc <- as.vector(scale(aux.seat$SNPFirstVote))
## ----neighbours, echo = FALSE--------------------------------------------
area.spdf <- SpatialPolygonsDataFrame(sp.const, dist.size)
## Reorder according to lauth
area.spdf <- area.spdf[order(area.spdf$lauth),]
# Create spatial weights
nb.districts <- poly2nb(area.spdf)
### There are some joins to add
### Specifically
## [1] "Eilean Siar" (13) "Orkney Islands" (23) "Shetland Islands" (27)
## Each of these can be joined to the highlands (17)
joins.to.add <- data.frame(A = c(13,23,27),
B = c(17,17,17))
for (i in 1:nrow(joins.to.add)) {
# a <- as.integer(const.lookup$refno.num[which(const.lookup$refno==joins.to.add[i,1])])
# b <- as.integer(const.lookup$refno.num[which(const.lookup$refno==joins.to.add[i,2])])
a <- as.integer(joins.to.add[i,1])
b <- as.integer(joins.to.add[i,2])
nb.districts[[a]] <- c(nb.districts[[a]],b)
nb.districts[[a]] <-nb.districts[[a]][nb.districts[[a]]!=0]
nb.districts[[b]] <- c(nb.districts[[b]],a)
nb.districts[[b]] <-nb.districts[[b]][nb.districts[[b]]!=0]
}
nb <- unlist(nb.districts)
weight <- rep(1, times=length(nb))
num <- card(nb.districts)
spdata <- list("nb", "weight", "num")
## ----bugsestimation, echo = FALSE, cache = TRUE--------------------------
model <- function() {
for(i in 1:nObs) { # loop over unit observations
y[i] ~ dbern(p[i])
logit(p[i]) <- alpha + beta[constindex[i]] + v[constindex[i]]
}
for (j in 1:nConst) { ## loop over constituencies
beta[j] <- beta.density*density[j] + beta.snp*snp[j] + u[j]
u[j] ~ dnorm(0, tauu) ## unstructured RE
}
v[1:nConst] ~ car.normal(nb[], weight[], num[], tauv) # impose CAR distribution on CAR-structured REs
alpha ~ dflat()
beta.density ~ dflat()
beta.snp ~ dflat()
tauu <- pow(sigmau, -2)
sigmau ~ dunif(1e-16,2)
tauv <- pow(sigmav, -2)
sigmav ~ dunif(1e-16,2)
}
write.model(model, "model.bug")
y <- bes$scotReferendumYes
nObs <- length(y)
nConst <- max(bes$lauth)
density <- as.numeric(aux.seat$density.sc)
snp <- aux.seat$SNP.sc
#distsize <- dist.size$area
constindex <- bes$lauth
data <- list("nObs", "nConst", "y", "constindex","density","snp")
inits <- function() {list(alpha=rnorm(1),
beta.density=rnorm(1),
beta.snp=rnorm(1),
sigmau=runif(1,0,1),
v=rnorm(nConst,0,1),
u=rnorm(nConst,0,1),
sigmav=runif(1,0,1))
}
getBugsDir <- function() {
if (.Platform$OS.type=="unix") {
return("/home/chris/.wine/drive_c/Program Files (x86)/WinBUGS14")
}
}
my.bugs.directory<- getBugsDir()
set.seed(180914)
my.iter <- 5000
my.burnin <- 1000
my.thin <- 5
my.debug <- FALSE
model.sim <- bugs(data=c(data, spdata),
inits=inits,
model.file="model.bug",
parameters.to.save=c("beta.density", "beta.snp","beta", "alpha","u","v"),
n.chains=3,
n.iter=my.iter,
n.burnin=my.burnin,
n.thin=my.thin,
bugs.directory=my.bugs.directory,
# working.directory=".",
debug=my.debug)
attach.bugs(model.sim)
## ----predsout, echo = FALSE----------------------------------------------
## Get mean predictions
meanpreds <- inv.logit(model.sim$mean$alpha + model.sim$mean$beta + model.sim$mean$v)
pred.df <- inv.logit(model.sim$sims.list$alpha + model.sim$sims.list$beta + model.sim$sims.list$v)
pred.df <- melt(pred.df)
names(pred.df) <- c("Simulation","lauth","pred")
pred.df <- ddply(pred.df,.(lauth),function(df) {
data.frame(xbar = mean(df$pred),
xhi = quantile(df$pred,0.95),
xlo = quantile(df$pred,0.05))
})
### Now subtract the sample average
sample.avg <- mean(bes$scotReferendumYes,na.rm=T)
pred.df$xbar <- pred.df$xbar - sample.avg
pred.df$xhi <- pred.df$xhi - sample.avg
pred.df$xlo <- pred.df$xlo - sample.avg
pred.df$profile_oslaua <- levels(bes$profile_oslaua)[pred.df$lauth]
### Sort it
pred.df <- pred.df[order(pred.df$xbar),]
pred.df$profile_oslaua <- factor(pred.df$profile_oslaua,
levels = pred.df$profile_oslaua,
ordered = TRUE)
### Set up the labels
pred.df$hjust <- ifelse(pred.df$xbar > 0, 1, 0)
pred.df$xpos <- ifelse(pred.df$xbar >0 ,pred.df$xlo, pred.df$xhi)
adj <- diff(range(pred.df$xbar)) * 0.025
pred.df$xpos <- pred.df$xpos + adj * ifelse(pred.df$xbar > 0, -1, 1)
pred.df$Type <- ifelse(pred.df$xbar > 0, "More favourable to independence","Less favourable to independence")
yeslogo <- readPNG("yesshadow.png")
nologo <- readPNG("nothanks.png")
yeslogo <- rasterGrob(yeslogo, interpolate = TRUE)
nologo <- rasterGrob(nologo, interpolate = TRUE)
### Plot this
difference.plot <- ggplot(pred.df,
aes(y = xbar, ymin = xlo, ymax = xhi, x = profile_oslaua)) +
geom_pointrange(aes(colour= Type)) +
scale_x_discrete("") +
scale_colour_discrete("",guide = FALSE) +
scale_y_continuous("Difference to national Yes %",
breaks = c(-.1,-0.05,-0.01,.1,0.05,0.01),
minor_breaks = seq(-0.12,0.12,by = 0.01),
labels = percent) +
geom_text(aes(x = profile_oslaua, label = profile_oslaua, hjust = hjust, y = xpos)) +
geom_hline(yintercept = 0, colour = 'darkgray') +
annotation_custom(nologo, xmin=4, xmax=8, ymin=-0.25, ymax=0) +
annotation_custom(yeslogo, xmin=25.5, xmax=30.5, ymin=0, ymax=0.25) +
coord_flip() +
theme_bw() +
theme(axis.text.y = element_blank(), axis.ticks.y = element_blank(),
panel.grid.minor.x = element_line(colour="#BBBBBB", size=0.25),
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_blank())
pdf(file="difference_plot.pdf",width = 8, height= 11)
print(difference.plot)
dev.off()
png(file="difference_plot.png",width = 8*72, height= 11*72)
print(difference.plot)
dev.off()
### Dump this as a CSV file
write.csv(pred.df,file="preds.csv",row.names= FALSE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment