Created
May 20, 2017 13:45
-
-
Save chrishanretty/f9e6467f43ce3d6357fb3ba18557286b to your computer and use it in GitHub Desktop.
Dirichlet-multinomial seat model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(dplyr) | |
library(reshape2) | |
library(nnet) | |
library(multinomRob) | |
library(VGAM) | |
#type <- "nnet" | |
type <- "vgam" | |
sim.multinom <- function(object = mod, newdata = NULL, nSims = 10) { | |
### Some of this code is hacked from the internals of | |
### nnet::predict.multinom | |
## k = overdispersion/scale parameter | |
k <- 1 | |
require(MASS) | |
require(nnet) | |
coefs <- coef(object) | |
party.names <- dimnames(coefs)[[1]] | |
nOpts <- dim(coefs)[1] | |
nVars <- dim(coefs)[2] | |
vc <- vcov(object) | |
vc <- k * vc | |
coefs <- as.vector(t(coefs)) | |
simcoefs <- mvrnorm(n = nSims, mu = coefs, Sigma = vc) | |
### (predict.multinom) | |
newdata <- as.data.frame(newdata) | |
rn <- row.names(newdata) | |
Terms <- delete.response(object$terms) | |
m <- model.matrix(Terms, newdata, na.action = na.omit, | |
xlev = object$xlevels) | |
simprobs <- array(NA, dim = c(nSims, nrow(m), nOpts + 1)) | |
dimnames(simprobs)[[2]] <- rn | |
dimnames(simprobs)[[3]] <- c("Conservative", party.names) | |
for (i in 1:nSims) { | |
out <- rep(1, nrow(m)) | |
for (n in 1:nOpts) { | |
slicestart <- ((n-1)*nVars+1) | |
slicestop <- n * nVars | |
out <- cbind(out, exp(m %*% simcoefs[i, slicestart:slicestop])) | |
} | |
out <- out / rowSums(out) | |
colnames(out) <- c("Conservative", party.names) | |
simprobs[i,,] <- out | |
} | |
return(simprobs) | |
} | |
sim.dirmult <- function(object = mod, newdata = NULL, nSims = 10) { | |
### Some of this code is hacked from the internals of | |
### nnet::predict.multinom | |
require(VGAM) | |
require(MASS) | |
coefs <- coef(object) | |
nOpts <- ncol(object@y) - 1 | |
nVars <- length(coefs) | |
vc <- vcov(object) | |
coefs <- as.vector(t(coefs)) | |
simcoefs <- mvrnorm(n = nSims, mu = coefs, Sigma = vc) | |
### (predict.multinom) | |
newdata <- as.data.frame(newdata) | |
rn <- row.names(newdata) | |
Terms <- delete.response(object@terms) | |
m <- model.matrix(Terms, newdata, na.action = na.omit, | |
xlev = object@xlevels) | |
simprobs <- array(NA, dim = c(nSims, nrow(m), nOpts + 1)) | |
dimnames(simprobs)[[2]] <- rn | |
for (i in 1:nSims) { | |
out <- rep(1, nrow(m)) | |
for (n in 1:nOpts) { | |
varpos <- grep(paste0(":", n, "$"), colnames(simcoefs)) | |
out <- cbind(out, exp(m %*% simcoefs[i, varpos])) | |
} | |
### Note: VGAM uses last category as reference category | |
out <- out[, c(2:ncol(out), 1)] | |
out <- out / rowSums(out) | |
simprobs[i,,] <- out | |
} | |
return(simprobs) | |
} | |
election.date <- as.Date("2017-06-08") | |
origin.date <- election.date - 365 | |
nSims <- 1000 | |
### | |
###dat <- readRDS(file="working/constituency/2017constituencypollingdata.rds") | |
### | |
### Replace this w/ your path, and download file from | |
### https://drive.google.com/open?id=0B1LkeTCb2GrxTk1pU2dtREZWSUU | |
### | |
### Leave vote | |
dat$Figure.to.use <- dat$Figure.to.use - 0.5 | |
eng <- subset(dat, grepl("^E", ONSConstID)) | |
wal <- subset(dat, grepl("^W", ONSConstID)) | |
sco <- subset(dat, grepl("^S", ONSConstID)) | |
rownames(eng) <- eng$ONSConstID | |
rownames(wal) <- wal$ONSConstID | |
rownames(sco) <- sco$ONSConstID | |
### England | |
### Zero out Buckingham | |
eng[which(eng$pcon == "Buckingham"), | |
c("Conservative", "Labour", "Liberal Democrat", | |
"Green Party", "United Kingdom Independence Party (UKIP)", | |
"Other")] <- c(rep(0, 5), 1) | |
eng$Winner15 <- dplyr::recode(eng$Winner15, | |
Conservative = 'Conservative', | |
Labour = 'Labour', | |
.default = "Other") | |
eng$Winner15 <- relevel(eng$Winner15, "Other") | |
eng$Region <- as.character(eng$Region) | |
eng.form <- cbind(Conservative, Labour, `Liberal Democrat`, | |
`Green Party`, `United Kingdom Independence Party (UKIP)`, | |
Other) ~ | |
(log1p(Lab15) + log1p(Con15) + log1p(LD15) + log1p(UKIP15)) * Winner15 + | |
(log1p(Lab15) + log1p(Con15) + log1p(LD15) + log1p(UKIP15)) * Figure.to.use + | |
(log1p(Lab15) + log1p(Con15) + log1p(LD15) + log1p(UKIP15)) * Region | |
if (type != "vgam") { | |
AIC(eng_mod <- multinom(eng.form, | |
data = eng, | |
weights = eng$total, | |
maxit = 250)) | |
fits <- fitted(eng_mod) | |
eng_tally <- apply(fits, 1, which.max) | |
eng_sims <- sim.multinom(eng_mod, eng, nSims = nSims) | |
} else { | |
eng.form <- update(eng.form, | |
cbind(Conservative, Labour, `Liberal Democrat`, | |
`Green Party`, `United Kingdom Independence Party (UKIP)`, | |
Other) ~ .) | |
M <- 6 | |
eng_mod <- vglm(eng.form, | |
dirmultinomial, | |
data= eng, | |
maxit = 250) | |
fits <- fitted(eng_mod) | |
eng_sims <- sim.dirmult(eng_mod, eng, nSims = nSims) | |
### Re-order | |
### eng_sims <- eng_sims[,,c(6,2:5,1)] | |
} | |
### Check | |
predbar <- apply(eng_sims, c(2,3), mean) | |
cook <- matrix(NA, nrow = nrow(predbar), ncol = ncol(predbar)) | |
for (i in 1:6) { | |
y <- fits[,i] | |
x <- predbar[,i] | |
mod <- lm(y ~ x) | |
cook[,i] <- cooks.distance(mod) | |
} | |
### Zero out Cook's distance for Buckingham | |
cook[which(rownames(predbar) == "E14000608"),] <- rep(NA, ncol(cook)) | |
if (any(cook > 4)) { | |
warning(paste0(sum(cook > 4, na.rm = TRUE), | |
" obs w/ Cook's distance > 4 in English data; funky covariance matrix")) | |
pos <- which(apply(cook, 1, max) > 4) | |
eng_sims[,pos,] <- matrix(fits[pos,], nrow = nSims, ncol = 6, byrow = TRUE) | |
} | |
### Now repeat for Scotland | |
### Scotland | |
if (type == "vgam") { | |
sco$Winner15 <- as.character(sco$Winner15) | |
sco$Winner15 <- dplyr::recode(sco$Winner15, | |
`Scottish National Party`='Scottish National Party', | |
.default = "Other") | |
sco.form <- cbind(Conservative, Labour, `Liberal Democrat`, `Scottish National Party (SNP)`, | |
`Green Party`, `United Kingdom Independence Party (UKIP)`, | |
Other) ~ | |
(log1p(Con15) + log1p(SNP15)) * Winner15 + | |
(log1p(Lab15) + log1p(Con15) + log1p(LD15) + log1p(SNP15)) * Figure.to.use | |
} else { | |
sco$Winner15 <- factor(as.character(sco$Winner15)) | |
sco.form <- cbind(Other, Labour, `Liberal Democrat`, `Scottish National Party (SNP)`, | |
`Green Party`, `United Kingdom Independence Party (UKIP)`, | |
Conservative) ~ | |
(log1p(Lab15) + log1p(Con15) + log1p(LD15) + log1p(SNP15)) * Winner15 + | |
(log1p(Lab15) + log1p(Con15) + log1p(LD15) + log1p(SNP15)) * Figure.to.use | |
} | |
if (type != "vgam") { | |
AIC(sco_mod <- multinom(sco.form, | |
data = sco, | |
weights = sco$total, | |
maxit = 250)) | |
fits <- fitted(sco_mod) | |
sco_tally <- apply(fits, 1, which.max) | |
sco_sims <- sim.multinom(sco_mod, sco, nSims = nSims) | |
} else { | |
M <- 7 | |
sco_mod <- vglm(sco.form, | |
dirmultinomial, | |
data= sco, | |
maxit = 250) | |
fits <- fitted(sco_mod) | |
sco_sims <- sim.dirmult(sco_mod, sco, nSims = nSims) | |
### Re-order | |
sco_sims <- sco_sims[,,c(7,2:6,1)] | |
} | |
### Check whether there are issues w/ flakey predictions | |
predbar <- apply(sco_sims, c(2,3), mean) | |
cook <- numeric(0) | |
for (i in 1:7) { | |
y <- fits[,i] | |
x <- predbar[,i] | |
mod <- lm(y ~ x) | |
cook <- c(cook, cooks.distance(mod)) | |
} | |
if (any(cook > 4)) { | |
warning("Cook's distance > 4 in Scottish data!") | |
pos <- which(apply(cook, 1, max) > 4) | |
sco_sims[,pos,] <- matrix(fits[pos,], nrow = nSims, ncol = 6, byrow = TRUE) | |
} | |
### Wales | |
if (type == "vgam") { | |
wal$Winner15 <- as.character(wal$Winner15) | |
wal$Winner15 <- dplyr::recode(wal$Winner15, | |
Labour = 'Labour', | |
.default = 'Other') | |
wal.form <- cbind(Other, Labour, `Liberal Democrat`, `Plaid Cymru`, | |
`Green Party`, `United Kingdom Independence Party (UKIP)`, | |
Conservative) ~ | |
(log1p(Lab15) + log1p(Con15) + log1p(LD15) + log1p(PC15)) * Winner15 + | |
(log1p(Lab15) + log1p(Con15) + log1p(LD15) + log1p(PC15)) * Figure.to.use | |
} else { | |
wal.form <- cbind(Conservative, Labour, `Liberal Democrat`, `Plaid Cymru`, | |
`Green Party`, `United Kingdom Independence Party (UKIP)`, | |
Other) ~ | |
(log1p(Lab15) + log1p(Con15) + log1p(LD15) + log1p(PC15)) * Winner15 + | |
(log1p(Lab15) + log1p(Con15) + log1p(LD15) + log1p(PC15)) * Figure.to.use | |
wal$Winner15 <- factor(as.character(wal$Winner15)) | |
} | |
if (type != "vgam") { | |
AIC(wal_mod <- multinom(wal.form, | |
data = wal, | |
weights = wal$total, | |
maxit = 250)) | |
fits <- fitted(wal_mod) | |
wal_tally <- apply(fits, 1, which.max) | |
wal_sims <- sim.multinom(wal_mod, wal, nSims = nSims) | |
} else { | |
M <- 7 | |
wal_mod <- vglm(wal.form, | |
dirmultinomial, | |
data= wal, | |
maxit = 250) | |
wal_sims <- sim.dirmult(wal_mod, wal, nSims = nSims) | |
### Re-order | |
wal_sims <- wal_sims[,,c(7,2:6,1)] | |
} | |
### Check whether there are issues w/ flakey predictions | |
predbar <- apply(wal_sims, c(2,3), mean) | |
cook <- numeric(0) | |
for (i in 1:7) { | |
y <- fits[,i] | |
x <- predbar[,i] | |
mod <- lm(y ~ x) | |
cook <- c(cook, cooks.distance(mod)) | |
} | |
if (any(cook > 4)) { | |
warning("Cook's distance > 4 in Scottish data!") | |
pos <- which(apply(cook, 1, max) > 4) | |
wal_sims[,pos,] <- matrix(fits[pos,], nrow = nSims, ncol = 6, byrow = TRUE) | |
} | |
### What is the variance across simulations? | |
simvar <- apply(eng_sims, c(2, 3), sd) | |
summary(as.vector(unlist(simvar))) | |
### Bind these matrices, filling in zeros | |
nParties <- 8 | |
nEngConst <- dim(eng_sims)[2] | |
nWalConst <- dim(wal_sims)[2] | |
nScoConst <- dim(sco_sims)[2] | |
nConst <- nEngConst + nWalConst + nScoConst | |
sims <- array(0, dim = c(nSims, nConst, nParties)) | |
sims[1:nSims, 1:nEngConst, c(1:3, 6:8)] <- eng_sims | |
sims[1:nSims, (nEngConst+1):(nEngConst + nWalConst), c(1:3, 5:8)] <- wal_sims | |
sims[1:nSims, (nEngConst+nWalConst+1):nConst, c(1:4,6:8)] <- sco_sims | |
dimnames(sims)[[2]] <- c(dimnames(eng_sims)[[2]], | |
dimnames(wal_sims)[[2]], | |
dimnames(sco_sims)[[2]]) | |
refnos <- dimnames(sims)[[2]] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment