Skip to content

Instantly share code, notes, and snippets.

@chrishanretty
Created May 20, 2017 13:45
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chrishanretty/f9e6467f43ce3d6357fb3ba18557286b to your computer and use it in GitHub Desktop.
Save chrishanretty/f9e6467f43ce3d6357fb3ba18557286b to your computer and use it in GitHub Desktop.
Dirichlet-multinomial seat model
library(dplyr)
library(reshape2)
library(nnet)
library(multinomRob)
library(VGAM)
#type <- "nnet"
type <- "vgam"
sim.multinom <- function(object = mod, newdata = NULL, nSims = 10) {
### Some of this code is hacked from the internals of
### nnet::predict.multinom
## k = overdispersion/scale parameter
k <- 1
require(MASS)
require(nnet)
coefs <- coef(object)
party.names <- dimnames(coefs)[[1]]
nOpts <- dim(coefs)[1]
nVars <- dim(coefs)[2]
vc <- vcov(object)
vc <- k * vc
coefs <- as.vector(t(coefs))
simcoefs <- mvrnorm(n = nSims, mu = coefs, Sigma = vc)
### (predict.multinom)
newdata <- as.data.frame(newdata)
rn <- row.names(newdata)
Terms <- delete.response(object$terms)
m <- model.matrix(Terms, newdata, na.action = na.omit,
xlev = object$xlevels)
simprobs <- array(NA, dim = c(nSims, nrow(m), nOpts + 1))
dimnames(simprobs)[[2]] <- rn
dimnames(simprobs)[[3]] <- c("Conservative", party.names)
for (i in 1:nSims) {
out <- rep(1, nrow(m))
for (n in 1:nOpts) {
slicestart <- ((n-1)*nVars+1)
slicestop <- n * nVars
out <- cbind(out, exp(m %*% simcoefs[i, slicestart:slicestop]))
}
out <- out / rowSums(out)
colnames(out) <- c("Conservative", party.names)
simprobs[i,,] <- out
}
return(simprobs)
}
sim.dirmult <- function(object = mod, newdata = NULL, nSims = 10) {
### Some of this code is hacked from the internals of
### nnet::predict.multinom
require(VGAM)
require(MASS)
coefs <- coef(object)
nOpts <- ncol(object@y) - 1
nVars <- length(coefs)
vc <- vcov(object)
coefs <- as.vector(t(coefs))
simcoefs <- mvrnorm(n = nSims, mu = coefs, Sigma = vc)
### (predict.multinom)
newdata <- as.data.frame(newdata)
rn <- row.names(newdata)
Terms <- delete.response(object@terms)
m <- model.matrix(Terms, newdata, na.action = na.omit,
xlev = object@xlevels)
simprobs <- array(NA, dim = c(nSims, nrow(m), nOpts + 1))
dimnames(simprobs)[[2]] <- rn
for (i in 1:nSims) {
out <- rep(1, nrow(m))
for (n in 1:nOpts) {
varpos <- grep(paste0(":", n, "$"), colnames(simcoefs))
out <- cbind(out, exp(m %*% simcoefs[i, varpos]))
}
### Note: VGAM uses last category as reference category
out <- out[, c(2:ncol(out), 1)]
out <- out / rowSums(out)
simprobs[i,,] <- out
}
return(simprobs)
}
election.date <- as.Date("2017-06-08")
origin.date <- election.date - 365
nSims <- 1000
###
###dat <- readRDS(file="working/constituency/2017constituencypollingdata.rds")
###
### Replace this w/ your path, and download file from
### https://drive.google.com/open?id=0B1LkeTCb2GrxTk1pU2dtREZWSUU
###
### Leave vote
dat$Figure.to.use <- dat$Figure.to.use - 0.5
eng <- subset(dat, grepl("^E", ONSConstID))
wal <- subset(dat, grepl("^W", ONSConstID))
sco <- subset(dat, grepl("^S", ONSConstID))
rownames(eng) <- eng$ONSConstID
rownames(wal) <- wal$ONSConstID
rownames(sco) <- sco$ONSConstID
### England
### Zero out Buckingham
eng[which(eng$pcon == "Buckingham"),
c("Conservative", "Labour", "Liberal Democrat",
"Green Party", "United Kingdom Independence Party (UKIP)",
"Other")] <- c(rep(0, 5), 1)
eng$Winner15 <- dplyr::recode(eng$Winner15,
Conservative = 'Conservative',
Labour = 'Labour',
.default = "Other")
eng$Winner15 <- relevel(eng$Winner15, "Other")
eng$Region <- as.character(eng$Region)
eng.form <- cbind(Conservative, Labour, `Liberal Democrat`,
`Green Party`, `United Kingdom Independence Party (UKIP)`,
Other) ~
(log1p(Lab15) + log1p(Con15) + log1p(LD15) + log1p(UKIP15)) * Winner15 +
(log1p(Lab15) + log1p(Con15) + log1p(LD15) + log1p(UKIP15)) * Figure.to.use +
(log1p(Lab15) + log1p(Con15) + log1p(LD15) + log1p(UKIP15)) * Region
if (type != "vgam") {
AIC(eng_mod <- multinom(eng.form,
data = eng,
weights = eng$total,
maxit = 250))
fits <- fitted(eng_mod)
eng_tally <- apply(fits, 1, which.max)
eng_sims <- sim.multinom(eng_mod, eng, nSims = nSims)
} else {
eng.form <- update(eng.form,
cbind(Conservative, Labour, `Liberal Democrat`,
`Green Party`, `United Kingdom Independence Party (UKIP)`,
Other) ~ .)
M <- 6
eng_mod <- vglm(eng.form,
dirmultinomial,
data= eng,
maxit = 250)
fits <- fitted(eng_mod)
eng_sims <- sim.dirmult(eng_mod, eng, nSims = nSims)
### Re-order
### eng_sims <- eng_sims[,,c(6,2:5,1)]
}
### Check
predbar <- apply(eng_sims, c(2,3), mean)
cook <- matrix(NA, nrow = nrow(predbar), ncol = ncol(predbar))
for (i in 1:6) {
y <- fits[,i]
x <- predbar[,i]
mod <- lm(y ~ x)
cook[,i] <- cooks.distance(mod)
}
### Zero out Cook's distance for Buckingham
cook[which(rownames(predbar) == "E14000608"),] <- rep(NA, ncol(cook))
if (any(cook > 4)) {
warning(paste0(sum(cook > 4, na.rm = TRUE),
" obs w/ Cook's distance > 4 in English data; funky covariance matrix"))
pos <- which(apply(cook, 1, max) > 4)
eng_sims[,pos,] <- matrix(fits[pos,], nrow = nSims, ncol = 6, byrow = TRUE)
}
### Now repeat for Scotland
### Scotland
if (type == "vgam") {
sco$Winner15 <- as.character(sco$Winner15)
sco$Winner15 <- dplyr::recode(sco$Winner15,
`Scottish National Party`='Scottish National Party',
.default = "Other")
sco.form <- cbind(Conservative, Labour, `Liberal Democrat`, `Scottish National Party (SNP)`,
`Green Party`, `United Kingdom Independence Party (UKIP)`,
Other) ~
(log1p(Con15) + log1p(SNP15)) * Winner15 +
(log1p(Lab15) + log1p(Con15) + log1p(LD15) + log1p(SNP15)) * Figure.to.use
} else {
sco$Winner15 <- factor(as.character(sco$Winner15))
sco.form <- cbind(Other, Labour, `Liberal Democrat`, `Scottish National Party (SNP)`,
`Green Party`, `United Kingdom Independence Party (UKIP)`,
Conservative) ~
(log1p(Lab15) + log1p(Con15) + log1p(LD15) + log1p(SNP15)) * Winner15 +
(log1p(Lab15) + log1p(Con15) + log1p(LD15) + log1p(SNP15)) * Figure.to.use
}
if (type != "vgam") {
AIC(sco_mod <- multinom(sco.form,
data = sco,
weights = sco$total,
maxit = 250))
fits <- fitted(sco_mod)
sco_tally <- apply(fits, 1, which.max)
sco_sims <- sim.multinom(sco_mod, sco, nSims = nSims)
} else {
M <- 7
sco_mod <- vglm(sco.form,
dirmultinomial,
data= sco,
maxit = 250)
fits <- fitted(sco_mod)
sco_sims <- sim.dirmult(sco_mod, sco, nSims = nSims)
### Re-order
sco_sims <- sco_sims[,,c(7,2:6,1)]
}
### Check whether there are issues w/ flakey predictions
predbar <- apply(sco_sims, c(2,3), mean)
cook <- numeric(0)
for (i in 1:7) {
y <- fits[,i]
x <- predbar[,i]
mod <- lm(y ~ x)
cook <- c(cook, cooks.distance(mod))
}
if (any(cook > 4)) {
warning("Cook's distance > 4 in Scottish data!")
pos <- which(apply(cook, 1, max) > 4)
sco_sims[,pos,] <- matrix(fits[pos,], nrow = nSims, ncol = 6, byrow = TRUE)
}
### Wales
if (type == "vgam") {
wal$Winner15 <- as.character(wal$Winner15)
wal$Winner15 <- dplyr::recode(wal$Winner15,
Labour = 'Labour',
.default = 'Other')
wal.form <- cbind(Other, Labour, `Liberal Democrat`, `Plaid Cymru`,
`Green Party`, `United Kingdom Independence Party (UKIP)`,
Conservative) ~
(log1p(Lab15) + log1p(Con15) + log1p(LD15) + log1p(PC15)) * Winner15 +
(log1p(Lab15) + log1p(Con15) + log1p(LD15) + log1p(PC15)) * Figure.to.use
} else {
wal.form <- cbind(Conservative, Labour, `Liberal Democrat`, `Plaid Cymru`,
`Green Party`, `United Kingdom Independence Party (UKIP)`,
Other) ~
(log1p(Lab15) + log1p(Con15) + log1p(LD15) + log1p(PC15)) * Winner15 +
(log1p(Lab15) + log1p(Con15) + log1p(LD15) + log1p(PC15)) * Figure.to.use
wal$Winner15 <- factor(as.character(wal$Winner15))
}
if (type != "vgam") {
AIC(wal_mod <- multinom(wal.form,
data = wal,
weights = wal$total,
maxit = 250))
fits <- fitted(wal_mod)
wal_tally <- apply(fits, 1, which.max)
wal_sims <- sim.multinom(wal_mod, wal, nSims = nSims)
} else {
M <- 7
wal_mod <- vglm(wal.form,
dirmultinomial,
data= wal,
maxit = 250)
wal_sims <- sim.dirmult(wal_mod, wal, nSims = nSims)
### Re-order
wal_sims <- wal_sims[,,c(7,2:6,1)]
}
### Check whether there are issues w/ flakey predictions
predbar <- apply(wal_sims, c(2,3), mean)
cook <- numeric(0)
for (i in 1:7) {
y <- fits[,i]
x <- predbar[,i]
mod <- lm(y ~ x)
cook <- c(cook, cooks.distance(mod))
}
if (any(cook > 4)) {
warning("Cook's distance > 4 in Scottish data!")
pos <- which(apply(cook, 1, max) > 4)
wal_sims[,pos,] <- matrix(fits[pos,], nrow = nSims, ncol = 6, byrow = TRUE)
}
### What is the variance across simulations?
simvar <- apply(eng_sims, c(2, 3), sd)
summary(as.vector(unlist(simvar)))
### Bind these matrices, filling in zeros
nParties <- 8
nEngConst <- dim(eng_sims)[2]
nWalConst <- dim(wal_sims)[2]
nScoConst <- dim(sco_sims)[2]
nConst <- nEngConst + nWalConst + nScoConst
sims <- array(0, dim = c(nSims, nConst, nParties))
sims[1:nSims, 1:nEngConst, c(1:3, 6:8)] <- eng_sims
sims[1:nSims, (nEngConst+1):(nEngConst + nWalConst), c(1:3, 5:8)] <- wal_sims
sims[1:nSims, (nEngConst+nWalConst+1):nConst, c(1:4,6:8)] <- sco_sims
dimnames(sims)[[2]] <- c(dimnames(eng_sims)[[2]],
dimnames(wal_sims)[[2]],
dimnames(sco_sims)[[2]])
refnos <- dimnames(sims)[[2]]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment