Skip to content

Instantly share code, notes, and snippets.

@chrishanretty
Created May 10, 2017 08:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chrishanretty/4827cec6a2636481060acc4d3d439e1b to your computer and use it in GitHub Desktop.
Save chrishanretty/4827cec6a2636481060acc4d3d439e1b to your computer and use it in GitHub Desktop.
Forecast election day results on the basis of final polls
### Forecast election day results on the basis of final polls
### Some of this code is clunky (particularly calibration)
### but it is a useful test bed if you want to move from
### a multivariate normal to another distribution (say, a
### multivariate t distribution) -- just change the code in
### check_calibration. Do, however, read
### https://journal.r-project.org/archive/2013-2/hofert.pdf
### first!
## ----loadlibs------------------------------------------------------------
library(knitr)
library(pander)
library(Matrix)
library(mvtnorm)
## ----datin---------------------------------------------------------------
polls <- structure(list(Conservatives = c(0.463137689855468, 0.456132838783667,
0.42634772561737, 0.375342469945955, 0.308737479390641, 0.322761590590601,
0.32407274487598, 0.352164318385373, 0.336886748984473), Labour = c(0.393273527306154,
0.266928720258727, 0.335530448967622, 0.391501599270666, 0.478126236740175,
0.458478005198966, 0.379647880476463, 0.274403670248034, 0.336584668910591
), Liberal.Democrats = c(0.118964928480032, 0.254004731185892,
0.210710045749512, 0.190497666825356, 0.14310292507149, 0.156146559027138,
0.223943628451435, 0.273044090920638, 0.0849106722877976), Other = c(0.0246238543583452,
0.0229337097717135, 0.0274117796654951, 0.0426582639580229, 0.0700333587976943,
0.0626138451832949, 0.0723357461961213, 0.100387920445954, 0.241617909817138
)), .Names = c("Conservatives", "Labour", "Liberal.Democrats",
"Other"), class = "data.frame", row.names = c("1979", "1983",
"1987", "1992", "1997", "2001", "2005", "2010", "2015"))
shares <- structure(list(Conservatives = c(0.447774470245795, 0.43485092464566,
0.432466614101659, 0.428498495558431, 0.314618578007995, 0.324463265750917,
0.332359474708807, 0.368804460197394, 0.376137629058115), Labour = c(0.376979254862873,
0.282617296534093, 0.31519111447088, 0.351497022738108, 0.442984244236696,
0.416369176542003, 0.361397077962564, 0.296545633515872, 0.311041250597578
), Liberal.Democrats = c(0.141014904292, 0.260029558584563, 0.230726978209406,
0.182418283404197, 0.171809011855532, 0.186903837282912, 0.226447535045442,
0.235549351036152, 0.0804167859715201), Other = c(0.0342313705993318,
0.0225022202356833, 0.0216152932180549, 0.0375861982992636, 0.0705881658997772,
0.0722637204241681, 0.0797959122831868, 0.0991005552505823, 0.232404334372786
)), .Names = c("Conservatives", "Labour", "Liberal.Democrats",
"Other"), class = "data.frame", row.names = c("1979", "1983",
"1987", "1992", "1997", "2001", "2005", "2010", "2015"))
## ----diffsout, results = "asis"------------------------------------------
diffs <- shares - polls
diffs <- rbind(diffs, colMeans(diffs))
rownames(diffs)[nrow(diffs)] <- "Average"
pander(diffs)
## ----ecm-----------------------------------------------------------------
example.shares <- c(0.44, 0.29, 0.11, 0.16)
ecm <- function(x, y) { ## expect correlation matrix
-1 * sqrt((x*y)/((1-x)*(1-y)))
}
mat <- outer(example.shares, example.shares, ecm)
diag(mat) <- 1
pander(mat)
## ----cor2cov-------------------------------------------------------------
cor2cov = function(corMat, varVec) {
# test the input
if (!is.matrix(corMat)) stop("'corMat must be a matrix")
n = nrow(corMat)
if (ncol(corMat) != n) stop("'corMat' must be square")
if (mode(corMat) != "numeric") stop("'corMat must be numeric")
if (mode(varVec) != "numeric") stop("'varVec must be numeric")
if (!is.null(dim(varVec))) {
if (length(dim(varVec)) != 2) stop("'varVec' should be a vector")
if (any(dim(varVec)==1)) stop("'varVec' cannot be a matrix")
varVec = as.numeric(varVec) # convert row or col matrix to a vector
}
if (!all(diag(corMat) == 1)) stop("correlation matrices have 1 on the diagonal")
if (any(corMat < -1 | corMat > +1))
stop("correlations must be between -1 and 1")
if (any(varVec <= 0)) stop("variances must be non-negative")
if (length(varVec) != n) stop("length of 'varMat' does not match 'corMat' size")
# Compute the covariance
sdMat = diag(sqrt(varVec))
rtn = sdMat %*% corMat %*% t(sdMat)
if (det(rtn)<=0) warning("covariance matrix is not positive definite")
return(rtn)
}
## ----cv------------------------------------------------------------------
check_calibration <- function(sigma = 0.01, alpha = 0.05,
p = polls, s = shares, nSims = 1000) {
calibs <- numeric(0)
for (i in 1:nrow(s)) {
errs <- s[-i,] - p[-i,]
delta <- colMeans(errs)
cormat <- outer(as.vector(unlist(p[i,])),
as.vector(unlist(p[i,])),
ecm)
diag(cormat) <- 1
cormat <- as.matrix(nearPD(cormat)$mat)
diag(cormat) <- 1
Sigma <- cor2cov(corMat = cormat, varVec = rep(sigma, ncol(cormat)))
deviates <- rmvnorm(n = nSims,
mean = as.vector(unlist(p[i,] + delta)),
sigma = Sigma)
ci <- apply(deviates, 2, quantile, c(alpha/2, 1 - (alpha/2)))
calib <- apply(s[i,] > ci[1,] & s[i,] < ci[2,], 2, min)
calibs <- c(calibs, sum(calib))
}
return(sum(calibs) / length(unlist(shares)))
}
## ----plotfigs, fig = TRUE, fig.cap = "Calibration"-----------------------
### Let's evaluate this function for a range of values
test.df <- expand.grid(alpha = c(0.05, 0.1, 0.5),
sigma = seq(0.0001, 0.005, length.out = 100))
test.df$calib <- apply(test.df, 1, function(x)
check_calibration(sigma = x["sigma"], alpha = x["alpha"]))
plot(test.df$sigma, test.df$calib,
type = "n")
with(subset(test.df, alpha == 0.05),
lines(sigma, calib, lty = 1, col = 'red'))
with(subset(test.df, alpha == 0.1),
lines(sigma, calib, lty = 2, col = 'darkblue'))
with(subset(test.df, alpha == 0.5),
lines(sigma, calib, lty = 3, col = 'forestgreen'))
for (a in c(0.5, 0.9, 0.95)) {
abline(h = a, lty = 1, col = 'grey')
}
legend("bottomright",
lty = 1:3,
col = c("red", "darkblue", "forestgreen"),
legend = c("95%", "90%", "50%"))
### Find out the actual values
test.df <- test.df[order(test.df$sigma),]
sigma1 <- test.df$sigma[which(test.df$calib > 0.9 & test.df$alpha == 0.1)[1]]
sigma2 <- test.df$sigma[which(test.df$calib > 0.95 & test.df$alpha == 0.05)[1]]
sigma3 <- test.df$sigma[which(test.df$calib > 0.95 & test.df$alpha == 0.05)[1]]
chosen.sigma <- min(c(sigma1, sigma2, sigma3))
## ----newsims-------------------------------------------------------------
### Now simulate
newpolls <- c(0.43, 0.28, 0.11)
newpolls <- c(newpolls, 1 - sum(newpolls))
cormat <- outer(newpolls,
newpolls,
ecm)
diag(cormat) <- 1
cormat <- as.matrix(nearPD(cormat)$mat)
diag(cormat) <- 1
Sigma <- cor2cov(corMat = cormat, varVec = rep(chosen.sigma, ncol(cormat)))
meanmat <- matrix(newpolls,
nrow = 1000,
ncol = length(newpolls),
byrow = TRUE)
deviates <- rmvnorm(n = 1000,
mean = newpolls,
sigma = Sigma)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment