@carlislerainey
Last active July 27, 2017 17:11
A function to combine the sets of estimates from multiply imputed data sets into a single set of coefficients, standard errors, and a covariance matrix.
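For reference, these are the combining rules the function implements (the equations cited in the comments below, from Rubin's *Multiple Imputation for Nonresponse in Surveys*, ch. 3): with m imputations, per-imputation estimates \(\hat{Q}_i\), and per-imputation covariance matrices \(U_i\),

\[
\bar{Q} = \frac{1}{m}\sum_{i=1}^{m}\hat{Q}_i, \qquad
\bar{U} = \frac{1}{m}\sum_{i=1}^{m}U_i, \qquad
B = \frac{1}{m-1}\sum_{i=1}^{m}\left(\hat{Q}_i - \bar{Q}\right)\left(\hat{Q}_i - \bar{Q}\right)', \qquad
T = \bar{U} + \left(1 + \frac{1}{m}\right)B,
\]

where T is the total covariance matrix and the combined standard errors are the square roots of its diagonal.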
##########################################
## A Function to Combine Estimates from MI
##########################################
comb.mi <- function(models) {
  # Arguments
  #   models: a list of models, one estimated on each of m MI data sets.
  # Note: I borrow the notation below mostly from Rubin's *Multiple Imputation
  # for Nonresponse in Surveys*.
  # count the number of imputations
  n.imp <- length(models)
  # calculate the average of the coefficients (Eqn 3.1.2, p. 76)
  mi.est <- NULL
  for (i in 1:n.imp) {
    mi.est <- rbind(mi.est, coef(models[[i]]))
  }
  Q.bar <- apply(mi.est, 2, mean)
  # store estimates
  comb.est <- Q.bar
  # calculate the average of the covariances (Eqn 3.1.3, p. 76)
  U.bar <- vcov(models[[1]])
  for (i in 2:n.imp) {
    U.bar <- U.bar + vcov(models[[i]])
  }
  U.bar <- U.bar/n.imp
  # calculate the variance *between* the MI data sets (Eqn 3.1.4, p. 76)
  B <- (coef(models[[1]]) - Q.bar) %*% t(coef(models[[1]]) - Q.bar)
  for (i in 2:n.imp) {
    B <- B + (coef(models[[i]]) - Q.bar) %*% t(coef(models[[i]]) - Q.bar)
  }
  B <- B/(n.imp - 1)
  # calculate the *total* covariance (Eqn 3.1.5, p. 76)
  comb.cov <- U.bar + (1 + 1/n.imp)*B  # T in Rubin's notation
  # calculate the standard errors
  comb.se <- sqrt(diag(comb.cov))
  # print the estimates and standard errors
  print.it <- cbind(comb.est, comb.se)
  rownames(print.it) <- names(comb.est)
  print(print.it)
  # add the relevant quantities to a list and return it
  res <- list(comb.est = comb.est, comb.cov = comb.cov, comb.se = comb.se)
  return(res)
}
##########################################
## Testing
##########################################
# load packages
library(Amelia)
# simulate data
n <- 1000
x <- runif(n)
e <- rnorm(n)
y <- 2*x + e
y.complete <- y
z1 <- y + rnorm(n) # another variable that will be useful to us
z2 <- y + rnorm(n)
# create missingness
# y.miss <- as.logical(rbinom(n, 1, .25)) # MCAR
# y.miss <- as.logical(rbinom(n, 1, plogis(2*z1))) # MAR
y.miss <- (y > quantile(y, .75)) # Nonignorable
y[y.miss] <- NA
# store in data frame
d <- data.frame(x, y, z1, z2)
# multiple imputation
n.imp <- 5
mi <- amelia(d, m = n.imp)
# estimate and combine models into a list
m.imp <- list()
for (i in 1:n.imp) {
  m.imp[[i]] <- lm(y ~ x, data = mi$imputations[[i]])
}
# use Amelia's function
mi.est <- NULL
for (i in 1:n.imp) {
  mi.est <- rbind(mi.est, coef(m.imp[[i]]))
}
mi.se <- NULL
for (i in 1:n.imp) {
  mi.se <- rbind(mi.se, sqrt(diag(vcov(m.imp[[i]]))))
}
mi.meld(q = mi.est, se = mi.se)
# and compare to ours
comb.mi(m.imp)
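# If you also want interval estimates, here is a rough sketch that builds
# normal-approximation 95% confidence intervals from the list comb.mi() returns
# (the 1.96 cutoff ignores Rubin's degrees-of-freedom adjustment, so treat it
# as an approximation)
res <- comb.mi(m.imp)
ci <- cbind(estimate = res$comb.est,
            lower    = res$comb.est - 1.96*res$comb.se,
            upper    = res$comb.est + 1.96*res$comb.se)
round(ci, 3)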
@saudiwin
Thanks for sharing this function!
