Last active
July 27, 2017 17:11
-
-
Save carlislerainey/9963264 to your computer and use it in GitHub Desktop.
A function to combine the sets of estimates from multiply imputed data sets into a single set of coefficients, standard errors, and a covariance matrix.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
########################################## | |
## A Function to Combine Estimates from MI | |
########################################## | |
comb.mi <- function(models) { | |
# Arguments | |
# models: a list of models, one estimated on each of m MI data sets. | |
# Note: I borrow the notation below mostly from Rubin's *Multiple Imputation | |
# for Nonresponse in Surveys*. | |
# count the number of imputations | |
n.imp <- length(models) | |
# calculate the average of the coefficients (Eqn 3.1.2, p. 76) | |
mi.est <- NULL | |
for (i in 1:n.imp) { | |
mi.est <- rbind(mi.est, coef(models[[i]])) | |
} | |
Q.bar <- apply(mi.est, 2, mean) | |
# store estimates | |
comb.est <- Q.bar | |
# calculate the average of the covariances (Eqn 3.1.3, p. 76) | |
U.bar <- NULL | |
U.bar <- vcov(models[[1]]) | |
for (i in 2:n.imp) { | |
U.bar <- U.bar + vcov(models[[i]]) | |
} | |
U.bar <- U.bar/n.imp | |
# calculate the variance *between* the MI data sets (Eqn 3.1.4, p. 76) | |
B <- NULL | |
B <- (coef(models[[1]]) - Q.bar)%*%t(coef(models[[1]]) - Q.bar) | |
for (i in 2:n.imp) { | |
B <- B + (coef(models[[i]]) - Q.bar)%*%t(coef(models[[i]]) - Q.bar) | |
} | |
B <- B/(n.imp - 1) | |
# calculate the *total* covariance (Eqn 3.1.5, p. 76) | |
comb.cov <- U.bar + (1 + 1/(n.imp))*B # T iin Rubin's notation | |
# calculate the standard errors | |
comb.se <- sqrt(diag(comb.cov)) | |
# add the relevant quantities to a list and return it | |
res <- list(comb.est = comb.est, comb.cov = comb.cov, comb.se = comb.se) | |
return(res) | |
# print the estimates and standard errors | |
print.it <- cbind(comb.est, comb.se) | |
rownames(print.it) <- names(comb.est) | |
} | |
########################################## | |
## Testing | |
########################################## | |
# load packages | |
library(Amelia) | |
# simulate data | |
n <- 1000 | |
x <- runif(n) | |
e <- rnorm(n) | |
y <- 2*x + e | |
y.complete <- y | |
z1 <- y + rnorm(n) # another variable that will be useful to us | |
z2 <- y + rnorm(n) | |
# create missingness | |
# y.miss <- as.logical(rbinom(n, 1, .25)) # MCAR | |
# y.miss <- as.logical(rbinom(n, 1, plogis(2*z1))) # MAR | |
y.miss <- (y > quantile(y, .75)) # Nonignorable | |
y[y.miss] <- NA | |
# store in data frame | |
d <- data.frame(x, y, z1, z2) | |
# multiple imputation | |
n.imp <- 5 | |
library(Amelia) | |
mi <- amelia(d, m = n.imp) | |
# estimate and combine models into a list | |
m.imp <- list() | |
for (i in 1:n.imp) { | |
m.imp[[i]] <- lm(y ~ x, data = mi$imputations[[i]]) | |
} | |
# use Amelia's function | |
mi.est <- NULL | |
for (i in 1:n.imp) { | |
mi.est <- rbind(mi.est, coef(m.imp[[i]])) | |
} | |
mi.se <- NULL | |
for (i in 1:n.imp) { | |
mi.se <- rbind(mi.se, sqrt(diag(vcov(m.imp[[i]])))) | |
} | |
mi.meld(q = mi.est, se = mi.se) | |
# and compare to ours | |
comb.mi(m.imp) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks for sharing this function!