carlislerainey/comb_mi.R

## comb_mi.R
##########################################
## A Function to Combine Estimates from MI
##########################################

comb.mi <- function(models) {

  # Arguments
  #   models:  a list of models, one estimated on each of m MI data sets.

  # Note: I borrow the notation below mostly from Rubin's *Multiple Imputation
  # for Nonresponse in Surveys*.

  # count the number of imputations
  n.imp <- length(models)

  # calculate the average of the coefficients (Eqn 3.1.2, p. 76)
  mi.est <- NULL
  for (i in 1:n.imp) {
    mi.est <- rbind(mi.est, coef(models[[i]]))
  }
  Q.bar <- apply(mi.est, 2, mean)
  # store estimates
  comb.est <- Q.bar

  # calculate the average of the covariances (Eqn 3.1.3, p. 76)
  U.bar <- NULL
  U.bar <- vcov(models[[1]])
  for (i in 2:n.imp) {
    U.bar <- U.bar + vcov(models[[i]])
  }
  U.bar <- U.bar/n.imp

  # calculate the variance *between* the MI data sets (Eqn 3.1.4, p. 76)
  B <- NULL
  B <- (coef(models[[1]]) - Q.bar)%*%t(coef(models[[1]]) - Q.bar)
  for (i in 2:n.imp) {
    B <- B + (coef(models[[i]]) - Q.bar)%*%t(coef(models[[i]]) - Q.bar)
  }
  B <- B/(n.imp - 1)

  # calculate the *total* covariance (Eqn 3.1.5, p. 76)
  comb.cov <- U.bar + (1 + 1/(n.imp))*B # T iin Rubin's notation

  # calculate the standard errors
  comb.se <- sqrt(diag(comb.cov))

  # add the relevant quantities to a list and return it
  res <- list(comb.est = comb.est, comb.cov = comb.cov, comb.se = comb.se)
  return(res)

  # print the estimates and standard errors
  print.it <- cbind(comb.est, comb.se)
  rownames(print.it) <- names(comb.est)
}

##########################################
## Testing
##########################################

# load packages
library(Amelia)

# simulate data
n <- 1000
x <- runif(n)
e <- rnorm(n)
y <- 2*x + e
y.complete <- y
z1 <- y + rnorm(n)  # another variable that will be useful to us
z2 <- y + rnorm(n)

# create missingness
# y.miss <- as.logical(rbinom(n, 1, .25))  # MCAR
# y.miss <- as.logical(rbinom(n, 1, plogis(2*z1)))  # MAR
y.miss <- (y > quantile(y, .75)) # Nonignorable
y[y.miss] <- NA

# store in data frame
d <- data.frame(x, y, z1, z2)

# multiple imputation
n.imp <- 5
library(Amelia)
mi <- amelia(d, m = n.imp)

# estimate and combine models into a list
m.imp <- list()
for (i in 1:n.imp) {
  m.imp[[i]] <- lm(y ~ x, data = mi$imputations[[i]])
}

# use Amelia's function
mi.est <- NULL
for (i in 1:n.imp) {
  mi.est <- rbind(mi.est, coef(m.imp[[i]]))
}
mi.se <- NULL
for (i in 1:n.imp) {
  mi.se <- rbind(mi.se, sqrt(diag(vcov(m.imp[[i]]))))
}
mi.meld(q = mi.est, se = mi.se)

# and compare to ours
comb.mi(m.imp)
	##########################################
	## A Function to Combine Estimates from MI
	##########################################

	comb.mi <- function(models) {

	# Arguments
	# models: a list of models, one estimated on each of m MI data sets.

	# Note: I borrow the notation below mostly from Rubin's *Multiple Imputation
	# for Nonresponse in Surveys*.

	# count the number of imputations
	n.imp <- length(models)

	# calculate the average of the coefficients (Eqn 3.1.2, p. 76)
	mi.est <- NULL
	for (i in 1:n.imp) {
	mi.est <- rbind(mi.est, coef(models[[i]]))
	}
	Q.bar <- apply(mi.est, 2, mean)
	# store estimates
	comb.est <- Q.bar

	# calculate the average of the covariances (Eqn 3.1.3, p. 76)
	U.bar <- NULL
	U.bar <- vcov(models[[1]])
	for (i in 2:n.imp) {
	U.bar <- U.bar + vcov(models[[i]])
	}
	U.bar <- U.bar/n.imp

	# calculate the variance between the MI data sets (Eqn 3.1.4, p. 76)
	B <- NULL
	B <- (coef(models[[1]]) - Q.bar)%*%t(coef(models[[1]]) - Q.bar)
	for (i in 2:n.imp) {
	B <- B + (coef(models[[i]]) - Q.bar)%*%t(coef(models[[i]]) - Q.bar)
	}
	B <- B/(n.imp - 1)

	# calculate the total covariance (Eqn 3.1.5, p. 76)
	comb.cov <- U.bar + (1 + 1/(n.imp))*B # T iin Rubin's notation

	# calculate the standard errors
	comb.se <- sqrt(diag(comb.cov))

	# add the relevant quantities to a list and return it
	res <- list(comb.est = comb.est, comb.cov = comb.cov, comb.se = comb.se)
	return(res)

	# print the estimates and standard errors
	print.it <- cbind(comb.est, comb.se)
	rownames(print.it) <- names(comb.est)
	}

	##########################################
	## Testing
	##########################################

	# load packages
	library(Amelia)

	# simulate data
	n <- 1000
	x <- runif(n)
	e <- rnorm(n)
	y <- 2*x + e
	y.complete <- y
	z1 <- y + rnorm(n) # another variable that will be useful to us
	z2 <- y + rnorm(n)

	# create missingness
	# y.miss <- as.logical(rbinom(n, 1, .25)) # MCAR
	# y.miss <- as.logical(rbinom(n, 1, plogis(2*z1))) # MAR
	y.miss <- (y > quantile(y, .75)) # Nonignorable
	y[y.miss] <- NA

	# store in data frame
	d <- data.frame(x, y, z1, z2)

	# multiple imputation
	n.imp <- 5
	library(Amelia)
	mi <- amelia(d, m = n.imp)

	# estimate and combine models into a list
	m.imp <- list()
	for (i in 1:n.imp) {
	m.imp[[i]] <- lm(y ~ x, data = mi$imputations[[i]])
	}

	# use Amelia's function
	mi.est <- NULL
	for (i in 1:n.imp) {
	mi.est <- rbind(mi.est, coef(m.imp[[i]]))
	}
	mi.se <- NULL
	for (i in 1:n.imp) {
	mi.se <- rbind(mi.se, sqrt(diag(vcov(m.imp[[i]]))))
	}
	mi.meld(q = mi.est, se = mi.se)

	# and compare to ours
	comb.mi(m.imp)