Skip to content

Instantly share code, notes, and snippets.

@danlewer
Last active June 24, 2024 07:45
Show Gist options
  • Save danlewer/41e382689ec3b95c77e862df5607384a to your computer and use it in GitHub Desktop.
Save danlewer/41e382689ec3b95c77e862df5607384a to your computer and use it in GitHub Desktop.
library(lme4) # for ML/REML fitting of mixed models
# ----------------------------------------------------------------------------------------------
# simulate a clustered dataset with cluster-level outcome affected by an individual "confounder"
# ----------------------------------------------------------------------------------------------
# inputs
# ------
# sample size
clusters <- 20
cluster_size_range <- floor(exp(100:400/400) ^ 4 + 10) - 2
# model parameters
intercept <- 27
risk_factor_B <- 1.7
sd_re <- 5 # standard deviation in random intercepts
# simulate data (note arbitrary variances)
# ----------------------------------------
per_cluster <- sample(cluster_size_range, clusters, T)
N <- sum(per_cluster)
cluster_effect <- rnorm(clusters, 0, sd_re)
risk_factor_cmean <- rnorm(clusters, 10, 3)
d <- data.frame(id = rep(seq_len(clusters), per_cluster),
cluster_effect = rep(cluster_effect, per_cluster),
risk_factor_cmean = rep(risk_factor_cmean, per_cluster))
d$risk_factor <- d$risk_factor_cmean + rnorm(N, 0, 3)
d$target <- intercept + d$cluster_effect + d$risk_factor * risk_factor_B
d$outcome <- rnorm(N, d$target, 4)
d[, c('cluster_effect', 'risk_factor_cmean', 'target')] <- list(NULL)
# visualise exposure & outcome
# ----------------------------
par(mfrow = c(1, 2))
plot(d$risk_factor, d$outcome, main = 'Individual level')
plot(x = tapply(d$risk_factor, d$id, mean),
y = tapply(d$outcome, d$id, mean),
main = 'Cluster level', xlab = 'Cluster risk factor mean', ylab = 'Cluster outcome mean') # cluster level
# --------------------------------
# fit model with random intercepts
# --------------------------------
m <- lmer(outcome ~ risk_factor + (1|id), data = d)
# -----------------------
# calculate cluster means
# -----------------------
# crude outcome
# -------------
means <- tapply(d$outcome, d$id, function (x) c(mean(x), t.test(x)$conf.int[1:2]))
means <- do.call(rbind, means)
colnames(means) <- c('crude', 'crude_lower', 'crude_upper')
# risk factor
# -----------
means <- cbind(data.frame(risk_factor = tapply(d$risk_factor, d$id, mean)), means)
# bootstrap confidence intervals for standardised outcomes
# --------------------------------------------------------
# simple example of bootMer function
fboot <- function (.) fixef(.)[2] # extract fixed effect beta for risk factor from model
fboot(m) # beta
B <- bootMer(m, fboot, nsim = 500) # bootstraps of beta
c(fboot(m), confint(B)) # confidence intervals using the percentile method
confint(m) # compare confint.merMod, which defaults to the profile method
# bootstrap confidence intervals for standardised outcome means (this takes a while)
mci <- function (cluster = 1, B = 500) {
nd <- data.frame(risk_factor = d$risk_factor, id = cluster)
fboot <- function (.) mean(predict(., newdata = nd))
r <- bootMer(m, fboot, nsim = B, use.u = T)
c(r$t0, confint(r, type = 'perc')) # defaults to the percentile method. the choice of method is non trivial
}
bootRes <- sapply(seq_len(clusters), function (x) {
print (x)
mci (x)
})
bootRes <- `colnames<-`(t(bootRes), c('bootPoint', 'bootLower', 'bootUpper'))
means <- cbind(means, bootRes)
# compare risk factor and effect on standardised outcome (expect lower than average risk factor to cause reduction in standardised estimate and vice versa)
par(mfrow = c(1, 1))
with(means, plot(risk_factor, crude - bootPoint, xlab = 'Mean risk factor', ylab = 'change from crude to standardised outcome'))
# -------------------
# order by crude mean
# -------------------
means <- means[order(means$crude),]
# --------------------------------------
# visualise crude and standardised means
# --------------------------------------
means$y <- 1:nrow(means)
jit <- 0.1
cols <- c('red', 'blue')
plot(1, type = 'n', xlim = range(means[, 2:7]), ylim = c(0, clusters), xlab = 'Cluster mean', ylab = 'Cluster')
with(means, {
points(crude, y-jit, pch = 19, col = cols[1])
arrows(crude_lower, y-jit, crude_upper, y-jit, code = 3, angle = 90, length = 0.05, col = cols[1])
points(bootPoint, y+jit, pch = 19, col = cols[2])
arrows(bootLower, y+jit, bootUpper, y+jit, code = 3, angle = 90, length = 0.05, col = cols[2])
})
abline(v = mean(d$outcome))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment