# alexpkeil1/qgcomp_gee_longitudinal_or_clustered.R

## qgcomp_gee_longitudinal_or_clustered.R
# qgcomp with a GEE-like approach, using either bootstrapping or an estimating-equation-based approach - useful for clustered or longitudinal data when the interest is in the effect of x -> y, pooled over multiple time points

library(qgcomp)
library(qgcompint)
set.seed(50) # fix the RNG state so the random draws below are reproducible

####### simulate some clustered data -----
# linear model (continuous outcome), continuous modifier z (ztype = "continuous")
# simulate cluster specific exposures and outcome means by just treating these
# like independent observations; the clustering itself is created below by
# stacking two copies of the simulated rows
dat <- qgcompint::simdata_quantized_emm(
  outcometype = "continuous",
  n = 100,
  corr = c(0.8, 0.5, 0.1),
  mainterms = c(0.2, 0.1, -0.3, 0.0),
  prodterms = c(0.2, 0.1, -0.3, 0.0),
  ztype = "continuous"
)
dat$ID <- seq_len(nrow(dat)) # cluster/individual ID: one distinct value per simulated row
clustdat <- rbind(dat, dat) # 2 observations per cluster (every ID now appears twice)
# each cluster has two outcomes that are normally distributed (sd = 0.5) around a
# cluster specific mean; the noise vector is sized from clustdat itself so it
# always matches the stacked data, however many copies were bound together
clustdat$y <- clustdat$y + rnorm(n = nrow(clustdat), sd = 0.5)


####### analyze clustered data using qgcomp -----
# the simulated effect measure modification is ignored here: z enters the model
# only as an ordinary covariate

# naive fit: standard errors are not cluster appropriate, but the weights are
# appropriate because point estimates don't change (for linear model, at least)
(qfit_wrong <- qgcomp.noboot(
  f = y ~ z + x1 + x2 + x3 + x4,
  expnms = paste0("x", 1:4),
  data = clustdat,
  q = 4,
  family = gaussian()
))

# cluster appropriate standard errors using bootstrap (clusters identified by ID)
(qfit_long <- qgcomp.boot(
  f = y ~ z + x1 + x2 + x3 + x4,
  id = "ID",
  expnms = paste0("x", 1:4),
  data = clustdat,
  q = 4,
  family = gaussian()
))

# cluster appropriate standard errors using estimating equations (clusters identified by ID)
(qfit_long2 <- qgcomp.glm.ee(
  f = y ~ z + x1 + x2 + x3 + x4,
  id = "ID",
  expnms = paste0("x", 1:4),
  data = clustdat,
  q = 4,
  family = gaussian()
))


####### analyze clustered data using qgcompint -----
# naive effect-modification fit (modifier: z): standard errors are not cluster
# appropriate, but the weights are appropriate because point estimates don't
# change (for linear model, at least)
(qfit_emm_wrong <- qgcomp.emm.noboot(
  f = y ~ z + x1 + x2 + x3 + x4,
  emmvar = "z",
  expnms = paste0("x", 1:4),
  data = clustdat,
  q = 4,
  family = gaussian()
))

# cluster appropriate standard errors via bootstrap (clusters identified by ID)
(qfit_emm_long <- qgcomp.emm.boot(
  f = y ~ z + x1 + x2 + x3 + x4,
  emmvar = "z",
  id = "ID",
  expnms = paste0("x", 1:4),
  data = clustdat,
  q = 4,
  family = gaussian()
))

# cluster appropriate standard errors using estimating equations
# not yet implemented
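	# NOTE: everything from here on repeats the script above (same seed, same calls), only tab-indented
	# qgcomp with a GEE-like approach, using either bootstrapping or an estimating-equation-based approach - useful for clustered or longitudinal data when the interest is in the effect of x -> y, pooled over multiple time points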

	library(qgcomp)
	library(qgcompint)
	set.seed(50) # fix the RNG state so the random draws below are reproducible

	####### simulate some clustered data -----
	# linear model (continuous outcome), continuous modifier z (ztype = "continuous")
	# simulate cluster specific exposures and outcome means by just treating these
	# like independent observations; the clustering itself is created below by
	# stacking two copies of the simulated rows
	dat <- qgcompint::simdata_quantized_emm(
		outcometype = "continuous",
		n = 100,
		corr = c(0.8, 0.5, 0.1),
		mainterms = c(0.2, 0.1, -0.3, 0.0),
		prodterms = c(0.2, 0.1, -0.3, 0.0),
		ztype = "continuous"
	)
	dat$ID <- seq_len(nrow(dat)) # cluster/individual ID: one distinct value per simulated row
	clustdat <- rbind(dat, dat) # 2 observations per cluster (every ID now appears twice)
	# each cluster has two outcomes that are normally distributed (sd = 0.5) around a
	# cluster specific mean; the noise vector is sized from clustdat itself so it
	# always matches the stacked data, however many copies were bound together
	clustdat$y <- clustdat$y + rnorm(n = nrow(clustdat), sd = 0.5)


	####### analyze clustered data using qgcomp -----
	# the simulated effect measure modification is ignored here: z enters the model
	# only as an ordinary covariate

	# naive fit: standard errors are not cluster appropriate, but the weights are
	# appropriate because point estimates don't change (for linear model, at least)
	(qfit_wrong <- qgcomp.noboot(
		f = y ~ z + x1 + x2 + x3 + x4,
		expnms = paste0("x", 1:4),
		data = clustdat,
		q = 4,
		family = gaussian()
	))

	# cluster appropriate standard errors using bootstrap (clusters identified by ID)
	(qfit_long <- qgcomp.boot(
		f = y ~ z + x1 + x2 + x3 + x4,
		id = "ID",
		expnms = paste0("x", 1:4),
		data = clustdat,
		q = 4,
		family = gaussian()
	))

	# cluster appropriate standard errors using estimating equations (clusters identified by ID)
	(qfit_long2 <- qgcomp.glm.ee(
		f = y ~ z + x1 + x2 + x3 + x4,
		id = "ID",
		expnms = paste0("x", 1:4),
		data = clustdat,
		q = 4,
		family = gaussian()
	))


	####### analyze clustered data using qgcompint -----
	# naive effect-modification fit (modifier: z): standard errors are not cluster
	# appropriate, but the weights are appropriate because point estimates don't
	# change (for linear model, at least)
	(qfit_emm_wrong <- qgcomp.emm.noboot(
		f = y ~ z + x1 + x2 + x3 + x4,
		emmvar = "z",
		expnms = paste0("x", 1:4),
		data = clustdat,
		q = 4,
		family = gaussian()
	))

	# cluster appropriate standard errors via bootstrap (clusters identified by ID)
	(qfit_emm_long <- qgcomp.emm.boot(
		f = y ~ z + x1 + x2 + x3 + x4,
		emmvar = "z",
		id = "ID",
		expnms = paste0("x", 1:4),
		data = clustdat,
		q = 4,
		family = gaussian()
	))

	# cluster appropriate standard errors using estimating equations
	# not yet implemented