alexpkeil1/qgcomp_time_varying_exposures.R

## qgcomp_time_varying_exposures.R
# simple application of qgcomp with time-varying exposures
# the simulated model is for the discrete hazard, given recent exposure and recent values of confounders
# this method can be used for alternative scenarios (e.g. cumulative exposure), which corresponds to
# different parametric assumptions

library(qgcomp)
library(survival)

# create simple data set with time-varying exposures
makedata <- function(N=100){
  ntimes = 2 # hard-coded number of time points
  id = rep(1:N, each=ntimes)
  time = rep(1:ntimes, N)

  v = rbinom(N, 1, 0.5)
  w1 = rbinom(N, 1, 0.5)
  x1a = rnorm(N, 0 + w1 + v, 1)
  x1b = rnorm(N, 0 + w1 + v, 1)
  x1q = qgcomp::quantize(data.frame(x1a=x1a, x1b), expnms=c("x1a", "x1b"), breaks = list(c(-Inf, -1, 0, 1, Inf),c(-Inf,-1, 0, 1,Inf)))$data
  y1 = rbinom(N, 1, exp(-5 + .75*x1q$x1a  + 0.25*x1q$x1b + 0.5*w1 + 0.5*v))
  w2 = rbinom(N, 1, 0.5)
  x2a = rnorm(N,  0 + w2 + v, 1)
  x2b = rnorm(N, 0 + w2 + v, 1)
  x2q = qgcomp::quantize(data.frame(x2a=x2a, x2b=x2b), expnms=c("x2a", "x2b"), breaks = list(c(-Inf, -1, 0, 1, Inf),c(-Inf,-1, 0, 1,Inf)))$data
  y2 = rbinom(N, 1, exp(-5 + .75*x2q$x2a  + 0.25*x2q$x2b + 0.5*w2 + 0.5*v))
  cxa = x1a + x2a
  cxb = x1b + x2b
  cu = w1 + w2
  d = apply(cbind(y1,y2), 1, max)
  # "flat" data
  dat = data.frame(v, w1, w2, x1a, x1b, y1, x2a, x2b, y2, cxa, cxb, cu, d)
  # person period data (this is an ugly trick for 2 time points)
  ppdat = data.frame(id, time, v=c(rbind(v, v)), xa=c(rbind(x1a, x2a)), xb=c(rbind(x1b, x2b)), xlag=c(rbind(x1*NA, x1)), w=c(rbind(w1, w2)), y=c(rbind(y1, y2)))
  DROP = c(rbind(ifelse(y1,FALSE,FALSE), ifelse(y1,TRUE,FALSE)))
  list(widedat = dat, longdat = ppdat[!DROP,,drop=FALSE])
}

# note this example is just to show that qgcomp can recover the true weights and joint effect in a simple setting
# simulating under a more complex hazard scenario (e.g. more time points) is trickier, so is not done here
# the breslow estimator of the ties happens to correspond to how the data are generated here, and is not an endorsement of its use generally
tvdat = makedata(N=100000)$longdat
tvdat$intime = tvdat$time-1
qgcomp.cox.noboot(Surv(intime, time, y)~xa+xb + v + w, expnms = c("xa", "xb"), data = tvdat, ties="breslow", breaks = list(c(-Inf, -1, 0, 1, Inf),c(-Inf,-1, 0, 1,Inf)))


# note, typically, we would run the following (which does not correspond to the data generating mechanism here, but is a generally better accepted approach)
qgcomp.cox.noboot(Surv(intime, time, y)~xa+xb + v + w, expnms = c("xa", "xb"), data = tvdat, q=4)
# this quantizes exposures using the quantiles pooled over all time points (you may alternatively wish to specify breaks explicitly, as above)
	# simple application of qgcomp with time-varying exposures
	# the simulated model is for the discrete hazard, given recent exposure and recent values of confounders
	# this method can be used for alternative scenarios (e.g. cumulative exposure), which corresponds to
	# different parametric assumptions

	library(qgcomp)
	library(survival)

	# create simple data set with time-varying exposures
	makedata <- function(N=100){
	ntimes = 2 # hard-coded number of time points
	id = rep(1:N, each=ntimes)
	time = rep(1:ntimes, N)

	v = rbinom(N, 1, 0.5)
	w1 = rbinom(N, 1, 0.5)
	x1a = rnorm(N, 0 + w1 + v, 1)
	x1b = rnorm(N, 0 + w1 + v, 1)
	x1q = qgcomp::quantize(data.frame(x1a=x1a, x1b), expnms=c("x1a", "x1b"), breaks = list(c(-Inf, -1, 0, 1, Inf),c(-Inf,-1, 0, 1,Inf)))$data
	y1 = rbinom(N, 1, exp(-5 + .75x1q$x1a + 0.25x1q$x1b + 0.5w1 + 0.5v))
	w2 = rbinom(N, 1, 0.5)
	x2a = rnorm(N, 0 + w2 + v, 1)
	x2b = rnorm(N, 0 + w2 + v, 1)
	x2q = qgcomp::quantize(data.frame(x2a=x2a, x2b=x2b), expnms=c("x2a", "x2b"), breaks = list(c(-Inf, -1, 0, 1, Inf),c(-Inf,-1, 0, 1,Inf)))$data
	y2 = rbinom(N, 1, exp(-5 + .75x2q$x2a + 0.25x2q$x2b + 0.5w2 + 0.5v))
	cxa = x1a + x2a
	cxb = x1b + x2b
	cu = w1 + w2
	d = apply(cbind(y1,y2), 1, max)
	# "flat" data
	dat = data.frame(v, w1, w2, x1a, x1b, y1, x2a, x2b, y2, cxa, cxb, cu, d)
	# person period data (this is an ugly trick for 2 time points)
	ppdat = data.frame(id, time, v=c(rbind(v, v)), xa=c(rbind(x1a, x2a)), xb=c(rbind(x1b, x2b)), xlag=c(rbind(x1*NA, x1)), w=c(rbind(w1, w2)), y=c(rbind(y1, y2)))
	DROP = c(rbind(ifelse(y1,FALSE,FALSE), ifelse(y1,TRUE,FALSE)))
	list(widedat = dat, longdat = ppdat[!DROP,,drop=FALSE])
	}

	# note this example is just to show that qgcomp can recover the true weights and joint effect in a simple setting
	# simulating under a more complex hazard scenario (e.g. more time points) is trickier, so is not done here
	# the breslow estimator of the ties happens to correspond to how the data are generated here, and is not an endorsement of its use generally
	tvdat = makedata(N=100000)$longdat
	tvdat$intime = tvdat$time-1
	qgcomp.cox.noboot(Surv(intime, time, y)~xa+xb + v + w, expnms = c("xa", "xb"), data = tvdat, ties="breslow", breaks = list(c(-Inf, -1, 0, 1, Inf),c(-Inf,-1, 0, 1,Inf)))



	# note, typically, we would run the following (which does not correspond to the data generating mechanism here, but is a generally better accepted approach)
	qgcomp.cox.noboot(Surv(intime, time, y)~xa+xb + v + w, expnms = c("xa", "xb"), data = tvdat, q=4)
	# this quantizes exposures using the quantiles pooled over all time points (you may alternatively wish to specify breaks explicitly, as above)