# Example in which weighted quantile sum (WQS) regression estimates a significant, positive mixture effect
# due purely to confounding, even though no exposure has a positive effect on the outcome
library(qgcomp)
library(gWQS)
dgm <- function(N = 100, b0 = 0, coef = c(1, 0, 0, 0), family = gaussian(),
                ncor = 0, corr = 0.75, umc = FALSE){
  # simulate under a data structure where WQS is the truth: e.g. exposures that are
  # noisy copies of a single 4-level (multinomial) master exposure
  p = length(coef)
  if(ncor >= p) ncor = p - 1
  X = matrix(nrow = N, ncol = p)
  colnames(X) <- paste0("x", 1:p)
  # master exposure: balanced 4-level scores (0-3)
  xmaster = sample(rep(0:3, length.out = N), N, replace = FALSE)
  for(k in 1:p){
    # the first (ncor + 1) exposures are correlated with the master exposure;
    # the remaining exposures are independent permutations of it
    newx = numeric(N)
    c1 = as.logical(rbinom(N, 1, sqrt(corr)))
    newx[which(c1)] = xmaster[which(c1)]
    newx[which(!c1)] = sample(xmaster[which(!c1)])
    if(k <= (ncor + 1)){
      X[, k] = newx
    } else X[, k] = sample(xmaster)
  }
  # optional unmeasured confounder, correlated with x1
  if(!umc) C = 0
  if(umc){
    newx = numeric(N)
    c1 = as.logical(rbinom(N, 1, sqrt(corr)))
    newx[which(c1)] = X[, 1][which(c1)]
    newx[which(!c1)] = sample(X[, 1][which(!c1)])
    C = newx
  }
  # linear predictor and outcome
  py = b0 + X %*% coef + C * 0.5
  if(family$family == 'binomial') y = rbinom(N, 1, 1/(1 + exp(-py)))
  if(family$family == 'gaussian') y = rnorm(N) + py
  list(dat = data.frame(X, y), family = family, C = C)
}
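# quick sanity check of dgm() (illustrative only; `chk` is a throwaway object):
# the exposures are 4-level scores (0-3) built from a shared master variable and y
# is continuous; set.seed() below resets the RNG, so this draw does not change the
# results that follow
chk = dgm(N = 20, coef = c(0, 1, 0, 0, 0), ncor = 1, corr = 0.5)
str(chk$dat)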
set.seed(123)
# simulate so there is a single causal exposure with a negative effect (x2, after the
# reversal below) that is correlated with a single non-causal exposure (x1)
dat = dgm(N = 500, coef = c(0, 1, 0, 0, 0), ncor = 1, corr = 0.5, family = gaussian())
dat$dat$x2 = 3 - dat$dat$x2 # a trick to reverse the direction of x2's effect (y was generated before the flip)
cor(dat$dat) # x2 is negatively correlated with x1 and negatively correlated with y, which induces a positive bivariate correlation between x1 and y
#            x1        x2           x3          x4          x5            y
# x1  1.0000000 -0.478400 -0.075200000 -0.02080000  0.01760000  0.339131386
# x2 -0.4784000  1.000000  0.016000000  0.02880000  0.06080000 -0.730573991
# x3 -0.0752000  0.016000  1.000000000  0.05920000 -0.02080000 -0.002838093
# x4 -0.0208000  0.028800  0.059200000  1.00000000 -0.03680000 -0.013095436
# x5  0.0176000  0.060800 -0.020800000 -0.03680000  1.00000000  0.013773360
# y   0.3391314 -0.730574 -0.002838093 -0.01309544  0.01377336  1.000000000
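# (illustrative check) the confounding in action: a bivariate regression of y on x1
# alone should yield a positive coefficient, even though x1 has no effect on y
# conditional on x2 -- x1 is just a noisy copy of the (reversed) causal exposure x2
lm(y ~ x1, data = dat$dat)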
# analyze these data with a linear model, quantile g-computation, and weighted quantile sum regression
# the linear model gets it right: x2's coefficient is close to its true value of -1,
# and no other coefficient suggests an appreciable positive effect
lm(y ~ x1 + x2 + x3 + x4 + x5, data = dat$dat)
# Coefficients:
# (Intercept)         x1         x2         x3         x4         x5
#     2.92968   -0.04449   -0.97942    0.03914   -0.05154    0.08200
# qgcomp gets it right: the estimated overall mixture effect (psi1) is negative
qgcomp.noboot(y ~ ., data = dat$dat, family = dat$family)
#             Estimate Std. Error Lower CI Upper CI t value  Pr(>|z|)
# (Intercept)  2.92968    0.16978   2.5969  3.26245  17.256 < 2.2e-16
# psi1        -0.95430    0.10901  -1.1680 -0.74064  -8.754 < 2.2e-16
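# qgcomp also reports direction-specific weights; a quick sketch, assuming the fitted
# object stores them in $pos.weights and $neg.weights as described in the qgcomp
# documentation -- the negative weight should fall almost entirely on x2
qcfit = qgcomp.noboot(y ~ ., data = dat$dat, family = dat$family)
qcfit$neg.weights
qcfit$pos.weights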
# WQS conjures a positive effect out of nowhere: by default the index is constrained to
# act in the positive direction, so the weight shifts onto the non-causal exposures
wqsresults = gwqs(y ~ wqs, mix_name = c("x1", "x2", "x3", "x4", "x5"), data = dat$dat,
                  plan_strategy = "multiprocess", validation = 0.6, q = NULL,
                  family = "gaussian", seed = NULL, plots = FALSE)
summary(wqsresults$fit)
# Coefficients:
#             Estimate Std. Error t value Pr(>|t|)
# (Intercept)   0.6791     0.2061   3.295   0.0011 **
# wqs           0.5648     0.1292   4.371 1.71e-05 ***
wqsresults$final_weights
#    mix_name mean_weight
# x1       x1   0.4817969
# x3       x3   0.2544034
# x5       x5   0.1608151
# x4       x4   0.1029847
# x2       x2   0.0000000
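# for contrast, a sketch assuming gWQS's b1_pos argument constrains the direction of
# the index as documented: forcing the index to be negative should recover a negative
# mixture effect driven mainly by x2 (the remaining arguments mirror the call above)
wqsneg = gwqs(y ~ wqs, mix_name = c("x1", "x2", "x3", "x4", "x5"), data = dat$dat,
              plan_strategy = "multiprocess", validation = 0.6, q = NULL,
              family = "gaussian", seed = NULL, b1_pos = FALSE, plots = FALSE)
summary(wqsneg$fit)
wqsneg$final_weights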