Created
March 16, 2020 17:43
-
-
Save alexpkeil1/1e4bdf8e2101198c6da454e4a2ca1937 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example in which weighted quantile sums estimates a significant, positive effect of exposure due to pure confounding | |
# even though there are no exposures that have a positive effect on the outcome | |
library(qgcomp) | |
library(gWQS) | |
dgm <- function(N = 100, b0=0, coef=c(1,0,0,0), family=gaussian(), ncor=0, corr=0.75, umc=FALSE){ | |
# simulate under data structure where WQS is the truth: e.g. an exposure with | |
# multinomial distribution | |
p = length(coef) | |
if(ncor >= p) ncor = p-1 | |
X = matrix(nrow=N, ncol=p) | |
colnames(X) <- paste0("x", 1:p) | |
xmaster = sample(rep(0:3, length.out=N), N, replace=FALSE) | |
for(k in 1:p){ | |
newx = numeric(N) | |
c1 = as.logical(rbinom(N, 1, sqrt(corr))) | |
newx[which(c1)] = xmaster[which(c1)] | |
newx[which(!c1)] = sample(xmaster[which(!c1)]) | |
if(k<=(ncor+1)){ | |
X[,k] = newx | |
} else X[,k] = sample(xmaster) | |
} | |
if(!umc) C = 0 | |
if(umc) { | |
newx = numeric(N) | |
c1 = as.logical(rbinom(N, 1, sqrt(corr))) | |
newx[which(c1)] = X[,1][which(c1)] | |
newx[which(!c1)] = sample(X[,1][which(!c1)]) | |
C = newx | |
} | |
py = b0 + X %*% coef + C * 0.5 | |
if(family$family=='binomial') y = rbinom(N, 1, 1/(1+exp(-py))) | |
if(family$family=='gaussian') y = rnorm(N) + py | |
list(dat=data.frame(X,y), family=family, C=C) | |
} | |
set.seed(123) | |
# simulate so there is one causal effect in the negative direction that is correlated with a | |
# single, non-causal exposure | |
dat = dgm(N = 500, coef=c(0, 1, 0, 0, 0), ncor = 1, corr = .5, family=gaussian()) | |
dat$dat$x2=3-dat$dat$x2 # a trick to reverse the effect | |
cor(dat$dat) # x2 is negatively correlated with X1 and negatively correlated with Y (which induces a positive bivariate correlation between X and Y) | |
# x1 x2 x3 x4 x5 y | |
#x1 1.0000000 -0.478400 -0.075200000 -0.02080000 0.01760000 0.339131386 | |
#x2 -0.4784000 1.000000 0.016000000 0.02880000 0.06080000 -0.730573991 | |
#x3 -0.0752000 0.016000 1.000000000 0.05920000 -0.02080000 -0.002838093 | |
#x4 -0.0208000 0.028800 0.059200000 1.00000000 -0.03680000 -0.013095436 | |
#x5 0.0176000 0.060800 -0.020800000 -0.03680000 1.00000000 0.013773360 | |
#y 0.3391314 -0.730574 -0.002838093 -0.01309544 0.01377336 1.000000000 | |
# analyzing these data with a linear model, quantile g-computation, and weighted quantile sum regression | |
# linear model gets it right (there are no coefficients suggesting a positive effect) | |
lm(y~x1 + x2 + x3 + x4 + x5,dat=dat$dat) | |
# Coefficients: | |
# (Intercept) x1 x2 x3 x4 x5 | |
# 2.92968 -0.04449 -0.97942 0.03914 -0.05154 0.08200 | |
# qgcomp gets it right | |
qgcomp.noboot(y~.,dat=dat$dat, family=dat$family) | |
# Estimate Std. Error Lower CI Upper CI t value Pr(>|z|) | |
# (Intercept) 2.92968 0.16978 2.5969 3.26245 17.256 < 2.2e-16 | |
# psi1 -0.95430 0.10901 -1.1680 -0.74064 -8.754 < 2.2e-16 | |
#wqs conjures a positive effect out of nowhere | |
wqsresults = gwqs(y ~ wqs, mix_name = c("x1", "x2", "x3", "x4", "x5"), data = dat$dat, plan_strategy="multiprocess", validation=0.6, q = NULL, family = "gaussian", seed = NULL, plots=FALSE) | |
summary(wqsresults$fit) | |
#Coefficients: | |
# Estimate Std. Error t value Pr(>|t|) | |
# (Intercept) 0.6791 0.2061 3.295 0.0011 ** | |
# wqs 0.5648 0.1292 4.371 1.71e-05 *** | |
wqsresults$final_weights | |
# mix_name mean_weight | |
# x1 x1 0.4817969 | |
# x3 x3 0.2544034 | |
# x5 x5 0.1608151 | |
# x4 x4 0.1029847 | |
# x2 x2 0.0000000 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment