|
library(qgcomp) |
|
library(mvtnorm) |
|
|
|
Niter = 1000 |
|
seedvals = round(runif(Niter)*.Machine$integer.max) |
|
|
|
|
|
truemod <- function(X,beta){ |
|
X %*% beta |
|
} |
|
|
|
|
|
analyze <- function(i, n, seeds){ |
|
# simulate exposure from multivariate normal, and Y is a linear function of exposures |
|
# this leaves some model misspecification because qgcomp assumes linear function of quantized exposures |
|
set.seed(seeds[i]) |
|
(Sig <- matrix(c(1.0, 0.95, 0.1, 0.95, 1.0, 0.1, 0.1, 0.1, 1.0), nrow=3, byrow=TRUE)) |
|
# true covariance/correlation matrix (same in this case) |
|
# [,1] [,2] [,3] |
|
# [1,] 1.00 0.95 0.1 |
|
# [2,] 0.95 1.00 0.1 |
|
# [3,] 0.10 0.10 1.0 |
|
# |
|
X <- qgcomp:::.rmvnorm(n, mu=c(0,0,0), Sigma = Sig) |
|
y <- truemod(X, c(.8,0.0,0.1)) + rnorm(n) |
|
|
|
ft <- suppressMessages(qgcomp.noboot(y ~ ., data=data.frame(y=y,x=X))) |
|
wts <- c(ft$pos.weights, - ft$neg.weights) |
|
c(pospsi= ft$pos.psi, negpsi = ft$neg.psi, wt=wts[order(names(wts))]) |
|
} |
|
|
|
analyze_qtrue <- function(i, n, seeds){ |
|
# simulate exposure from multivariate normal, and Y is a linear function of quantized exposures |
|
# this is equivalent |
|
set.seed(seeds[i]) |
|
(Sig <- matrix(c(1.0, 0.95, 0.1, 0.95, 1.0, 0.1, 0.1, 0.1, 1.0), nrow=3, byrow=TRUE)) |
|
X <- qgcomp:::.rmvnorm(n, mu=c(0,0,0), Sigma = Sig) |
|
qX <- quantize(X, expnms=c(1,2,3))$data |
|
#cor(qX) |
|
# estimated correlation matrix in big sample |
|
# [,1] [,2] [,3] |
|
# [1,] 1.0000000 0.8930608 0.084068 |
|
# [2,] 0.8930608 1.0000000 0.084752 |
|
# [3,] 0.0840680 0.0847520 1.000000 |
|
|
|
y <- truemod(qX, c(.8,0.0,0.1)) + rnorm(n) |
|
|
|
ft <- suppressMessages(qgcomp.noboot(y ~ ., data=data.frame(y=y,x=X))) |
|
wts <- c(ft$pos.weights, - ft$neg.weights) |
|
c(pospsi= ft$pos.psi, negpsi = ft$neg.psi, wt=wts[order(names(wts))]) |
|
} |
|
|
|
|
|
# results for continuous exposures (and hence misspecified qgcomp) |
|
resL <- lapply(1:Niter, analyze, n=100, seeds=seedvals) |
|
res <- data.frame(do.call(rbind, resL)) |
|
apply(res, 2, function(x) c(mean=mean(x), sd=sd(x), prob.gt0 = mean(x>0))) |
|
|
|
# pospsi negpsi wt.x.1 wt.x.2 wt.x.3 |
|
# mean 0.7996361 -0.02680614 0.5863023 0.1281963 -0.02849861 |
|
# sd 0.1342446 0.05815589 0.2770571 0.5136043 0.40863716 |
|
# prob.gt0 1.0000000 0.00000000 0.9870000 0.8390000 0.83200000 # probability that quantity is > 0 |
|
|
|
|
|
|
|
# results for quantized exposures (correctly specified qgcomp) |
|
resL <- lapply(1:Niter, analyze_qtrue, n=100, seeds=seedvals) |
|
res <- data.frame(do.call(rbind, resL)) |
|
apply(res, 2, function(x) c(mean=mean(x), sd=sd(x), prob.gt0 = mean(x>0))) |
|
|
|
# pospsi negpsi wt.x.1 wt.x.2 wt.x.3 |
|
# mean 0.9841925 -0.07942579 0.8009307 -0.3662812 0.008350436 |
|
# sd 0.1539105 0.11361196 0.1486859 0.5858848 0.326567641 |
|
# prob.gt0 1.0000000 0.00000000 1.0000000 0.5240000 0.861000000 |
|
|
|
hist(res$wt.x.2) |
This looks at the distributions of the weights for non-causal exposures that are highly correlated with a strongly causal exposure. The mean weight will generally be in the opposite direction of the effect of the strongly causal exposure, but the probability that the weight will be greater than/less than zero should be 0.5 in a correctly specified model. The distribution of the weight will be multimodal with a peak at -1.0 and then a smoother distribution for positive weights with most of the weight around 0.0.