alexpkeil1/distribution_of_weights_for_correlated_exposures.R

## distribution_of_weights_for_correlated_exposures.R
library(qgcomp)
library(mvtnorm)

Niter = 1000
seedvals = round(runif(Niter)*.Machine$integer.max)


truemod <- function(X,beta){
  X %*% beta
}


analyze <- function(i, n, seeds){
  # simulate exposure from multivariate normal, and Y is a linear function of exposures
  # this leaves some model misspecification because qgcomp assumes linear function of quantized exposures
  set.seed(seeds[i])
  (Sig <- matrix(c(1.0, 0.95, 0.1, 0.95, 1.0, 0.1, 0.1, 0.1, 1.0), nrow=3, byrow=TRUE))
  # true covariance/correlation matrix (same in this case)
  # [,1] [,2] [,3]
  # [1,]  1.00  0.95  0.1
  # [2,]  0.95  1.00  0.1
  # [3,]  0.10  0.10  1.0
  #
  X <- qgcomp:::.rmvnorm(n, mu=c(0,0,0), Sigma = Sig)
  y <- truemod(X, c(.8,0.0,0.1)) + rnorm(n)

  ft <- suppressMessages(qgcomp.noboot(y ~ ., data=data.frame(y=y,x=X)))
  wts <- c(ft$pos.weights, - ft$neg.weights)
  c(pospsi= ft$pos.psi, negpsi = ft$neg.psi, wt=wts[order(names(wts))])
}

analyze_qtrue <- function(i, n, seeds){
  # simulate exposure from multivariate normal, and Y is a linear function of quantized exposures
  # this is equivalent
  set.seed(seeds[i])
  (Sig <- matrix(c(1.0, 0.95, 0.1, 0.95, 1.0, 0.1, 0.1, 0.1, 1.0), nrow=3, byrow=TRUE))
  X <- qgcomp:::.rmvnorm(n, mu=c(0,0,0), Sigma = Sig)
  qX <- quantize(X, expnms=c(1,2,3))$data
  #cor(qX)
  # estimated correlation matrix in big sample
  # [,1]      [,2]     [,3]
  # [1,] 1.0000000 0.8930608 0.084068
  # [2,] 0.8930608 1.0000000 0.084752
  # [3,] 0.0840680 0.0847520 1.000000

  y <- truemod(qX, c(.8,0.0,0.1)) + rnorm(n)

  ft <- suppressMessages(qgcomp.noboot(y ~ ., data=data.frame(y=y,x=X)))
  wts <- c(ft$pos.weights, - ft$neg.weights)
  c(pospsi= ft$pos.psi, negpsi = ft$neg.psi, wt=wts[order(names(wts))])
}


# results for continuous exposures (and hence misspecified qgcomp)
resL <- lapply(1:Niter, analyze, n=100, seeds=seedvals)
res <- data.frame(do.call(rbind, resL))
apply(res, 2, function(x) c(mean=mean(x), sd=sd(x), prob.gt0 = mean(x>0)))

#             pospsi      negpsi    wt.x.1    wt.x.2      wt.x.3
# mean     0.7996361 -0.02680614 0.5863023 0.1281963 -0.02849861
# sd       0.1342446  0.05815589 0.2770571 0.5136043  0.40863716
# prob.gt0 1.0000000  0.00000000 0.9870000 0.8390000  0.83200000 # probability that quantity is > 0


# results for quantized exposures (correctly specified qgcomp)
resL <- lapply(1:Niter, analyze_qtrue, n=100, seeds=seedvals)
res <- data.frame(do.call(rbind, resL))
apply(res, 2, function(x) c(mean=mean(x), sd=sd(x), prob.gt0 = mean(x>0)))

#             pospsi      negpsi    wt.x.1    wt.x.2      wt.x.3
# mean     0.9841925 -0.07942579 0.8009307 -0.3662812 0.008350436
# sd       0.1539105  0.11361196 0.1486859  0.5858848 0.326567641
# prob.gt0 1.0000000  0.00000000 1.0000000  0.5240000 0.861000000

hist(res$wt.x.2)
	library(qgcomp)
	library(mvtnorm)

	Niter = 1000
	seedvals = round(runif(Niter)*.Machine$integer.max)


	truemod <- function(X,beta){
	X %*% beta
	}


	analyze <- function(i, n, seeds){
	# simulate exposure from multivariate normal, and Y is a linear function of exposures
	# this leaves some model misspecification because qgcomp assumes linear function of quantized exposures
	set.seed(seeds[i])
	(Sig <- matrix(c(1.0, 0.95, 0.1, 0.95, 1.0, 0.1, 0.1, 0.1, 1.0), nrow=3, byrow=TRUE))
	# true covariance/correlation matrix (same in this case)
	# [,1] [,2] [,3]
	# [1,] 1.00 0.95 0.1
	# [2,] 0.95 1.00 0.1
	# [3,] 0.10 0.10 1.0
	#
	X <- qgcomp:::.rmvnorm(n, mu=c(0,0,0), Sigma = Sig)
	y <- truemod(X, c(.8,0.0,0.1)) + rnorm(n)

	ft <- suppressMessages(qgcomp.noboot(y ~ ., data=data.frame(y=y,x=X)))
	wts <- c(ft$pos.weights, - ft$neg.weights)
	c(pospsi= ft$pos.psi, negpsi = ft$neg.psi, wt=wts[order(names(wts))])
	}

	analyze_qtrue <- function(i, n, seeds){
	# simulate exposure from multivariate normal, and Y is a linear function of quantized exposures
	# this is equivalent
	set.seed(seeds[i])
	(Sig <- matrix(c(1.0, 0.95, 0.1, 0.95, 1.0, 0.1, 0.1, 0.1, 1.0), nrow=3, byrow=TRUE))
	X <- qgcomp:::.rmvnorm(n, mu=c(0,0,0), Sigma = Sig)
	qX <- quantize(X, expnms=c(1,2,3))$data
	#cor(qX)
	# estimated correlation matrix in big sample
	# [,1] [,2] [,3]
	# [1,] 1.0000000 0.8930608 0.084068
	# [2,] 0.8930608 1.0000000 0.084752
	# [3,] 0.0840680 0.0847520 1.000000

	y <- truemod(qX, c(.8,0.0,0.1)) + rnorm(n)

	ft <- suppressMessages(qgcomp.noboot(y ~ ., data=data.frame(y=y,x=X)))
	wts <- c(ft$pos.weights, - ft$neg.weights)
	c(pospsi= ft$pos.psi, negpsi = ft$neg.psi, wt=wts[order(names(wts))])
	}


	# results for continuous exposures (and hence misspecified qgcomp)
	resL <- lapply(1:Niter, analyze, n=100, seeds=seedvals)
	res <- data.frame(do.call(rbind, resL))
	apply(res, 2, function(x) c(mean=mean(x), sd=sd(x), prob.gt0 = mean(x>0)))

	# pospsi negpsi wt.x.1 wt.x.2 wt.x.3
	# mean 0.7996361 -0.02680614 0.5863023 0.1281963 -0.02849861
	# sd 0.1342446 0.05815589 0.2770571 0.5136043 0.40863716
	# prob.gt0 1.0000000 0.00000000 0.9870000 0.8390000 0.83200000 # probability that quantity is > 0



	# results for quantized exposures (correctly specified qgcomp)
	resL <- lapply(1:Niter, analyze_qtrue, n=100, seeds=seedvals)
	res <- data.frame(do.call(rbind, resL))
	apply(res, 2, function(x) c(mean=mean(x), sd=sd(x), prob.gt0 = mean(x>0)))

	# pospsi negpsi wt.x.1 wt.x.2 wt.x.3
	# mean 0.9841925 -0.07942579 0.8009307 -0.3662812 0.008350436
	# sd 0.1539105 0.11361196 0.1486859 0.5858848 0.326567641
	# prob.gt0 1.0000000 0.00000000 1.0000000 0.5240000 0.861000000

	hist(res$wt.x.2)