-
-
Save rmcelreath/68fb40bf7a3e73bf2623e10a5973216d to your computer and use it in GitHub Desktop.
# proxy confounder problem | |
# cf https://twitter.com/y2silence/status/1251151157264674816 | |
# note that I rename X1,X2 to A,B and A to X | |
# The DAG: | |
# X -> Y | |
# X <- U -> Y | |
# A <- U -> B | |
# Can we use A and B to deconfound X->Y? | |
# Depends upon functions relating the variables | |
# But in linear/gaussian case, progress possible | |
# simulate | |
N <- 500 | |
U <- rnorm(N) | |
X <- rnorm(N, U ) | |
Y <- rnorm(N, 0.5*X - U ) | |
A <- rnorm(N, U ) | |
B <- rnorm(N, U ) | |
# basic regression says what? | |
library(rethinking) | |
precis( lm(Y ~ X) ) # confounded naturally | |
# now full bayes model of SCM | |
# missing data version - works remarkably well | |
# treat each U as unobserved and assign it a parameter | |
# rest is just the data generating model | |
# techinical issue here is that U values can flip pos/neg across chains and then the coefficients flip pos/neg to match | |
# but bXY seems to work fine despite this | |
stan_code <- " | |
data{ | |
int N; | |
vector[N] Y; | |
vector[N] X; | |
vector[N] A; | |
vector[N] B; | |
} | |
parameters{ | |
vector[4] a; | |
real bXY; | |
real bUX; | |
real bUY; | |
real bUA; | |
real<upper=bUA> bUB; | |
vector<lower=0>[4] sigma; | |
vector[N] U; | |
} | |
model{ | |
vector[N] muY; | |
vector[N] muX; | |
vector[N] muA; | |
vector[N] muB; | |
sigma ~ exponential( 1 ); | |
bXY ~ normal( 0 , 1 ); | |
bUY ~ normal( 0 , 1 ); | |
bUX ~ normal( 0 , 1 ); | |
bUA ~ normal( 0 , 1 ); | |
bUB ~ normal( 0 , 1 ); | |
a ~ normal( 0 , 1 ); | |
U ~ normal( 0 , 1 ); | |
// B <- U | |
for ( i in 1:500 ) | |
muB[i] = a[4] + bUB*U[i]; | |
B ~ normal( muB , sigma[4] ); | |
// A <- U | |
for ( i in 1:500 ) | |
muA[i] = a[3] + bUA*U[i]; | |
A ~ normal( muA , sigma[3] ); | |
// X <- U | |
for ( i in 1:500 ) | |
muX[i] = a[2] + bUX*U[i]; | |
X ~ normal( muX , sigma[2] ); | |
// U -> Y <- X | |
for ( i in 1:500 ) | |
muY[i] = a[1] + bXY*X[i] + bUY*U[i]; | |
Y ~ normal( muY , sigma[1] ); | |
} | |
" | |
dat <- list(N=N,Y=Y,X=X,A=A,B=B) | |
ms <- stan( model_code=stan_code , data=dat , chains=1 , cores=4 , control=list(adapt_delta=0.99) , iter=4000 ) | |
precis(ms) | |
# covariance version marginalizing over unobserved U | |
# need to express paths in the covariance matrix | |
# this has pos/neg flipping issues with all the correlations with U | |
# but again gets the X -> Y right | |
stan_marginal <- " | |
data{ | |
int N; | |
vector[N] B; | |
vector[N] A; | |
vector[N] Y; | |
vector[N] X; | |
} | |
transformed data{ | |
vector[4] YY[N]; | |
for ( j in 1:N ) YY[j] = [ Y[j] , X[j] , A[j] , B[j] ]'; | |
} | |
parameters{ | |
real aB; | |
real aA; | |
real aX; | |
real aY; | |
real rXY; | |
real rUX; | |
real rUA; | |
real rUB; | |
real rUY; | |
vector<lower=0>[4] s; // std dev of each obs var | |
} | |
model{ | |
vector[N] muY; | |
matrix[4,4] SIGMA; | |
s ~ exponential( 1 ); | |
rXY ~ normal( 0 , 1 ); | |
rUY ~ normal( 0 , 1 ); | |
rUX ~ normal( 0 , 1 ); | |
rUA ~ normal( 0 , 1 ); | |
rUB ~ normal( 0 , 1 ); | |
aY ~ normal( 0 , 1 ); | |
aX ~ normal( 0 , 1 ); | |
aA ~ normal( 0 , 1 ); | |
aB ~ normal( 0 , 1 ); | |
// build covariance matrix from path model | |
for( i in 1:4 ) SIGMA[i,i] = s[i]^2; | |
SIGMA[1,2] = rUY*rUX + rXY; // YX | |
SIGMA[1,3] = rUY*rUA; // YA | |
SIGMA[1,4] = rUY*rUB; // YB | |
SIGMA[2,3] = rUX*rUA; // XA | |
SIGMA[2,4] = rUX*rUB; // XB | |
SIGMA[3,4] = rUA*rUB; // AB | |
for ( i in 1:3 ) for ( j in (i+1):4 ) SIGMA[j,i] = SIGMA[i,j]; | |
{ | |
vector[4] MU; | |
MU = [ aY , aX , aA , aB ]'; | |
YY ~ multi_normal( MU , SIGMA ); | |
} | |
} | |
" | |
# priors not constrained to produce positive definite covariance matrix | |
# chains will sputter while trying to sample valid initial matrix | |
# some inits could help with adaptation | |
m2s <- stan( model_code=stan_marginal , data=list( N=N , Y=Y , X=X , A=A, B=B ) , chains=1, cores=4 ) | |
precis(m2s) | |
# blavaan | |
# works but priors are quite different | |
library(blavaan) | |
m4 <- 'U =~ A + B + X + Y | |
Y ~ X' | |
mb <- bsem( m4, data=data.frame( Y=Y , X=X , A=A, B=B ) , bcontrol=list(cores=4) ) | |
summary(mb) | |
stancode( mb@external$mcmcout ) # this is unreadable |
If
So, if
Thanks, sounds tricky but makes sense. One other clarification:
Stan continues to run but it is not drawing from the posterior distribution that it would draw from if you incorporated the truncation term.
I see two possible implications:
- The priors you think you are using are not the priors you are actually using.
- The posterior might be improper, because you are using an improper prior.
Is that right, and am I missing other implications?
I don't think (2) is an issue because if you had priors that integrated to 1 before considering that the positive definiteness constraint took a bite out of the parameter space, then whatever is left integrates to some non-constant that is less than 1. But there is also (3) that Stan might not be able to sample efficiently enough for the MCMC CLT to hold.
Ben, say that I want a prior for a 4x4 covariance matrix, except the model specification requires that two off-diagonal entries must be fixed to 0 (say, sigma_12 and sigma_34). What do you do to enforce positive definite there? Something like lkj would be over the whole correlation matrix, without fixed 0s. I typically use a beta prior for each individual correlation, where beta has support on (-1,1), but I can see that possibly being problematic.