---
title: "Bayesian SEM example"
output: html_notebook
---
The example below will use the "Political Democracy" dataset from the lavaan package.
```{r}
library(lavaan)
library(rstan)
options(mc.cores=4) # Run MCMC chains in parallel on 4 cores
library(ggplot2)
library(bayesplot)
library(ggjoy)
library(tidyr)
data("PoliticalDemocracy")
ds <- PoliticalDemocracy
rm(PoliticalDemocracy)
head(ds)
```
The lavaan example proposes the following model, with the residual correlations omitted here for brevity:
```{r}
model <- '
ind60 =~ x1 + x2 + x3
dem60 =~ y1 + y2 + y3 + y4
dem65 =~ y5 + y6 + y7 + y8
dem60 ~ ind60
dem65 ~ ind60 + dem60
'
lavOut <- sem(model=model,data = ds,std.lv=TRUE,std.ov=TRUE)
summary(lavOut,standardized=TRUE)
```
A similar model can be fit in Stan using the code below.
This model is _not_ estimated with a covariance-fitting approach; rather, it is written as a true system of linear models, with the latent variables treated as missing values to be estimated.
The code is written for _clarity_, not for generalizability, efficiency, or brevity; it could be greatly simplified (in an engineering sense) by using matrices.
The observed variables are standardized so that the mean structure is approximately zero, which keeps the model simple and makes the priors easier to set.
Finally, variance identification is used: the exogenous latent variable has its variance fixed to 1, the endogenous latent variables have their residual variances fixed to 1, and all loadings are freely estimated (constrained to be positive to fix their sign).
```{r}
stanModel <- '
data {
int N; // Number of observations (countries)
// Observed indicators: x1-x3 for ind60; y1-y4 for dem60; y5-y8 for dem65
vector[N] x1;
vector[N] x2;
vector[N] x3;
vector[N] y1;
vector[N] y2;
vector[N] y3;
vector[N] y4;
vector[N] y5;
vector[N] y6;
vector[N] y7;
vector[N] y8;
}
parameters {
// Measurement models
// Loadings
vector<lower=0>[3] lambdaInd60;
vector<lower=0>[4] lambdaDem60;
vector<lower=0>[4] lambdaDem65;
// Residuals/Deltas
vector<lower=0>[3] residInd60;
vector<lower=0>[4] residDem60;
vector<lower=0>[4] residDem65;
// Mean structure omitted due to standardization
// Factor scores
vector[N] ind60;
vector[N] dem60;
vector[N] dem65;
// Structural coefficients
real betaInd60_dem60;
real betaInd60_dem65;
real betaDem60_dem65;
}
model {
// Priors
lambdaInd60 ~ normal(0,1);
lambdaDem60 ~ normal(0,1);
lambdaDem65 ~ normal(0,1);
residInd60 ~ cauchy(0,1);
residDem60 ~ cauchy(0,1);
residDem65 ~ cauchy(0,1);
betaInd60_dem60 ~ normal(0,10);
betaInd60_dem65 ~ normal(0,10);
betaDem60_dem65 ~ normal(0,10);
// Factor scores; Variance-parameterization
ind60 ~ normal(0,1);
dem60 ~ normal(betaInd60_dem60*ind60,1);
dem65 ~ normal(betaInd60_dem65*ind60 + betaDem60_dem65*dem60,1);
// Likelihoods
x1 ~ normal(lambdaInd60[1]*ind60,sqrt(residInd60[1])); // Using sqrt() so resid will be variances, not residual SD
x2 ~ normal(lambdaInd60[2]*ind60,sqrt(residInd60[2])); // This makes comparison to lavaan a bit simpler
x3 ~ normal(lambdaInd60[3]*ind60,sqrt(residInd60[3]));
y1 ~ normal(lambdaDem60[1]*dem60, sqrt(residDem60[1]));
y2 ~ normal(lambdaDem60[2]*dem60, sqrt(residDem60[2]));
y3 ~ normal(lambdaDem60[3]*dem60, sqrt(residDem60[3]));
y4 ~ normal(lambdaDem60[4]*dem60, sqrt(residDem60[4]));
y5 ~ normal(lambdaDem65[1]*dem65, sqrt(residDem65[1]));
y6 ~ normal(lambdaDem65[2]*dem65, sqrt(residDem65[2]));
y7 ~ normal(lambdaDem65[3]*dem65, sqrt(residDem65[3]));
y8 ~ normal(lambdaDem65[4]*dem65, sqrt(residDem65[4]));
}
generated quantities {
vector[3] betas_std;
{
real sd_dem60 = sqrt(1 + variance(betaInd60_dem60*ind60)); // Var(Y) = Var(Error) + Var(Predicted)
real sd_dem65 = sqrt(1 + variance(betaInd60_dem65*ind60 + betaDem60_dem65*dem60));
betas_std[1] = betaInd60_dem60/sd_dem60; //Standardized beta = beta*sd(x)/sd(y)
betas_std[2] = betaInd60_dem65/sd_dem65;
betas_std[3] = betaDem60_dem65*sd_dem60/sd_dem65;
}
}
'
dsStd <- as.data.frame(scale(ds)) # Standardize observed variables, so mean structure is vector of 0s.
# adapt_delta raised to .99 to reduce divergent transitions
stanOut <- stan(model_code = stanModel,data=c(dsStd,N=nrow(dsStd)),control=list(adapt_delta=.99),refresh=0,iter=5000)
summary(stanOut,pars=c('lambdaInd60','lambdaDem60','lambdaDem65'))$summary[,c(1,3,4,8)] # Columns 1,3,4,8: mean, sd, 2.5%, 97.5%
summary(stanOut,pars=c('residInd60','residDem60','residDem65'))$summary[,c(1,3,4,8)]
summary(stanOut,pars=c('betaInd60_dem60','betaInd60_dem65','betaDem60_dem65','betas_std'))$summary[,c(1,3,4,8)]
```
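Since bayesplot is already loaded, the posterior intervals can also be inspected graphically. This is a sketch added here, not part of the original model code; it only assumes the `stanOut` object defined above.
```{r}
# Sketch: posterior interval plots via bayesplot
mcmc_intervals(as.matrix(stanOut, pars = c('lambdaInd60','lambdaDem60','lambdaDem65')))
mcmc_intervals(as.matrix(stanOut, pars = c('betaInd60_dem60','betaInd60_dem65','betaDem60_dem65','betas_std')))
```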
The loadings, residual variances, and betas (unstandardized and standardized) can be compared between lavaan and the Stan output.
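As a rough side-by-side sketch (not part of the original gist), the posterior means of the loadings can be placed next to lavaan's point estimates; the rows should line up because both follow the order in which the indicators appear in the model.
```{r}
# Sketch: compare lavaan loadings with Stan posterior means (assumes lavOut and stanOut from above)
lavLoadings <- parameterEstimates(lavOut)
lavLoadings <- lavLoadings[lavLoadings$op == '=~', c('lhs', 'rhs', 'est')]
stanLoadings <- summary(stanOut, pars = c('lambdaInd60', 'lambdaDem60', 'lambdaDem65'))$summary[, 'mean']
cbind(lavLoadings, stanMean = stanLoadings)
```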
The factor scores are estimated "for free":
```{r}
# Reshape the posterior draws so each row of the plot is one observation's factor-score distribution
ind60 <- as.data.frame(stanOut,pars='ind60') %>% gather(key='Subject',value='Score')
dem60 <- as.data.frame(stanOut,pars='dem60') %>% gather(key='Subject',value='Score')
dem65 <- as.data.frame(stanOut,pars='dem65') %>% gather(key='Subject',value='Score')
ggplot(data=ind60, aes(x=Score,y=Subject)) + geom_joy()
ggplot(data=dem60, aes(x=Score,y=Subject)) + geom_joy()
ggplot(data=dem65, aes(x=Score,y=Subject)) + geom_joy()
```
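As another optional sketch (not in the original gist), the posterior-mean factor scores can be put next to lavaan's factor scores from `lavPredict()`; the two are computed differently, so expect high correlations rather than exact agreement.
```{r}
# Sketch: correlate posterior-mean factor scores with lavaan's factor scores (assumes lavOut and stanOut from above)
stanScores <- summary(stanOut, pars = c('ind60', 'dem60', 'dem65'))$summary[, 'mean']
stanScores <- matrix(stanScores, ncol = 3, dimnames = list(NULL, c('ind60', 'dem60', 'dem65')))
lavScores <- lavPredict(lavOut) # lavaan's factor scores (default method)
diag(cor(stanScores, lavScores[, colnames(stanScores)]))
```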