Created
May 6, 2016 03:25
-
-
Save khakieconomics/988a5d568162cbd96f5c3b8bf8ef4ddb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(MASS); library(rstan) | |
# Sample size | |
N <- 5000 | |
# Generate two uncorrelated covariates (means 1 and 5, both with standard deviations of 1) | |
X <- mvrnorm(N, c(1, 5), matrix(c(1, 0, 0, 1), 2, 2)) | |
plot(X) | |
# Loadings matrix | |
Gamma <- matrix(c(0.5, 1, | |
-1, 2), | |
2, 2) | |
Gamma | |
# Let's generate the latent variables | |
# Mean vector comes from X * Gamma | |
# Covariance matrix? | |
# correlation matrix | |
Omega <- matrix(c(1, 0.5, 0.5, 1), 2, 2) | |
Omega | |
# standard deviations | |
tau <- c(3, 2) | |
# Covariance matrix by definition of what a covariance matrix is -- the quadratic form of a correlation matrix and scale vectors | |
Sigma <- diag(tau) %*% Omega %*% diag(tau) | |
# Now draw some latent variables. The mean at each data point comes from X*Gamma, and the covariance from Sigma. | |
Z <- t(apply(X %*% Gamma, 1, function(x) mvrnorm(1, mu = x, Sigma = Sigma))) | |
# Couple of negatives. Get rid of these. | |
Z[Z<=0] <- 0.1 | |
# For each person we draw their citation count from a negative binomial distribution | |
# apply the rnegbin distribution to columns (margin 2) | |
Y <- apply(Z, 2, rnbinom, n = N, prob = 0.5) | |
# So now we have outcomes Y, inputs X, and we want to estimate Gamma, Omega, and tau | |
mod <- " | |
data { | |
int N; | |
int P; // number of outcomes | |
int P2; // number of features | |
int<lower = 0> Y[N, P]; // your outcomes | |
matrix[N, P2] X; // your features | |
} | |
parameters { | |
matrix<lower = 0>[N, P] Z;// latent variables | |
matrix[P2, P2] Gamma; // loading matrix | |
vector<lower = 0>[P2] tau; // scale of residuals | |
real<lower = 0, upper = 1> theta1; // shape parameter for negative binomial | |
real<lower = 0, upper = 1> theta2; | |
corr_matrix[P2] Omega; | |
} | |
model { | |
// priors | |
Omega ~ lkj_corr(3); // 3 is fairly loose prior on correlations | |
tau ~ cauchy(0, 1); // no idea, Cauchy allows for potentially big errors | |
to_vector(Gamma) ~ normal(0, 2); // fairly loose prior | |
theta1 ~ beta(0.5, 0.5); | |
theta2 ~ beta(0.5, 0.5); | |
// latent variables | |
for(n in 1:N) { | |
Z[n] ~ multi_normal(X[n]*Gamma, quad_form_diag(Omega, tau)); | |
} | |
// likelihood | |
Y[,1] ~ neg_binomial(Z[,1], theta1); | |
Y[,2] ~ neg_binomial(Z[,2], theta2); | |
}" | |
compiled_model <- stan_model(model_code = mod) | |
# Try estimating it using penalized likelihood | |
optim_test <- optimizing(compiled_model, data = list(N = nrow(Y), P = 2, P2 = 2, X = X, Y = Y)) | |
hmc_test <- sampling(compiled_model, data = list(N = nrow(Y), P = 2, P2 = 2, X = X, Y = Y), iter = 1000, cores = 4) | |
shinystan::launch_shinystan(hmc_test) | |
print(hmc_test, pars = c("Gamma", "tau", "Omega")) | |
Gamma | |
Omega | |
tau |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment