Skip to content

Instantly share code, notes, and snippets.

@khakieconomics
Created May 6, 2016 03:25
Show Gist options
  • Save khakieconomics/988a5d568162cbd96f5c3b8bf8ef4ddb to your computer and use it in GitHub Desktop.
Save khakieconomics/988a5d568162cbd96f5c3b8bf8ef4ddb to your computer and use it in GitHub Desktop.
library(MASS); library(rstan)
# Sample size
N <- 5000
# Generate two uncorrelated covariates (means 1 and 5, both with standard deviations of 1)
X <- mvrnorm(N, c(1, 5), matrix(c(1, 0, 0, 1), 2, 2))
plot(X)
# Loadings matrix
Gamma <- matrix(c(0.5, 1,
-1, 2),
2, 2)
Gamma
# Let's generate the latent variables
# Mean vector comes from X * Gamma
# Covariance matrix?
# correlation matrix
Omega <- matrix(c(1, 0.5, 0.5, 1), 2, 2)
Omega
# standard deviations
tau <- c(3, 2)
# Covariance matrix by definition of what a covariance matrix is -- the quadratic form of a correlation matrix and scale vectors
Sigma <- diag(tau) %*% Omega %*% diag(tau)
# Now draw some latent variables. The mean at each data point comes from X*Gamma, and the covariance from Sigma.
Z <- t(apply(X %*% Gamma, 1, function(x) mvrnorm(1, mu = x, Sigma = Sigma)))
# Couple of negatives. Get rid of these.
Z[Z<=0] <- 0.1
# For each person we draw their citation count from a negative binomial distribution
# apply the rnegbin distribution to columns (margin 2)
Y <- apply(Z, 2, rnbinom, n = N, prob = 0.5)
# So now we have outcomes Y, inputs X, and we want to estimate Gamma, Omega, and tau
mod <- "
data {
int N;
int P; // number of outcomes
int P2; // number of features
int<lower = 0> Y[N, P]; // your outcomes
matrix[N, P2] X; // your features
}
parameters {
matrix<lower = 0>[N, P] Z;// latent variables
matrix[P2, P2] Gamma; // loading matrix
vector<lower = 0>[P2] tau; // scale of residuals
real<lower = 0, upper = 1> theta1; // shape parameter for negative binomial
real<lower = 0, upper = 1> theta2;
corr_matrix[P2] Omega;
}
model {
// priors
Omega ~ lkj_corr(3); // 3 is fairly loose prior on correlations
tau ~ cauchy(0, 1); // no idea, Cauchy allows for potentially big errors
to_vector(Gamma) ~ normal(0, 2); // fairly loose prior
theta1 ~ beta(0.5, 0.5);
theta2 ~ beta(0.5, 0.5);
// latent variables
for(n in 1:N) {
Z[n] ~ multi_normal(X[n]*Gamma, quad_form_diag(Omega, tau));
}
// likelihood
Y[,1] ~ neg_binomial(Z[,1], theta1);
Y[,2] ~ neg_binomial(Z[,2], theta2);
}"
compiled_model <- stan_model(model_code = mod)
# Try estimating it using penalized likelihood
optim_test <- optimizing(compiled_model, data = list(N = nrow(Y), P = 2, P2 = 2, X = X, Y = Y))
hmc_test <- sampling(compiled_model, data = list(N = nrow(Y), P = 2, P2 = 2, X = X, Y = Y), iter = 1000, cores = 4)
shinystan::launch_shinystan(hmc_test)
print(hmc_test, pars = c("Gamma", "tau", "Omega"))
Gamma
Omega
tau
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment