khakieconomics/example_for_jeff.R

## example_for_jeff.R

library(MASS); library(rstan)
# Sample size
N <- 5000

# Generate two uncorrelated covariates (means 1 and 5, both with standard deviations of 1)
X <- mvrnorm(N, c(1, 5), matrix(c(1, 0, 0, 1), 2, 2))
plot(X)
# Loadings matrix
Gamma <- matrix(c(0.5, 1,
                  -1, 2),
                2, 2)
Gamma

# Let's generate the latent variables

# Mean vector comes from X * Gamma

# Covariance matrix?

# correlation matrix
Omega <- matrix(c(1, 0.5, 0.5, 1), 2, 2)
Omega

# standard deviations
tau <- c(3, 2)

# Covariance matrix by definition of what a covariance matrix is -- the quadratic form of a correlation matrix and scale vectors
Sigma <- diag(tau) %*% Omega %*% diag(tau)

# Now draw some latent variables. The mean at each data point comes from X*Gamma, and the covariance from Sigma.
Z <- t(apply(X %*% Gamma, 1, function(x) mvrnorm(1, mu = x, Sigma = Sigma)))
# Couple of negatives. Get rid of these.
Z[Z<=0] <- 0.1

# For each person we draw their citation count from a negative binomial distribution

# apply the rnegbin distribution to columns (margin 2)
Y <- apply(Z, 2, rnbinom, n = N, prob = 0.5)

# So now we have outcomes Y, inputs X, and we want to estimate Gamma, Omega, and tau
mod <- "
data {
  int N;
  int P; // number of outcomes
  int P2; // number of features
  int<lower = 0> Y[N, P]; // your outcomes
  matrix[N, P2] X; // your features
}
parameters {
  matrix<lower = 0>[N, P] Z;// latent variables
  matrix[P2, P2] Gamma; // loading matrix
  vector<lower = 0>[P2] tau; // scale of residuals
  real<lower = 0, upper = 1> theta1; // shape parameter for negative binomial
  real<lower = 0, upper = 1> theta2;
  corr_matrix[P2] Omega;
}
model {
  // priors
  Omega ~ lkj_corr(3); // 3 is fairly loose prior on correlations
  tau ~ cauchy(0, 1); // no idea, Cauchy allows for potentially big errors
  to_vector(Gamma) ~ normal(0, 2); // fairly loose prior
  theta1 ~ beta(0.5, 0.5);
  theta2 ~ beta(0.5, 0.5);

  // latent variables
  for(n in 1:N) {
    Z[n] ~ multi_normal(X[n]*Gamma, quad_form_diag(Omega, tau));
  }


  // likelihood
  Y[,1] ~ neg_binomial(Z[,1], theta1);
  Y[,2] ~ neg_binomial(Z[,2], theta2);
}"

compiled_model <- stan_model(model_code = mod)

# Try estimating it using penalized likelihood
optim_test <- optimizing(compiled_model, data = list(N = nrow(Y), P = 2, P2 = 2, X = X, Y = Y))

hmc_test <- sampling(compiled_model, data = list(N = nrow(Y), P = 2, P2 = 2, X = X, Y = Y), iter = 1000, cores = 4)
shinystan::launch_shinystan(hmc_test)
print(hmc_test, pars = c("Gamma", "tau", "Omega"))
Gamma
Omega
tau

	library(MASS); library(rstan)
	# Sample size
	N <- 5000

	# Generate two uncorrelated covariates (means 1 and 5, both with standard deviations of 1)
	X <- mvrnorm(N, c(1, 5), matrix(c(1, 0, 0, 1), 2, 2))
	plot(X)
	# Loadings matrix
	Gamma <- matrix(c(0.5, 1,
	-1, 2),
	2, 2)
	Gamma

	# Let's generate the latent variables

	# Mean vector comes from X * Gamma

	# Covariance matrix?

	# correlation matrix
	Omega <- matrix(c(1, 0.5, 0.5, 1), 2, 2)
	Omega

	# standard deviations
	tau <- c(3, 2)

	# Covariance matrix by definition of what a covariance matrix is -- the quadratic form of a correlation matrix and scale vectors
	Sigma <- diag(tau) %% Omega %% diag(tau)

	# Now draw some latent variables. The mean at each data point comes from X*Gamma, and the covariance from Sigma.
	Z <- t(apply(X %*% Gamma, 1, function(x) mvrnorm(1, mu = x, Sigma = Sigma)))
	# Couple of negatives. Get rid of these.
	Z[Z<=0] <- 0.1

	# For each person we draw their citation count from a negative binomial distribution

	# apply the rnegbin distribution to columns (margin 2)
	Y <- apply(Z, 2, rnbinom, n = N, prob = 0.5)

	# So now we have outcomes Y, inputs X, and we want to estimate Gamma, Omega, and tau
	mod <- "
	data {
	int N;
	int P; // number of outcomes
	int P2; // number of features
	int<lower = 0> Y[N, P]; // your outcomes
	matrix[N, P2] X; // your features
	}
	parameters {
	matrix<lower = 0>[N, P] Z;// latent variables
	matrix[P2, P2] Gamma; // loading matrix
	vector<lower = 0>[P2] tau; // scale of residuals
	real<lower = 0, upper = 1> theta1; // shape parameter for negative binomial
	real<lower = 0, upper = 1> theta2;
	corr_matrix[P2] Omega;
	}
	model {
	// priors
	Omega ~ lkj_corr(3); // 3 is fairly loose prior on correlations
	tau ~ cauchy(0, 1); // no idea, Cauchy allows for potentially big errors
	to_vector(Gamma) ~ normal(0, 2); // fairly loose prior
	theta1 ~ beta(0.5, 0.5);
	theta2 ~ beta(0.5, 0.5);

	// latent variables
	for(n in 1:N) {
	Z[n] ~ multi_normal(X[n]*Gamma, quad_form_diag(Omega, tau));
	}


	// likelihood
	Y[,1] ~ neg_binomial(Z[,1], theta1);
	Y[,2] ~ neg_binomial(Z[,2], theta2);
	}"

	compiled_model <- stan_model(model_code = mod)

	# Try estimating it using penalized likelihood
	optim_test <- optimizing(compiled_model, data = list(N = nrow(Y), P = 2, P2 = 2, X = X, Y = Y))

	hmc_test <- sampling(compiled_model, data = list(N = nrow(Y), P = 2, P2 = 2, X = X, Y = Y), iter = 1000, cores = 4)
	shinystan::launch_shinystan(hmc_test)
	print(hmc_test, pars = c("Gamma", "tau", "Omega"))
	Gamma
	Omega
	tau