khakieconomics/hierarchical_model_example.R

## hierarchical_model_example.R

# Some fake data

library(dplyr); library(rstan)

# Write out the data generation with known parameters

# Set the number of individuals
n_ind <- 50

# Set up the observations
my_tidy_data <- expand.grid(individual = 1:n_ind, outcome_measure_type = 1:10) %>%
  as_data_frame() %>%
  arrange(individual, outcome_measure_type)

# Which individuals get treated?
individual_treatment <- data_frame(individual = 1:n_ind, treatment = sample(c(0, 1), n_ind, replace = T))

# individual effects (0 mean)
# scale
tau1 <- c(0.5, 0.25)
# correlation
omega1 <- matrix(c(1, -0.2, -0.2, 1), 2, 2)
# covariance matrix
S1 <- diag(tau1) %*% omega1 %*% diag(tau1)

# Put together the individual coefficients
individual_treatment <- individual_treatment %>%
  bind_cols(MASS::mvrnorm(n_ind, mu = c(0, 0), Sigma = S1) %>% as.data.frame) %>%
  rename(ind_intercept = V1,
         ind_TE = V2)


# Now let's put together the outcome type effects in the same way

set.seed(4321)
outcome_measure_type_effects <- data_frame(outcome_measure_type = 1:10)

# individual effects (0 mean)
# scale
tau2 <- c(0.5, 1)
# correlation
omega2 <- matrix(c(1, -0.5, -0.5, 1), 2, 2)
# covariance matrix
S2 <- diag(tau2) %*% omega2 %*% diag(tau2)

# Put together the outcome
# We'll set the true treatment effects
outcome_measure_type_effects <- outcome_measure_type_effects %>% bind_cols(MASS::mvrnorm(10, mu = c(1,1), Sigma = S2) %>% as.data.frame) %>%
  rename(outcome_intercept = V1,
         outcome_TE = V2)


my_tidy_data <- left_join(my_tidy_data, individual_treatment) %>%
  left_join(outcome_measure_type_effects) %>%
  mutate(outcome_score = ind_intercept + outcome_intercept + (ind_TE + outcome_TE)*treatment + rnorm(n(), 0, 10))

# We know the actual treatment effect is 1, but we get a very noisy estimate.
summary(lm(outcome_score~ treatment, data = my_tidy_data))

# What's more, if we run OLS on each group, we're going to get some crazy estimates.
library(broom)

est_tes_sep_models <- my_tidy_data %>% group_by(outcome_measure_type) %>%
  do(tidy(lm(outcome_score~ treatment, data = .)))%>%
  ungroup %>%
  filter(term == "treatment")

est_tes_sep_models


# So why don't we estimate a multilevel model in Stan? ====

model_code <- "
data {
  int N; // number of observations
  int I; // number of individuals
  int J; // number of outcome types
  int Ind[N]; // individual identifiers
  int outcome_measure[N]; // vector of outcome measures (pain, anxiety, etc.)
  int treatment[N]; // binary indicating treatment
  vector[N] outcome_score;
}
parameters {
  // Regression coefficients

  vector[2] mu_bj; // outcome intercept and slope

  // Covariance matrices
  matrix[I, 2] z1;
  matrix[J, 2] z2;
  cholesky_factor_corr[2] L1;
  cholesky_factor_corr[2] L2;

  vector<lower = 0>[2] tau1; // Scale of individual shocks
  vector<lower = 0>[2] tau2; // Scale of outcome type shocks

  // regression sd
  real sigma;
}
transformed parameters {

  matrix[I, 2] bi; // individual effects
  matrix[J, 2] bj; // outcome effects

  bi <- (diag_pre_multiply(tau1, L1)*z1')';
  bj <- rep_matrix(to_row_vector(mu_bj), J) + (diag_pre_multiply(tau2, L2)*z2')';
}

model {

  // priors
  tau1 ~ cauchy(0, 1);
  tau2 ~ cauchy(0, 1);
  sigma ~ cauchy(0, 3);

  L1 ~ lkj_corr_cholesky(2);
  L2 ~ lkj_corr_cholesky(2);

  mu_bj ~ normal(0, 5);

   for(i in 1:I) {
    z1[i] ~ normal(0, 1);
   }
   for(j in 1:J) {
     z2[j] ~ normal(0, 1);
   }

  for(n in 1:N){
    outcome_score[n] ~ normal(bi[Ind[n],1] + bj[outcome_measure[n], 1] + (bi[Ind[n], 2] + bj[outcome_measure[n], 2])* treatment[n], sigma);
  }
}
"


data_list <- list(N = nrow(my_tidy_data), I = n_ind, J = 10, Ind = my_tidy_data$individual, outcome_measure = my_tidy_data$outcome_measure_type, treatment = my_tidy_data$treatment, outcome_score = my_tidy_data$outcome_score)
fitted_model2 <- stan(model_code = model_code, data = data_list, cores = 4, iter = 600)

# have a look at estimates ---------------------------------------------------------------------


# Did it estimate well?
shinystan::launch_shinystan(fitted_model2)


# Is our treatment effect estimated?
library(reshape2); library(ggplot2)
extract(fitted_model2, pars = c("mu_bj", "tau1", "tau2"), permuted = F) %>% plyr::adply(2) %>% select(-chains)%>% melt() %>%
  ggplot(aes(x = value)) +
  facet_grid(variable~.) +
  geom_density(fill = "orange", alpha = 1)


# Final plot of estimates across each outcome type, actuals, and poorly-estimated effects using OLS on subsets of the data

extract(fitted_model2, pars = c("bj"), permuted = F) %>% plyr::adply(2) %>% select(-chains)%>% melt() %>%
  filter(grepl("2]", variable)) %>%
  group_by(variable) %>%
  summarise(median = median(value),
            lower = quantile(value, 0.025),
            upper = quantile(value, 0.975)) %>%
  ggplot(aes(x = variable)) +
  geom_linerange(aes(ymin = lower, ymax = upper)) +
  geom_point(aes(y = median)) +
  geom_point(aes(y = outcome_measure_type_effects$outcome_TE), colour = "red") +
  geom_point(aes(y = est_tes_sep_models$estimate), colour = "blue") +
  ggtitle("Estimated treatment effects across different outcomes\nblue = effect from separate models\nred = true effect")

	# Some fake data

	library(dplyr); library(rstan)

	# Write out the data generation with known parameters

	# Set the number of individuals
	n_ind <- 50

	# Set up the observations
	my_tidy_data <- expand.grid(individual = 1:n_ind, outcome_measure_type = 1:10) %>%
	as_data_frame() %>%
	arrange(individual, outcome_measure_type)

	# Which individuals get treated?
	individual_treatment <- data_frame(individual = 1:n_ind, treatment = sample(c(0, 1), n_ind, replace = T))

	# individual effects (0 mean)
	# scale
	tau1 <- c(0.5, 0.25)
	# correlation
	omega1 <- matrix(c(1, -0.2, -0.2, 1), 2, 2)
	# covariance matrix
	S1 <- diag(tau1) %% omega1 %% diag(tau1)

	# Put together the individual coefficients
	individual_treatment <- individual_treatment %>%
	bind_cols(MASS::mvrnorm(n_ind, mu = c(0, 0), Sigma = S1) %>% as.data.frame) %>%
	rename(ind_intercept = V1,
	ind_TE = V2)



	# Now let's put together the outcome type effects in the same way

	set.seed(4321)
	outcome_measure_type_effects <- data_frame(outcome_measure_type = 1:10)

	# individual effects (0 mean)
	# scale
	tau2 <- c(0.5, 1)
	# correlation
	omega2 <- matrix(c(1, -0.5, -0.5, 1), 2, 2)
	# covariance matrix
	S2 <- diag(tau2) %% omega2 %% diag(tau2)

	# Put together the outcome
	# We'll set the true treatment effects
	outcome_measure_type_effects <- outcome_measure_type_effects %>% bind_cols(MASS::mvrnorm(10, mu = c(1,1), Sigma = S2) %>% as.data.frame) %>%
	rename(outcome_intercept = V1,
	outcome_TE = V2)



	my_tidy_data <- left_join(my_tidy_data, individual_treatment) %>%
	left_join(outcome_measure_type_effects) %>%
	mutate(outcome_score = ind_intercept + outcome_intercept + (ind_TE + outcome_TE)*treatment + rnorm(n(), 0, 10))

	# We know the actual treatment effect is 1, but we get a very noisy estimate.
	summary(lm(outcome_score~ treatment, data = my_tidy_data))

	# What's more, if we run OLS on each group, we're going to get some crazy estimates.
	library(broom)

	est_tes_sep_models <- my_tidy_data %>% group_by(outcome_measure_type) %>%
	do(tidy(lm(outcome_score~ treatment, data = .)))%>%
	ungroup %>%
	filter(term == "treatment")

	est_tes_sep_models


	# So why don't we estimate a multilevel model in Stan? ====

	model_code <- "
	data {
	int N; // number of observations
	int I; // number of individuals
	int J; // number of outcome types
	int Ind[N]; // individual identifiers
	int outcome_measure[N]; // vector of outcome measures (pain, anxiety, etc.)
	int treatment[N]; // binary indicating treatment
	vector[N] outcome_score;
	}
	parameters {
	// Regression coefficients

	vector[2] mu_bj; // outcome intercept and slope

	// Covariance matrices
	matrix[I, 2] z1;
	matrix[J, 2] z2;
	cholesky_factor_corr[2] L1;
	cholesky_factor_corr[2] L2;

	vector<lower = 0>[2] tau1; // Scale of individual shocks
	vector<lower = 0>[2] tau2; // Scale of outcome type shocks

	// regression sd
	real sigma;
	}
	transformed parameters {

	matrix[I, 2] bi; // individual effects
	matrix[J, 2] bj; // outcome effects

	bi <- (diag_pre_multiply(tau1, L1)*z1')';
	bj <- rep_matrix(to_row_vector(mu_bj), J) + (diag_pre_multiply(tau2, L2)*z2')';
	}

	model {

	// priors
	tau1 ~ cauchy(0, 1);
	tau2 ~ cauchy(0, 1);
	sigma ~ cauchy(0, 3);

	L1 ~ lkj_corr_cholesky(2);
	L2 ~ lkj_corr_cholesky(2);

	mu_bj ~ normal(0, 5);

	for(i in 1:I) {
	z1[i] ~ normal(0, 1);
	}
	for(j in 1:J) {
	z2[j] ~ normal(0, 1);
	}

	for(n in 1:N){
	outcome_score[n] ~ normal(bi[Ind[n],1] + bj[outcome_measure[n], 1] + (bi[Ind[n], 2] + bj[outcome_measure[n], 2])* treatment[n], sigma);
	}
	}
	"


	data_list <- list(N = nrow(my_tidy_data), I = n_ind, J = 10, Ind = my_tidy_data$individual, outcome_measure = my_tidy_data$outcome_measure_type, treatment = my_tidy_data$treatment, outcome_score = my_tidy_data$outcome_score)
	fitted_model2 <- stan(model_code = model_code, data = data_list, cores = 4, iter = 600)

	# have a look at estimates ---------------------------------------------------------------------


	# Did it estimate well?
	shinystan::launch_shinystan(fitted_model2)


	# Is our treatment effect estimated?
	library(reshape2); library(ggplot2)
	extract(fitted_model2, pars = c("mu_bj", "tau1", "tau2"), permuted = F) %>% plyr::adply(2) %>% select(-chains)%>% melt() %>%
	ggplot(aes(x = value)) +
	facet_grid(variable~.) +
	geom_density(fill = "orange", alpha = 1)



	# Final plot of estimates across each outcome type, actuals, and poorly-estimated effects using OLS on subsets of the data

	extract(fitted_model2, pars = c("bj"), permuted = F) %>% plyr::adply(2) %>% select(-chains)%>% melt() %>%
	filter(grepl("2]", variable)) %>%
	group_by(variable) %>%
	summarise(median = median(value),
	lower = quantile(value, 0.025),
	upper = quantile(value, 0.975)) %>%
	ggplot(aes(x = variable)) +
	geom_linerange(aes(ymin = lower, ymax = upper)) +
	geom_point(aes(y = median)) +
	geom_point(aes(y = outcome_measure_type_effects$outcome_TE), colour = "red") +
	geom_point(aes(y = est_tes_sep_models$estimate), colour = "blue") +
	ggtitle("Estimated treatment effects across different outcomes\nblue = effect from separate models\nred = true effect")