Skip to content

Instantly share code, notes, and snippets.

@khakieconomics
Created March 20, 2016 22:26
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save khakieconomics/9dd785c241a1ee0b6f32 to your computer and use it in GitHub Desktop.
Save khakieconomics/9dd785c241a1ee0b6f32 to your computer and use it in GitHub Desktop.
# Some fake data
library(dplyr); library(rstan)
# Write out the data generation with known parameters
# Set the number of individuals
n_ind <- 50
# Set up the observations
my_tidy_data <- expand.grid(individual = 1:n_ind, outcome_measure_type = 1:10) %>%
as_data_frame() %>%
arrange(individual, outcome_measure_type)
# Which individuals get treated?
individual_treatment <- data_frame(individual = 1:n_ind, treatment = sample(c(0, 1), n_ind, replace = T))
# individual effects (0 mean)
# scale
tau1 <- c(0.5, 0.25)
# correlation
omega1 <- matrix(c(1, -0.2, -0.2, 1), 2, 2)
# covariance matrix
S1 <- diag(tau1) %*% omega1 %*% diag(tau1)
# Put together the individual coefficients
individual_treatment <- individual_treatment %>%
bind_cols(MASS::mvrnorm(n_ind, mu = c(0, 0), Sigma = S1) %>% as.data.frame) %>%
rename(ind_intercept = V1,
ind_TE = V2)
# Now let's put together the outcome type effects in the same way
set.seed(4321)
outcome_measure_type_effects <- data_frame(outcome_measure_type = 1:10)
# individual effects (0 mean)
# scale
tau2 <- c(0.5, 1)
# correlation
omega2 <- matrix(c(1, -0.5, -0.5, 1), 2, 2)
# covariance matrix
S2 <- diag(tau2) %*% omega2 %*% diag(tau2)
# Put together the outcome
# We'll set the true treatment effects
outcome_measure_type_effects <- outcome_measure_type_effects %>% bind_cols(MASS::mvrnorm(10, mu = c(1,1), Sigma = S2) %>% as.data.frame) %>%
rename(outcome_intercept = V1,
outcome_TE = V2)
my_tidy_data <- left_join(my_tidy_data, individual_treatment) %>%
left_join(outcome_measure_type_effects) %>%
mutate(outcome_score = ind_intercept + outcome_intercept + (ind_TE + outcome_TE)*treatment + rnorm(n(), 0, 10))
# We know the actual treatment effect is 1, but we get a very noisy estimate.
summary(lm(outcome_score~ treatment, data = my_tidy_data))
# What's more, if we run OLS on each group, we're going to get some crazy estimates.
library(broom)
est_tes_sep_models <- my_tidy_data %>% group_by(outcome_measure_type) %>%
do(tidy(lm(outcome_score~ treatment, data = .)))%>%
ungroup %>%
filter(term == "treatment")
est_tes_sep_models
# So why don't we estimate a multilevel model in Stan? ====
model_code <- "
data {
int N; // number of observations
int I; // number of individuals
int J; // number of outcome types
int Ind[N]; // individual identifiers
int outcome_measure[N]; // vector of outcome measures (pain, anxiety, etc.)
int treatment[N]; // binary indicating treatment
vector[N] outcome_score;
}
parameters {
// Regression coefficients
vector[2] mu_bj; // outcome intercept and slope
// Covariance matrices
matrix[I, 2] z1;
matrix[J, 2] z2;
cholesky_factor_corr[2] L1;
cholesky_factor_corr[2] L2;
vector<lower = 0>[2] tau1; // Scale of individual shocks
vector<lower = 0>[2] tau2; // Scale of outcome type shocks
// regression sd
real sigma;
}
transformed parameters {
matrix[I, 2] bi; // individual effects
matrix[J, 2] bj; // outcome effects
bi <- (diag_pre_multiply(tau1, L1)*z1')';
bj <- rep_matrix(to_row_vector(mu_bj), J) + (diag_pre_multiply(tau2, L2)*z2')';
}
model {
// priors
tau1 ~ cauchy(0, 1);
tau2 ~ cauchy(0, 1);
sigma ~ cauchy(0, 3);
L1 ~ lkj_corr_cholesky(2);
L2 ~ lkj_corr_cholesky(2);
mu_bj ~ normal(0, 5);
for(i in 1:I) {
z1[i] ~ normal(0, 1);
}
for(j in 1:J) {
z2[j] ~ normal(0, 1);
}
for(n in 1:N){
outcome_score[n] ~ normal(bi[Ind[n],1] + bj[outcome_measure[n], 1] + (bi[Ind[n], 2] + bj[outcome_measure[n], 2])* treatment[n], sigma);
}
}
"
data_list <- list(N = nrow(my_tidy_data), I = n_ind, J = 10, Ind = my_tidy_data$individual, outcome_measure = my_tidy_data$outcome_measure_type, treatment = my_tidy_data$treatment, outcome_score = my_tidy_data$outcome_score)
fitted_model2 <- stan(model_code = model_code, data = data_list, cores = 4, iter = 600)
# have a look at estimates ---------------------------------------------------------------------
# Did it estimate well?
shinystan::launch_shinystan(fitted_model2)
# Is our treatment effect estimated?
library(reshape2); library(ggplot2)
extract(fitted_model2, pars = c("mu_bj", "tau1", "tau2"), permuted = F) %>% plyr::adply(2) %>% select(-chains)%>% melt() %>%
ggplot(aes(x = value)) +
facet_grid(variable~.) +
geom_density(fill = "orange", alpha = 1)
# Final plot of estimates across each outcome type, actuals, and poorly-estimated effects using OLS on subsets of the data
extract(fitted_model2, pars = c("bj"), permuted = F) %>% plyr::adply(2) %>% select(-chains)%>% melt() %>%
filter(grepl("2]", variable)) %>%
group_by(variable) %>%
summarise(median = median(value),
lower = quantile(value, 0.025),
upper = quantile(value, 0.975)) %>%
ggplot(aes(x = variable)) +
geom_linerange(aes(ymin = lower, ymax = upper)) +
geom_point(aes(y = median)) +
geom_point(aes(y = outcome_measure_type_effects$outcome_TE), colour = "red") +
geom_point(aes(y = est_tes_sep_models$estimate), colour = "blue") +
ggtitle("Estimated treatment effects across different outcomes\nblue = effect from separate models\nred = true effect")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment