library(rstanarm); library(tidyverse)
options(mc.cores = parallel::detectCores())
set.seed(42)
data("radon")
head(radon)
# log_uranium is a county-level measure, so the outcome has essentially no
# within-county variance, which breaks likelihood-based estimation.
# Adding a tiny bit of noise fixes the problem
radon$log_uranium <- rnorm(nrow(radon), radon$log_uranium, 0.05)
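# (Optional) quick check that the jitter worked: every county should now have
# positive within-county variance in log_uranium (counties with a single house
# return NA, hence na.rm)
radon %>%
  group_by(county) %>%
  summarise(sd_log_uranium = sd(log_uranium)) %>%
  summarise(min_sd = min(sd_log_uranium, na.rm = TRUE))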
# Split the data
sample_split <- sample(1:nrow(radon), round(nrow(radon)/2))
outcome_sample <- radon[sample_split,]
treatment_sample <- radon[-sample_split,]
# Quick exploratory check: plain OLS of the outcome on the treatment and controls
summary(lm(log_uranium ~ log_radon + floor + county, data = treatment_sample))
# Fit the outcome model: outcome on controls only, excluding the treatment (floor)
outcome_model <- stan_lmer(log_uranium ~ log_radon + (1 | county),
                           data = treatment_sample,
                           cores = 4, iter = 1000,
                           prior = student_t(df = 5, location = 0, scale = 0.5),
                           prior_intercept = student_t(df = 7, location = 0, scale = 1),
                           prior_aux = student_t(df = 4, scale = 1))
# Fit the treatment model
treatment_model <- stan_glmer(floor ~ log_radon + (1 | county),
                              data = treatment_sample, cores = 4,
                              iter = 1000,
                              family = binomial(link = "logit"),
                              prior = student_t(df = 5, location = 0, scale = 0.5),
                              prior_intercept = student_t(df = 3, location = 0, scale = 2))
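# Quick look at the two fitted models; with iter = 1000 over rstanarm's default
# of 4 chains, each model carries 2000 post-warmup posterior draws
print(outcome_model, digits = 2)
print(treatment_model, digits = 2)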
# Get posterior draws of predicted treatment probabilities and predicted outcomes
# for the held-out sample (rows = observations, columns = posterior draws)
treatment_predictions <- t(posterior_linpred(treatment_model, newdata = outcome_sample, transform = TRUE))
hist(treatment_predictions)
outcome_predictions <- t(posterior_linpred(outcome_model, newdata = outcome_sample))
hist(outcome_predictions)
# Residuals from the treatment and outcome models, computed on the held-out dataset.
# Note there are 2000 columns (4 chains x 500 post-warmup iterations)---a separate
# vector of residuals for every draw from the posterior.
treatment_errors <- matrix(outcome_sample$floor, nrow(outcome_sample), ncol(treatment_predictions)) - treatment_predictions
outcome_errors <- matrix(outcome_sample$log_uranium, nrow(outcome_sample), ncol(outcome_predictions)) - outcome_predictions
# Get the theta estimates for each set of residuals
thetahat <- sapply(seq_len(ncol(outcome_errors)), function(i) coef(lm(outcome_errors[,i] ~ -1 + treatment_errors[,i])))
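# For intuition: each lm() above is just the no-intercept least-squares slope,
# i.e. theta_i = sum(V_i * U_i) / sum(V_i^2) for posterior draw i, where V are
# treatment residuals and U outcome residuals. A vectorised equivalent
# (thetahat_fast is only an illustrative name) avoids fitting 2000 regressions:
thetahat_fast <- colSums(treatment_errors * outcome_errors) / colSums(treatment_errors^2)
# all.equal(unname(thetahat), unname(thetahat_fast)) should be TRUE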
# And get our posterior mean and 95% posterior interval
ATE_bayes <- mean(thetahat)
confint_bayes <- quantile(thetahat, c(0.025, 0.975))
# Let’s now do double-ML with Random Forest -------------------------------
library(randomForest)
# One-hot encode the categorical features
radon_onehot <- model.matrix(~ ., data = radon)[,-1]
# Perform the same split; convert to data frames for randomForest's formula interface
onehot_outcome <- as.data.frame(radon_onehot[sample_split,])
onehot_treatment <- as.data.frame(radon_onehot[-sample_split,])
# Fit models on the treatment half and construct residuals on the held-out half
# (the treatment itself is dropped explicitly from its own predictor set)
treatment_model_rf <- randomForest(factor(floor) ~ . - log_uranium - floor, data = onehot_treatment)
outcome_model_rf <- randomForest(log_uranium ~ . - floor, data = onehot_treatment, mtry = 15)
resid_outcome_rf <- onehot_outcome[,"log_uranium"] - predict(outcome_model_rf, newdata = onehot_outcome)
resid_treatment_rf <- onehot_outcome[,"floor"] - predict(treatment_model_rf, newdata = onehot_outcome, type ="prob")[,2]
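# Mirror the earlier diagnostics: quick histograms of the RF residuals, for an
# eyeball comparison with the Bayesian ones above
hist(resid_treatment_rf)
hist(resid_outcome_rf)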
# Get our treatment effect estimate from the residual-on-residual regression,
# with the double-ML sandwich standard error: sqrt of mean(V^2)^-2 * mean(V^2 * e^2) / n
fit_rf <- lm(resid_outcome_rf ~ -1 + resid_treatment_rf)
se <- sqrt((1/mean(resid_treatment_rf^2))^2 * mean(resid_treatment_rf^2 * resid(fit_rf)^2) / length(resid_treatment_rf))
rf_confint <- c(coef(fit_rf) - 1.96*se, coef(fit_rf) + 1.96*se)
# Now a Naive BART implementation -----------------------------------------
library(BART)
y_train <- radon_onehot[,"log_uranium"]
# Drop the outcome column by name rather than assuming it is the last column
x_train <- radon_onehot[, colnames(radon_onehot) != "log_uranium"]
bart_est <- wbart(x.train = x_train, y.train = y_train, sparse = TRUE)
x_test_1 <- x_train
x_test_1[,"floor"] <- 1
x_test_0 <- x_train
x_test_0[,"floor"] <- 0
y_pred_1 <- predict(bart_est, newdata = x_test_1)
y_pred_0 <- predict(bart_est, newdata = x_test_0)
# The difference between the two prediction sets is the posterior of the
# individual-level treatment effects (rows = posterior draws, columns = houses);
# averaging across houses gives one ATE draw per posterior sample
Ind_TE <- y_pred_1 - y_pred_0
ATE <- rowMeans(Ind_TE)
hist(ATE)
ATE_mean <- mean(ATE)
ATE_confint <- quantile(ATE, c(0.025, 0.975))
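# Also worth a glance: posterior-mean individual-level effects (one per house),
# to see how much BART lets the effect vary across observations (illustrative name)
ind_te_means <- colMeans(Ind_TE)
hist(ind_te_means)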
# Visualize comparative effects -------------------------------------------
tibble(model = c("Bayesian hierarchical",
                 "RF Double ML",
                 "BART"),
       mean = c(ATE_bayes, coef(fit_rf), ATE_mean),
       lower = c(confint_bayes[1], rf_confint[1], ATE_confint[1]),
       upper = c(confint_bayes[2], rf_confint[2], ATE_confint[2])) %>%
  ggplot(aes(x = model)) +
  geom_linerange(aes(ymin = lower, ymax = upper), colour = "green") +
  geom_point(aes(y = mean), size = 2) +
  theme_bw()
# So it seems the Bayesian Hierarchical estimate is way more precise. Why might this be the case?
error_scales <- tibble(model = c("treatment model", "outcome model"),
                       `error scale bayes` = c(sd(treatment_errors), sd(outcome_errors)),
                       `error scale RF` = c(sd(resid_treatment_rf), sd(resid_outcome_rf)))
error_scales %>%
  gather(measure, value, -model) %>%
  ggplot(aes(x = model)) +
  geom_point(aes(y = value, colour = measure), size = 2)
# There we go: because the errors from the hierarchical Bayes models are so much
# smaller, we get a much narrower interval. The well-estimated county random effects
# are doing most of the predictive work.
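# A rough numeric companion to the plot: widths of the three 95% intervals
c(bayes = unname(diff(confint_bayes)),
  rf    = unname(diff(rf_confint)),
  bart  = unname(diff(ATE_confint)))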