Skip to content

Instantly share code, notes, and snippets.

@khakieconomics
Last active July 6, 2018 21:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save khakieconomics/81222987d3c262890e61a7edfb2f8dfb to your computer and use it in GitHub Desktop.
Save khakieconomics/81222987d3c262890e61a7edfb2f8dfb to your computer and use it in GitHub Desktop.
File to run/visualise GP_IV.stan
# First load the libraries and define a function to create a
library(tidyverse); library(rstan)
set.seed(78)
ard_Sigma <- function(features, rho, alpha) {
noise <- 1e-8
features <- as.data.frame(features)
P <- ncol(features)
if(length(rho) != P) {
stop("The length of rho must equal the number of features")
}
N <- nrow(features)
outer_prods <- lapply(1:P, function(i) (1/rho[i]^2) * outer(features[,i], features[,i], "-")^2)
sum_of_them <- Reduce("+", outer_prods)
alpha^2 * exp(-.5 * sum_of_them) + noise^2
}
# Now let's simulate some data
# How many observations?
N <- 50
# Generate treatment effects ----------------------------------------------
# First we construct the covariates across which we expect the treatment effect to vary
P <- 3
some_features <- data.frame(Year = c(sample(1940:1990, N*.2, replace = T), sample(1991:2018, N*.8, replace = T)),
lat = rnorm(N, -6.5, 1),
lon = rnorm(N, 37, 2))
# these have awkward scales so let's scale them
some_features <- scale(some_features)
# How important will each be in the non-linearity of the treatment effect in the covariates?
rho <- (apply(some_features, 2, max) - apply(some_features, 2, min))/exp(1.5*runif(3))
# Now let's s simulate the treatment effects
alpha <- 3
# Generate the covariance matrix
Sigma <-ard_Sigma(some_features, rho, alpha)
# Draw from the appropriately parameterized Gaussian
tau <- MASS::mvrnorm(1, rep(0, N), Sigma)
# Generate structural shocks ----------------------------------------------
# We need structural shocks for both outcomes
Omega <- cor(matrix(rnorm(6), 3, 2))
scale <- exp(rnorm(2, 1, 1))
structural_shocks <- MASS::mvrnorm(N, rep(0, 2), diag(scale) %*% Omega %*% diag(scale))
# Generate instruments, covariates, endog treatment -----------------------
P3 <- 4 # number of exogenous covariates
P2 <- 3 # number of instruments
# coefficients
gamma <- rnorm(P2)
beta_1 <- rnorm(P3)
beta_2 <- rnorm(P3)
intercepts <- rnorm(2)
# Generate instruments
Z <- matrix(rnorm(N*P2), N, P2)
Covariates <- matrix(rnorm(N*P3), N, P3)
# Generate treatment
treatment <- intercepts[1] + Z %*% gamma + Covariates %*% beta_1 + structural_shocks[,1]
# Generate outcomes -------------------------------------------------------
outcomes <- intercepts[2] + tau * treatment + Covariates %*% beta_2 + structural_shocks[,2]
# Simple analysis ---------------------------------------------------------
ATE <- mean(tau)
ATE
ols_fit <- lm(outcomes ~ treatment + ., data = data.frame(Covariates))
iv_fit <- AER::ivreg(outcomes ~ treatment + X1 + X2 + X3 + X4 | X1 + X2 + X3 + X4 + X1.1 + X2.1 + X3.1, data = data.frame(Covariates, Z))
summary(ols_fit)
summary(iv_fit)
# Using the heterogeneous treatment effects model -------------------------
library(rstan)
# Compile the model
compiled_model <- stan_model("../stan/GP_treatment_IV.stan")
# make the data list for Stan
data_list <- list(N = N,
P = P,
P2 = P2,
P3 = P3,
Z = Z,
X = some_features,
Y = as.numeric(outcomes),
treatment = as.numeric(treatment))
#model_fit <- vb(compiled_model, data= data_list)
model_fit <- sampling(compiled_model, data = data_list, cores = 4, iter = 600)
model_fit_mcmc <- model_fit
# model_fit <- vb(compiled_model, data = data_list)
# Print the estimates
print(model_fit, pars = c("ATE","alpha", "beta", "gamma", "scale", "intercept", "rho"))
# Let's compare posterior mean treatment effects to the generative values
tau_draws <- as.data.frame(model_fit, pars = "tau")
plot(colMeans(tau_draws), tau)
abline(0, 1)
# Let’s plot what we’d get with 2SLS vs this method -----------------------
ggplot() +
geom_histogram(data= as.data.frame(model_fit, pars = "ATE"), aes(x = ATE, y = ..density..)) +
stat_function(data = data.frame(x = (coef(iv_fit)[2] - 2*summary(iv_fit)$coef[2,2]):(coef(iv_fit)[2] + 2*summary(iv_fit)$coef[2,2]) ), aes(x = x), fun = dnorm, args = list(mean = coef(iv_fit)[2], summary(iv_fit)$coef[2,2]), colour = "red") +
ggthemes::theme_hc() +
geom_vline(aes(xintercept = .GlobalEnv$ATE), colour = "red") +
labs(title = "Our method vs 2SLS",
subtitle = "2SLS implied density in red, our draws in black")
NULL
#
# # Let’s look at the values of the treatment effects across the mar --------
#
# # Margin 1
#
# plot(some_features[,1], get_posterior_mean(model_fit, pars = "tau")[,5])
# plot(some_features[,1], tau)
#
# # Margin 2
#
# plot(some_features[,2], get_posterior_mean(model_fit, pars = "tau")[,5])
# plot(some_features[,2], tau)
#
#
# # Margin 3
# plot(some_features[,3], get_posterior_mean(model_fit, pars = "tau")[,5])
# plot(some_features[,3], tau)
#
# hist(tau)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment