Skip to content

Instantly share code, notes, and snippets.

@khakieconomics
Created September 26, 2017 23:06
Show Gist options
  • Save khakieconomics/d61e3c1be07a05361cdffca2d2fb4166 to your computer and use it in GitHub Desktop.
Save khakieconomics/d61e3c1be07a05361cdffca2d2fb4166 to your computer and use it in GitHub Desktop.
library(rstan); library(dplyr); library(ggplot2); library(reshape2)
options(mc.cores = parallel::detectCores())
set.seed(49)
# Observations
N <- 2000
# Draw errors
rho <- 0.5
Sigma <- matrix(c(1, rho, rho, 1), 2, 2)
errors <- MASS::mvrnorm(N, c(0, 0), Sigma)
# Draw some fake covariates
P <- 5
P2 <- 3
X <- cbind(1, matrix(rnorm((P-1)*N), N, P-1))
Z <- matrix(rnorm(P2*N), N, P2)
# Some fake parameters
beta <- rnorm(P + P2)
gamma <- rnorm(P + 1)
# Make Y
Ystar1 <- cbind(X, Z) %*% beta + errors[,1]
Y1 = as.numeric(Ystar1 > 0 )
# Make covariates for second stage
X2 <- cbind(Y1, X)
# Make outcome
Ystar2 <- X2 %*% gamma + errors[,2]
Y2 <- as.numeric(Ystar2 > 0)
# Create data
Y <- cbind(Y1, Y2)
cor(Y)
# Does regular old probit work?
estimates <- coef(glm(Y[,2] ~ . - 1, data = as.data.frame(X2), family = binomial(link = "probit")))
# let's look at the estimates
data.frame(gamma, estimates) %>%
mutate(endo_regressor = 1:n()==1) %>%
ggplot(aes(gamma, estimates, colour = endo_regressor)) +
geom_point() +
labs(title = "There's pretty clearly some bias")
# Pretty clearly there's a bias!
# Stan time ---------------------------------------------------------------
# Let's run Stan
data_list <- list(N = N, P = P, P2 = P2, X = X, Z = Z, Y = as.data.frame(Y))
compiled_model <- stan_model("biprobit.stan")
estimated_model <- sampling(compiled_model, data = data_list, iter = 500)
# Look at the estimates relative to the actuals (gamma[1] was biased)
estimated_model
gamma
# Let's look at the estimates
as.data.frame(estimated_model, pars = c("gamma","beta")) %>%
melt() %>%
group_by(variable) %>%
summarise(median = median(value),
lower = quantile(value, .025),
upper = quantile(value, .975)) %>%
mutate(true_values = c(gamma, beta),
size = ifelse(1:n()==1, 3, 1)) %>%
ggplot(aes(x = median)) +
geom_linerange(aes(ymin = lower, ymax = upper), colour = "red") +
geom_point(aes(y = median), colour = "red") +
geom_point(aes(y = true_values, colour = size)) +
labs(title = "Muuuuch better")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment