Skip to content

Instantly share code, notes, and snippets.

@grantmcdermott
Last active August 28, 2021 22:15
Show Gist options
  • Save grantmcdermott/a87849dfbd6a5e544dfd6bb523940817 to your computer and use it in GitHub Desktop.
Save grantmcdermott/a87849dfbd6a5e544dfd6bb523940817 to your computer and use it in GitHub Desktop.
Redoing the simulation described on Dan Millimet's blog: https://dlm-econometrics.blogspot.com/2020/08/eye-on-prize.html
library(ivreg)
## Convenience functions
rmse = function(y, yhat) sqrt(mean((y - yhat)^2))
mae = function(y, yhat) mean(abs(y - yhat))
## Function for single simulation
dm_sim = function(...) {
## Params
b0 = 1; b1 = 1; rhoXZ = 0.5; rho1 = 0.25
rho2_vec = c(0.5, 0.25, 0, -0.25, -0.5)
nobs_in = 1000; nobs_out = 100
## "In" sample draw
prms_in = matrix(c(1, rhoXZ, rho1, rhoXZ, 1, 0, rho1, 0, 1), nrow = 3, byrow = TRUE)
vars_in = MASS::mvrnorm(n = nobs_in, mu = c(x = 0, z = 0, e = 0), Sigma = prms_in)
x_in = cbind(intercept = 1, x = vars_in[, 'x'])
z_in = cbind(intercept = 1, z = vars_in[, 'z'])
y_in = b0 + tcrossprod(x_in[, 'x'], b1) + vars_in[, 'e']
## Fast OLS and IV mods (we just need the coefs for prediction)
ols_coefs = .lm.fit(x_in, y_in)$coefficients
iv_coefs = ivreg.fit(x_in, y_in, z_in)$coefficients
## Predictions for our "In" sample
yhat_ols_in = x_in %*% ols_coefs
yhat_iv_in = x_in %*% iv_coefs
## Goodness of fit measures
rmse_in = data.frame(OLS = rmse(y_in, yhat_ols_in), IV = rmse(y_in, yhat_iv_in))
mae_in = data.frame(OLS = mae(y_in, yhat_ols_in), IV = mae(y_in, yhat_iv_in))
res_in = list(RMSE = rmse_in, MAE = mae_in)
## "Out" sample draws (5 diff DGPs)
res_out = lapply(seq_along(rho2_vec), function(dgp) {
prms_out = matrix(c(1, rho2_vec[dgp], rho2_vec[dgp], 1), nrow = 2, byrow = TRUE)
vars_out = MASS::mvrnorm(n = nobs_out, mu = c(x = 0, e = 0), Sigma = prms_out)
x_out = cbind(intercept = 1, x = vars_out[, 'x']) ## add intercept to design matrix
y_out = b0 + tcrossprod(x_out[, 'x'], b1) + vars_out[, 'e']
## Predictions for our "Out" sample
yhat_ols_out = x_out %*% ols_coefs
yhat_iv_out = x_out %*% iv_coefs
## Goodness of fit measures
rmse_out = data.frame(OLS = rmse(y_out, yhat_ols_out), IV = rmse(y_out, yhat_iv_out),
DGP = dgp)
mae_out = data.frame(OLS = mae(y_out, yhat_ols_out), IV = mae(y_out, yhat_iv_out),
DGP = dgp)
return(list(RMSE = rmse_out, MAE = mae_out))
})
res_out = do.call(function(...) Map('rbind', ...), res_out)
res = do.call(function(...) Map('cbind', ...), list(IN = res_in, OUT = res_out))
}
## Function for combining multiple simulations (and 'nice' printout)
full_sim = function(n_sims, parallel = TRUE) {
if (parallel) {
sims = parallel::mclapply(1:n_sims, dm_sim, mc.cores = parallel::detectCores())
} else {
sims = lapply(1:n_sims, dm_sim)
}
sims = do.call(function(...) Map('rbind', ...), sims)
sims = lapply(sims, function(dat) {
ret = aggregate(. ~ OUT.DGP, data = dat, function(x) sprintf('%.4f', mean(x)))
names(ret)[1] = 'DGP'
return(ret)
})
return(sims)
}
## Run and time 10,000 simulations
## Takes about 8 seconds in parallel on my laptop (40 seconds in serial)
set.seed(256647624, "L'Ecuyer") ## L'Ecuyer seed for parallel option
system.time({print(full_sim(1e4, parallel = TRUE))})
# $RMSE
# DGP IN.OLS IN.IV OUT.OLS OUT.IV
# 1 1 0.9667 1.0002 0.9000 1.0003
# 2 2 0.9667 1.0002 0.9671 1.0006
# 3 3 0.9667 1.0002 1.0286 0.9998
# 4 4 0.9667 1.0002 1.0882 1.0002
# 5 5 0.9667 1.0002 1.1427 0.9984
#
# $MAE
# DGP IN.OLS IN.IV OUT.OLS OUT.IV
# 1 1 0.7716 0.7983 0.7197 0.7999
# 2 2 0.7716 0.7983 0.7739 0.8005
# 3 3 0.7716 0.7983 0.8229 0.7996
# 4 4 0.7716 0.7983 0.8703 0.7999
# 5 5 0.7716 0.7983 0.9143 0.7988
#
# user system elapsed
# 81.490 1.610 8.176
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment