Last active
August 21, 2024 16:17
-
-
Save BERENZ/01b952d52aebfd0f8616d80f46ad2dcf to your computer and use it in GitHub Desktop.
weightit examples about coverage
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## packages | |
library(WeightIt) | |
library(mvnfast) | |
library(marginaleffects) | |
######## ATE | |
## function taken from the paper Sant'Anna, P. H., Song, X., & Xu, Q. (2022). Covariate distribution balance via propensity scores. Journal of Applied Econometrics, 37(6), 1093-1120. | |
## in particular from here: http://qed.econ.queensu.ca/jae/datasets/santanna001/ | |
cbps_sim_data <- function(n, dgp, scale.ps = 1, y_nonlin = FALSE){ | |
#---------------------------------------------------------------------------- | |
#---------------------------------------------------------------------------- | |
# Kand and Schafer design - Correct model | |
#---------------------------------------------------------------------------- | |
#---------------------------------------------------------------------------- | |
if (dgp==1){ | |
# Generate X | |
X1 <- stats::rnorm(n) | |
X2 <- stats::rnorm(n) | |
X3 <- stats::rnorm(n) | |
X4 <- stats::rnorm(n) | |
X <- cbind(X1,X2,X3,X4) | |
# Beta of pscores | |
b.ps <- c(-1, 0.5, -0.25, -0.1) | |
# Generate pscore and Treatment Status | |
b.ps.Times.x <- X %*% b.ps | |
pr <- 1/(1+exp(- scale.ps * b.ps.Times.x)) | |
Treat <- stats::rbinom(n, size=1, prob = pr) | |
# the potential outcomes | |
# Beta of regression | |
b.reg <- c(27.4, 13.7, 13.7, 13.7) | |
b.reg.Times.x <- X %*% b.reg | |
if (y_nonlin) b.reg.Times.x <- (2*X[, 1] + 2*X[, 2] + X[, 3])^2 | |
Y0 <- 200 - b.reg.Times.x + stats::rnorm(n) | |
Y1 <- 210 + b.reg.Times.x + stats::rnorm(n) | |
# Observed outcome | |
Y <- Y1*Treat + Y0*(1-Treat) | |
# Dataset | |
datamatrix <- data.frame(cbind(Y, Treat, X, X, pr)) | |
colnames(datamatrix) <- c("Y", "Treat", | |
paste("X", 1:dim(X)[2], sep = ""), | |
paste("trueX", 1:dim(X)[2], sep = ""), | |
"truePS") | |
} | |
#---------------------------------------------------------------------------- | |
# Kand and Schafer design - Misspecified model | |
#---------------------------------------------------------------------------- | |
if (dgp == 2){ | |
# Generate X | |
X1 <- stats::rnorm(n) | |
X2 <- stats::rnorm(n) | |
X3 <- stats::rnorm(n) | |
X4 <- stats::rnorm(n) | |
X <- cbind(X1,X2,X3,X4) | |
# Beta of pscores | |
b.ps <- c(-1, 0.5, -0.25, -0.1) | |
# Generate pscore and Treatment Status | |
b.ps.Times.x <- X %*% b.ps | |
pr <- 1/(1+exp(- scale.ps * b.ps.Times.x)) | |
Treat <- stats::rbinom(n, size=1, prob = pr) | |
# the potential outcomes | |
# Beta of regression | |
b.reg <- c(27.4, 13.7, 13.7, 13.7) | |
b.reg.Times.x <- X %*% b.reg | |
Y0 <- 200 - b.reg.Times.x + stats::rnorm(n) | |
Y1 <- 210 + b.reg.Times.x + stats::rnorm(n) | |
# Observed outcome | |
Y <- Y1*Treat + Y0*(1-Treat) | |
# Dataset | |
W1 <- exp(X1/2) | |
W2 <- X2/(1+exp(X1)) | |
W3 <- (X1*X3/25 + 0.6)^3 | |
W4 <- (X1 + X4 + 20)^2 | |
W <- cbind(W1, W2, W3, W4) | |
datamatrix <- data.frame(cbind(Y, Treat, W, X, pr)) | |
colnames(datamatrix) <- c("Y", "Treat", | |
paste("X", 1:dim(X)[2], sep = ""), | |
paste("trueX", 1:dim(X)[2], sep = ""), | |
"truePS") | |
} | |
#---------------------------------------------------------------------------- | |
#---------------------------------------------------------------------------- | |
return(datamatrix) | |
} | |
## simulation for ATE | |
R <- 500 | |
results_mis <- results <- matrix(0, ncol = 4, nrow = R) | |
colnames(results_mis) <- colnames(results) <- c("simple_a", "simple_hc0", "inter_a", "inter_hc0") | |
cbps_f <- Treat ~ X1 + X2 + X3 + X4 | |
cbps_est1 <- Y ~ Treat | |
cbps_est2 <- Y ~ Treat*(X1 + X2 + X3 + X4) | |
effect <- 10 | |
for (r in 1:R) { | |
set.seed(r) | |
## correctly specified | |
data_set <- cbps_sim_data(n = 1000, dgp=1) | |
## cbps | |
cbps_fit <- weightit(formula = cbps_f, data = data_set, estimand = "ATE", method = "cbps") | |
res1 <- lm_weightit(cbps_est1, data = data_set, weightit = cbps_fit) |> avg_comparisons(variables = "Treat") | |
res1a <- lm_weightit(cbps_est1, data = data_set, weightit = cbps_fit, vcov = "HC0") |> avg_comparisons(variables = "Treat") | |
res2 <- lm_weightit(cbps_est2, data = data_set, weightit = cbps_fit) |> avg_comparisons(variables = "Treat") | |
res2a <- lm_weightit(cbps_est2, data = data_set, weightit = cbps_fit, vcov = "HC0") |> avg_comparisons(variables = "Treat") | |
results[r, 1] <- res1$conf.low < effect & res1$conf.high > effect | |
results[r, 2] <- res1a$conf.low < effect & res1a$conf.high > effect | |
results[r, 3] <- res2$conf.low < effect & res2$conf.high > effect | |
results[r, 4] <- res2a$conf.low < effect & res2a$conf.high > effect | |
## mis-correctly specified | |
data_set <- cbps_sim_data(n = 1000, dgp=2) | |
## cbps | |
cbps_fit <- weightit(formula = cbps_f, data = data_set, estimand = "ATE", method = "cbps") | |
res1 <- lm_weightit(cbps_est1, data = data_set, weightit = cbps_fit) |> avg_comparisons(variables = "Treat") | |
res1a <- lm_weightit(cbps_est1, data = data_set, weightit = cbps_fit, vcov = "HC0") |> avg_comparisons(variables = "Treat") | |
res2 <- lm_weightit(cbps_est2, data = data_set, weightit = cbps_fit) |> avg_comparisons(variables = "Treat") | |
res2a <- lm_weightit(cbps_est2, data = data_set, weightit = cbps_fit, vcov = "HC0") |> avg_comparisons(variables = "Treat") | |
results_mis[r, 1] <- res1$conf.low < effect & res1$conf.high > effect | |
results_mis[r, 2] <- res1a$conf.low < effect & res1a$conf.high > effect | |
results_mis[r, 3] <- res2$conf.low < effect & res2$conf.high > effect | |
results_mis[r, 4] <- res2a$conf.low < effect & res2a$conf.high > effect | |
} | |
colMeans(results) | |
colMeans(results_mis) | |
##### ATT | |
## taken from Hainmueller, J. (2012). Entropy balancing for causal effects: A multivariate reweighting method to produce balanced samples in observational studies. Political analysis, 20(1), 25-46. | |
## simulation for ATT | |
n <- 2000 ## initial sample size | |
R <- 500 | |
mu <- c(0, 0, 0) | |
Sigma <- matrix(c(2, 1, -1, | |
1, 1, -0.5, | |
-1,-0.5, 1), | |
nrow=3, | |
ncol=3) | |
fm1 <- d1 ~ x1 + x2 + x3 + x4 + x5 + x6 | |
est1 <- y1 ~ d1 | |
est2 <- y1 ~ d1*(x1 + x2 + x3 + x4 + x5 + x6) | |
mat <- matrix(0, ncol = 4, nrow = R) | |
colnames(mat) <- c("simple_a", "simple_hc0", "inter_a", "inter_hc0") | |
effect <- 0 ## effect is 0 | |
for (r in 1:R) { | |
set.seed(r) | |
x123 <- rmvn(n=n, mu=mu, sigma=Sigma) | |
x1 <- x123[,1] | |
x2 <- x123[,2] | |
x3 <- x123[,3] | |
x4 <- runif(n,-3,3) | |
x5 <- rchisq(n,1) | |
x6 <- rbinom(n,1,0.5) | |
ep1 <- rnorm(n,0,30) | |
d1 <- as.numeric((x1 + 2*x2 -2*x3 -x4 -0.5*x5 +x6 + ep1) > 0) | |
y1 <- d1*effect + x1 + x2 + x3 - x4 + x5 + x6 + rnorm(n) | |
df <- data.frame(id =1:n, x1,x2,x3,x4,x5,x6,d1,y1) | |
ebal_res <- weightit(formula = fm1, data = df, estimand = "ATT", method = "ebal") | |
res1 <- lm_weightit(est1, data = df, weightit = ebal_res) |> avg_comparisons(variables = "d1") | |
res1a <- lm_weightit(est1, data = df, weightit = ebal_res, vcov = "HC0") |> avg_comparisons(variables = "d1") | |
res2 <- lm_weightit(est2, data = df, weightit = ebal_res) |> avg_comparisons(variables = "d1") | |
res2a <- lm_weightit(est2, data = df, weightit = ebal_res, vcov = "HC0") |> avg_comparisons(variables = "d1") | |
mat[r, 1] <- res1$conf.low < effect & res1$conf.high > effect | |
mat[r, 2] <- res1a$conf.low < effect & res1a$conf.high > effect | |
mat[r, 3] <- res2$conf.low < effect & res2$conf.high > effect | |
mat[r, 4] <- res2a$conf.low < effect & res2a$conf.high > effect | |
} | |
colMeans(mat) ## coverage should be 95% right but it is either 0 or 1 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment