Skip to content

Instantly share code, notes, and snippets.

@BERENZ
Last active August 21, 2024 16:17
Show Gist options
  • Save BERENZ/01b952d52aebfd0f8616d80f46ad2dcf to your computer and use it in GitHub Desktop.
Save BERENZ/01b952d52aebfd0f8616d80f46ad2dcf to your computer and use it in GitHub Desktop.
weightit examples about coverage
## packages
library(WeightIt)
library(mvnfast)
library(marginaleffects)
######## ATE
## function taken from the paper Sant'Anna, P. H., Song, X., & Xu, Q. (2022). Covariate distribution balance via propensity scores. Journal of Applied Econometrics, 37(6), 1093-1120.
## in particular from here: http://qed.econ.queensu.ca/jae/datasets/santanna001/
cbps_sim_data <- function(n, dgp, scale.ps = 1, y_nonlin = FALSE){
#----------------------------------------------------------------------------
#----------------------------------------------------------------------------
# Kand and Schafer design - Correct model
#----------------------------------------------------------------------------
#----------------------------------------------------------------------------
if (dgp==1){
# Generate X
X1 <- stats::rnorm(n)
X2 <- stats::rnorm(n)
X3 <- stats::rnorm(n)
X4 <- stats::rnorm(n)
X <- cbind(X1,X2,X3,X4)
# Beta of pscores
b.ps <- c(-1, 0.5, -0.25, -0.1)
# Generate pscore and Treatment Status
b.ps.Times.x <- X %*% b.ps
pr <- 1/(1+exp(- scale.ps * b.ps.Times.x))
Treat <- stats::rbinom(n, size=1, prob = pr)
# the potential outcomes
# Beta of regression
b.reg <- c(27.4, 13.7, 13.7, 13.7)
b.reg.Times.x <- X %*% b.reg
if (y_nonlin) b.reg.Times.x <- (2*X[, 1] + 2*X[, 2] + X[, 3])^2
Y0 <- 200 - b.reg.Times.x + stats::rnorm(n)
Y1 <- 210 + b.reg.Times.x + stats::rnorm(n)
# Observed outcome
Y <- Y1*Treat + Y0*(1-Treat)
# Dataset
datamatrix <- data.frame(cbind(Y, Treat, X, X, pr))
colnames(datamatrix) <- c("Y", "Treat",
paste("X", 1:dim(X)[2], sep = ""),
paste("trueX", 1:dim(X)[2], sep = ""),
"truePS")
}
#----------------------------------------------------------------------------
# Kand and Schafer design - Misspecified model
#----------------------------------------------------------------------------
if (dgp == 2){
# Generate X
X1 <- stats::rnorm(n)
X2 <- stats::rnorm(n)
X3 <- stats::rnorm(n)
X4 <- stats::rnorm(n)
X <- cbind(X1,X2,X3,X4)
# Beta of pscores
b.ps <- c(-1, 0.5, -0.25, -0.1)
# Generate pscore and Treatment Status
b.ps.Times.x <- X %*% b.ps
pr <- 1/(1+exp(- scale.ps * b.ps.Times.x))
Treat <- stats::rbinom(n, size=1, prob = pr)
# the potential outcomes
# Beta of regression
b.reg <- c(27.4, 13.7, 13.7, 13.7)
b.reg.Times.x <- X %*% b.reg
Y0 <- 200 - b.reg.Times.x + stats::rnorm(n)
Y1 <- 210 + b.reg.Times.x + stats::rnorm(n)
# Observed outcome
Y <- Y1*Treat + Y0*(1-Treat)
# Dataset
W1 <- exp(X1/2)
W2 <- X2/(1+exp(X1))
W3 <- (X1*X3/25 + 0.6)^3
W4 <- (X1 + X4 + 20)^2
W <- cbind(W1, W2, W3, W4)
datamatrix <- data.frame(cbind(Y, Treat, W, X, pr))
colnames(datamatrix) <- c("Y", "Treat",
paste("X", 1:dim(X)[2], sep = ""),
paste("trueX", 1:dim(X)[2], sep = ""),
"truePS")
}
#----------------------------------------------------------------------------
#----------------------------------------------------------------------------
return(datamatrix)
}
## simulation for ATE
R <- 500
results_mis <- results <- matrix(0, ncol = 4, nrow = R)
colnames(results_mis) <- colnames(results) <- c("simple_a", "simple_hc0", "inter_a", "inter_hc0")
cbps_f <- Treat ~ X1 + X2 + X3 + X4
cbps_est1 <- Y ~ Treat
cbps_est2 <- Y ~ Treat*(X1 + X2 + X3 + X4)
effect <- 10
for (r in 1:R) {
set.seed(r)
## correctly specified
data_set <- cbps_sim_data(n = 1000, dgp=1)
## cbps
cbps_fit <- weightit(formula = cbps_f, data = data_set, estimand = "ATE", method = "cbps")
res1 <- lm_weightit(cbps_est1, data = data_set, weightit = cbps_fit) |> avg_comparisons(variables = "Treat")
res1a <- lm_weightit(cbps_est1, data = data_set, weightit = cbps_fit, vcov = "HC0") |> avg_comparisons(variables = "Treat")
res2 <- lm_weightit(cbps_est2, data = data_set, weightit = cbps_fit) |> avg_comparisons(variables = "Treat")
res2a <- lm_weightit(cbps_est2, data = data_set, weightit = cbps_fit, vcov = "HC0") |> avg_comparisons(variables = "Treat")
results[r, 1] <- res1$conf.low < effect & res1$conf.high > effect
results[r, 2] <- res1a$conf.low < effect & res1a$conf.high > effect
results[r, 3] <- res2$conf.low < effect & res2$conf.high > effect
results[r, 4] <- res2a$conf.low < effect & res2a$conf.high > effect
## mis-correctly specified
data_set <- cbps_sim_data(n = 1000, dgp=2)
## cbps
cbps_fit <- weightit(formula = cbps_f, data = data_set, estimand = "ATE", method = "cbps")
res1 <- lm_weightit(cbps_est1, data = data_set, weightit = cbps_fit) |> avg_comparisons(variables = "Treat")
res1a <- lm_weightit(cbps_est1, data = data_set, weightit = cbps_fit, vcov = "HC0") |> avg_comparisons(variables = "Treat")
res2 <- lm_weightit(cbps_est2, data = data_set, weightit = cbps_fit) |> avg_comparisons(variables = "Treat")
res2a <- lm_weightit(cbps_est2, data = data_set, weightit = cbps_fit, vcov = "HC0") |> avg_comparisons(variables = "Treat")
results_mis[r, 1] <- res1$conf.low < effect & res1$conf.high > effect
results_mis[r, 2] <- res1a$conf.low < effect & res1a$conf.high > effect
results_mis[r, 3] <- res2$conf.low < effect & res2$conf.high > effect
results_mis[r, 4] <- res2a$conf.low < effect & res2a$conf.high > effect
}
colMeans(results)
colMeans(results_mis)
##### ATT
## taken from Hainmueller, J. (2012). Entropy balancing for causal effects: A multivariate reweighting method to produce balanced samples in observational studies. Political analysis, 20(1), 25-46.
## simulation for ATT
n <- 2000 ## initial sample size
R <- 500
mu <- c(0, 0, 0)
Sigma <- matrix(c(2, 1, -1,
1, 1, -0.5,
-1,-0.5, 1),
nrow=3,
ncol=3)
fm1 <- d1 ~ x1 + x2 + x3 + x4 + x5 + x6
est1 <- y1 ~ d1
est2 <- y1 ~ d1*(x1 + x2 + x3 + x4 + x5 + x6)
mat <- matrix(0, ncol = 4, nrow = R)
colnames(mat) <- c("simple_a", "simple_hc0", "inter_a", "inter_hc0")
effect <- 0 ## effect is 0
for (r in 1:R) {
set.seed(r)
x123 <- rmvn(n=n, mu=mu, sigma=Sigma)
x1 <- x123[,1]
x2 <- x123[,2]
x3 <- x123[,3]
x4 <- runif(n,-3,3)
x5 <- rchisq(n,1)
x6 <- rbinom(n,1,0.5)
ep1 <- rnorm(n,0,30)
d1 <- as.numeric((x1 + 2*x2 -2*x3 -x4 -0.5*x5 +x6 + ep1) > 0)
y1 <- d1*effect + x1 + x2 + x3 - x4 + x5 + x6 + rnorm(n)
df <- data.frame(id =1:n, x1,x2,x3,x4,x5,x6,d1,y1)
ebal_res <- weightit(formula = fm1, data = df, estimand = "ATT", method = "ebal")
res1 <- lm_weightit(est1, data = df, weightit = ebal_res) |> avg_comparisons(variables = "d1")
res1a <- lm_weightit(est1, data = df, weightit = ebal_res, vcov = "HC0") |> avg_comparisons(variables = "d1")
res2 <- lm_weightit(est2, data = df, weightit = ebal_res) |> avg_comparisons(variables = "d1")
res2a <- lm_weightit(est2, data = df, weightit = ebal_res, vcov = "HC0") |> avg_comparisons(variables = "d1")
mat[r, 1] <- res1$conf.low < effect & res1$conf.high > effect
mat[r, 2] <- res1a$conf.low < effect & res1a$conf.high > effect
mat[r, 3] <- res2$conf.low < effect & res2$conf.high > effect
mat[r, 4] <- res2a$conf.low < effect & res2a$conf.high > effect
}
colMeans(mat) ## coverage should be 95% right but it is either 0 or 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment