Skip to content

Instantly share code, notes, and snippets.

@bgall
Last active February 9, 2023 22:00
Show Gist options
  • Save bgall/f3eb58339909dba2e46f99e3aba21ebe to your computer and use it in GitHub Desktop.
Save bgall/f3eb58339909dba2e46f99e3aba21ebe to your computer and use it in GitHub Desktop.
Show bias in covariate adjustment and linear combination "issue"
library(dplyr)
# Wrap data simulation in a function
sim_data <- function(N) {
###########################################################################
# parameters
###########################################################################
set.seed(123)
ages <- 16:30
control_mean <- 0
control_sd <- 1
teffect_mean_mean <- 1
teffect_mean_sd <- 1
teffect_sd_rate <- 1
###########################################################################
# Simulate data
###########################################################################
# Age, individual treatment effects, and row identifiers
dat <- tibble(
age = sample(ages, size = N, replace = T),
age_cat = cut(
age,
breaks = 3,
labels = c("low", "med", "high")
),
het_teffect_mean = rnorm(n = N, mean = teffect_mean_mean, sd = teffect_mean_sd),
het_teffect_sd = rexp(n = N, rate = teffect_sd_rate),
ids = 1:N
)
# Ensure we have obs in every age value (better example)
stopifnot(dat$age %>% unique() %>% length() == length(ages))
# Block randomized treatment and outcome draws
treated <- dat %>%
group_by(age_cat) %>%
slice_sample(prop = 0.5) %>%
ungroup()
dat <- dat %>%
mutate(
treat = if_else(ids %in% treated$ids, 1, 0),
y = rnorm(
n = N,
control_mean + treat * het_teffect_mean,
sd = control_sd + treat * het_teffect_sd
)
)
return(dat)
}
###########################################################################
# Example simulation
###########################################################################
# Simulate N = 100
dat <- sim_data(100)
table(dat$treat, dat$age_cat) # check blocking
###########################################################################
# Linear combination?
###########################################################################
# Regression: w/ constant
lm(y ~ treat + age + age_cat, data = dat) # factor level term drops
# Regression: no constant
lm(y ~ -1 + treat + age + age_cat, data = dat) # no dropped term
###########################################################################
# Show bias in adjusted ATE
###########################################################################
# Throw some of above code into function
get_bias <- function(data){
# unadjusted estimate
unadj <- lm(y ~ treat, data = data)
# adjusted estimate
adj <- lm(y ~ treat + age + age_cat, data = data)
# bias
unadj$coefficients["treat"] - adj$coefficients["treat"]
}
###########################################################################
# Show bias for varying N
###########################################################################
# Expected bias magnitude declines with N (better balance)
get_bias(sim_data(50))
get_bias(sim_data(100))
get_bias(sim_data(500))
get_bias(sim_data(1000))
get_bias(sim_data(5000))
get_bias(sim_data(500000))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment