Skip to content

Instantly share code, notes, and snippets.

@gowerc
Last active September 19, 2022 10:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gowerc/4191a8f7237f1e08c932f4a3ed4a1231 to your computer and use it in GitHub Desktop.
Save gowerc/4191a8f7237f1e08c932f4a3ed4a1231 to your computer and use it in GitHub Desktop.
#############################
#
#
# Exploring the impact of completely random & independently
# distributed covariates
#
#
#############################
library(dplyr)
###########################
#
#
# Logistic Regression
#
# Missing covariates in the model cause beta coeficients to be
# biased towards 0 as the error term has no where to go to be
# accounted for
#
#
get_vals <- function(mod, name) {
est <- coef(mod)[["trtB"]]
se <- sqrt(vcov(mod)["trtB", "trtB"])
res <- c("est" = est, "se" = se)
names(res) <- paste0(names(res), "_", name)
res
}
N <- 3000
beta_trt <- c(
"A" = 0,
"B" = 1.3
)
dat <- tibble(
covar1 = rnorm(N),
covar2 = rnorm(N),
trt = sample(c("A", "B"), size = N, replace = TRUE),
p = plogis( 0.16 + beta_trt[trt] + 0.6 * covar1 - 0.4 * covar2),
won = rbinom(N, 1, p)
)
mod1 <- glm(
data = dat,
formula = won ~ covar1 + covar2 + trt,
family = binomial()
)
mod2 <- glm(
data = dat,
formula = won ~ trt,
family = binomial()
)
get_vals(mod1, "full_model")
# est_full_model se_full_model
# 1.37059584 0.08775205
get_vals(mod2, "simple_model")
# est_simple_model se_simple_model
# 1.2290236 0.0830458
###########################
#
#
# Linear Models
#
# Including covariates (even if indepent and random) improves
# SE of main parameter of interest.
#
#
N <- 4000
beta_trt <- c(
"A" = 0,
"B" = 3
)
dat2 <- tibble(
trt = sample(c("A", "B"), size = N, replace = TRUE),
covar1 = rnorm(N),
covar2 = rnorm(N),
outcome_mean = 10 + beta_trt[trt] + 4 * covar1 - 2* covar2,
outcome = rnorm(N, outcome_mean, 6)
)
naive <- t.test(
dat2 %>% filter(trt == "B") %>% pull(outcome),
dat2 %>% filter(trt == "A") %>% pull(outcome)
)
mod1 <- lm(
data = dat2,
formula = outcome ~ trt + covar1 + covar2
)
mod2 <- lm(
data = dat2,
formula = outcome ~ trt
)
c(
"coef_naive" = diff(-naive$estimate)[[1]],
"se_naive" = naive$stderr
)
c(
"coef_full_model" = coef(mod1)[["trtB"]],
"se_full_model" = sqrt(vcov(mod1)["trtB", "trtB"])
)
c(
"coef_simple_model" = coef(mod2)[["trtB"]],
"se_simple_moel" = sqrt(vcov(mod2)["trtB", "trtB"])
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment