Skip to content

Instantly share code, notes, and snippets.

@alexpghayes
Created September 13, 2022 23:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alexpghayes/b3792cb594d7a05989a08ffb55d14818 to your computer and use it in GitHub Desktop.
Save alexpghayes/b3792cb594d7a05989a08ffb55d14818 to your computer and use it in GitHub Desktop.
library(fastRG)
library(estimatr)
library(tidyverse)
set.seed(27)
get_one_estimate <- function() {
k <- 5
n <- 1000
B <- matrix(stats::runif(k * k), nrow = k, ncol = k)
theta <- round(stats::rlnorm(n, 2))
pi <- c(1, 2, 4, 1, 1)
m <- dcsbm(
theta = theta,
B = B,
pi = pi,
expected_degree = 50
)
A <- sample_sparse(m)
trt <- rbinom(n, size = 1, prob = 0.5)
perc_trted_neighbors <- drop(A %*% trt / rowSums(A))
perc_trted_neighbors[is.na(perc_trted_neighbors)] <- 0 # isolated nodes
y <- trt + cos(perc_trted_neighbors) + rnorm(n)
fit <- lm_robust(y ~ trt)
tidy(fit) %>%
filter(term == "trt")
}
ests <- purrr::map_dfr(1:50, \(x) get_one_estimate())
ests %>%
mutate(
sim = row_number()
) %>%
ggplot(aes(x = sim, ymin = conf.low, y = estimate, ymax = conf.high)) +
geom_pointrange() +
geom_hline(yintercept = 1, color = "steelblue") +
theme_minimal() +
labs(
title = "50 confidence intervals from the bonkers t-test",
caption = "Y(w[i], W[-i]) linear in trt and non-linear in portion of treated neighbors",
y = "Value (true direct effect is 1)",
x = "Simulation index"
)
@alexpghayes
Copy link
Author

library(fastRG)
library(estimatr)
library(tidyverse)
library(glue)
library(furrr)

plan(multisession)

set.seed(27)

# the potential outcomes model for y
scm_ey <- function(A, trt) {
  
  perc_trted_neighbors <- drop(A %*% trt / rowSums(A))
  perc_trted_neighbors[is.na(perc_trted_neighbors)] <- 0  # isolated nodes
  
  cos(trt * 3 * perc_trted_neighbors)
}

# numerically evaluate the direct effect -- should figure out a way to speed this up
dir <- function(A, trt) {
  
  diff <- rep(0, n)
  
  treated <- trt
  untreated <- trt
  
  for (i in 1:n) {
    
    treated[i] <- 1
    untreated[i] <- 0
    
    diff[i] <- scm_ey(A, treated)[i] - scm_ey(A, untreated)[i]
    
    treated[i] <- trt[i]
    untreated[i] <- trt[i]
  }
  
  mean(diff)
}

get_one_estimate <- function() {
  k <- 5
  n <- 500
  B <- matrix(stats::runif(k * k), nrow = k, ncol = k)
  
  theta <- round(stats::rlnorm(n, 2))
  
  pi <- c(1, 2, 4, 1, 1)
  
  m <- dcsbm(
    theta = theta,
    B = B,
    pi = pi,
    expected_degree = 50,
    allow_self_loops = FALSE
  )
  
  A <- sample_sparse(m)
  
  trt <- rbinom(n, size = 1, prob = 0.5)
  y <- scm_ey(A, trt) + rnorm(n)
  
  direct_effect <- dir(A, trt)
  
  fit <- lm_robust(y ~ trt)
  
  tidy(fit) %>% 
    filter(term == "trt") %>% 
    mutate(direct_effect = direct_effect)
}

get_one_estimate()

num_sims <- 100

ests <- furrr::future_map_dfr(1:num_sims, ~get_one_estimate()) %>% 
  mutate(
    covered = conf.low <= direct_effect & direct_effect <= conf.high
  )

coverage <- mean(ests2$covered)

ests %>% 
  mutate(
    sim = row_number()
  ) %>% 
  ggplot(aes(x = sim, ymin = conf.low, y = estimate, ymax = conf.high, color = covered)) +
  geom_pointrange() +
  geom_point(aes(x = sim, y = direct_effect), color = "black") +
  theme_minimal() +
  labs(
    title = glue("Confidence intervals (coverage is {round(coverage, 2)})"),
    caption = "Y(w[i], W[-i]) linear in trt and non-linear in portion of treated neighbors",
    y = "Value",
    x = "Simulation index"
  )

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment