andrewheiss/brms_ame.md

## brms_ame.md

      
    Raw
  

              brms_ame.md
            
          
    library(tidyverse)
library(brms)
library(tidybayes)
library(magrittr)

options(mc.cores = 4,
        brms.backend = "cmdstanr")

set.seed(7305)  # From random.org

# Load data and models
# Download from https://www.dropbox.com/s/8b66jz8larubvuc/smaller_data.csv
example_data <- read_csv("smaller_data.csv")

# Pre-run models
# Download from: 
# - https://www.dropbox.com/s/plen8gdirnasp2k/model_with_logged_outcome.rds
# - https://www.dropbox.com/s/4vj2sao0qayfnby/model_with_hurdled_not_logged_outcome.rds
model_with_logged_outcome <- readRDS("model_with_logged_outcome.rds")
model_with_hurdled_not_logged_outcome <- readRDS("model_with_hurdled_not_logged_outcome.rds")

# Code for running models; these are pre-run and saved as .rds files
model_with_logged_outcome <- brm(
  bf(total_oda_log_lead1 | weights(iptw) ~ advocacy +
       (1 | gwcode) + (1 | year)),
  data = example_data,
  family = gaussian(),
  prior = c(set_prior("normal(0, 20)", class = "Intercept"),
            set_prior("normal(0, 3)", class = "b"),
            set_prior("cauchy(0, 1)", class = "sd")),
  chains = 4, iter = 4000,
  warmup = 1000, seed = 4046
)
# saveRDS(model_with_logged_outcome, "model_with_logged_outcome.rds")

model_with_hurdled_not_logged_outcome <- brm(
  bf(total_oda_lead1 | weights(iptw) ~ advocacy + (1 | gwcode) + (1 | year),
     hu ~ advocacy + (1 | gwcode) + (1 | year)),
  data = example_data,
  # Has to be rstan bc cmdstanr complains about initial values :(
  family = hurdle_lognormal(), backend = "rstan",
  prior = c(set_prior("normal(0, 20)", class = "Intercept"),
            set_prior("normal(0, 3)", class = "b"),
            set_prior("cauchy(0, 1)", class = "sd"),
            set_prior("logistic(-2, 0.6)", class = "Intercept", dpar = "hu")),
  chains = 4, iter = 4000,
  warmup = 1000, seed = 4046
)
# saveRDS(model_with_hurdled_not_logged_outcome, "model_with_hurdled_not_logged_outcome.rds")

# Plot coefs
plot_regular <- gather_draws(model_with_logged_outcome, 
                             c(b_advocacy))

ggplot(plot_regular, aes(x = .value, y = .variable)) +
  geom_vline(xintercept = 0) +
  stat_halfeye(.width = c(0.8, 0.95), alpha = 0.8, point_interval = "median_hdi")

plot_hurdle <- gather_draws(model_with_hurdled_not_logged_outcome, 
                            c(b_advocacy,
                              b_hu_advocacy))

ggplot(plot_hurdle, aes(x = .value, y = .variable)) +
  geom_vline(xintercept = 0) +
  stat_halfeye(.width = c(0.8, 0.95), alpha = 0.8, point_interval = "median_hdi")

# Plot conditional effects
conditional_effects(model_with_logged_outcome)

conditional_effects(model_with_hurdled_not_logged_outcome)

# AME stuff
variable <- "advocacy"

# Function for numerical derivative ± stuff
stepsize <- 1e-7
setstep <- function(x) {
  x + (max(abs(x), 1, na.rm = TRUE) * sqrt(stepsize)) - x
}

# Regular model AME
d_regular <- model_with_logged_outcome$data %>% 
  select(-total_oda_log_lead1) %>% 
  group_by_all() %>% 
  count(name = "w") %>% 
  ungroup()

d1_regular <- d0_regular <- d_regular

d0_regular[[variable]] <- d0_regular[[variable]] - setstep(d0_regular[[variable]])
d1_regular[[variable]] <- d1_regular[[variable]] + setstep(d1_regular[[variable]])

f0_regular <- d0_regular %>%
  add_fitted_draws(model = model_with_logged_outcome,
                   value = paste0(variable, "_d0"))

f1_regular <- d1_regular %>%
  add_fitted_draws(model = model_with_logged_outcome,
                   value = paste0(variable, "_d1"))

ame_regular <- f0_regular %>% 
  ungroup() %>% 
  mutate(
    me =
      f1_regular[[paste0(variable, "_d1")]] %>%
      subtract(f0_regular[[paste0(variable, "_d0")]]) %>% 
      divide_by(d1_regular[[variable]] - d0_regular[[variable]])
  ) %>% 
  group_by_at(".draw") %>% 
  summarise(ame = sum(me * w)/sum(w)) %>% 
  select(ame) %>%
  ungroup()

ame_regular$ame %>% 
  quantile(probs = c(0.5, 0.025, 0.975))
#>        50%       2.5%      97.5% 
#> -0.5032870 -0.9364032 -0.0786715

ggplot(tibble(x = ame_regular$ame), aes(x = x)) +
  geom_density(fill = "grey50", color = NA, alpha = 0.8) +
  geom_vline(xintercept = 0)

# Hurdle model AME
d_hurdle <- model_with_hurdled_not_logged_outcome$data %>% 
  select(-total_oda_lead1) %>% 
  group_by_all() %>% 
  count(name = "w") %>% 
  ungroup()

d1_hurdle <- d0_hurdle <- d_hurdle

d0_hurdle[[variable]] <- d0_hurdle[[variable]] - setstep(d0_hurdle[[variable]])
d1_hurdle[[variable]] <- d1_hurdle[[variable]] + setstep(d1_hurdle[[variable]])

f0_hurdle <- d0_hurdle %>%
  add_fitted_draws(model = model_with_hurdled_not_logged_outcome,
                   value = paste0(variable, "_d0"))

f1_hurdle <- d1_hurdle %>%
  add_fitted_draws(model = model_with_hurdled_not_logged_outcome,
                   value = paste0(variable, "_d1"))

ame_hurdle <- f0_hurdle %>% 
  ungroup() %>% 
  mutate(
    me =
      f1_hurdle[[paste0(variable, "_d1")]] %>%
      subtract(f0_hurdle[[paste0(variable, "_d0")]]) %>% 
      divide_by(d1_hurdle[[variable]] - d0_hurdle[[variable]])
  ) %>% 
  group_by_at(".draw") %>% 
  summarise(ame = sum(me * w)/sum(w)) %>% 
  select(ame) %>%
  ungroup()

# PHEW from $50 million to $615 million! That's a huge range
ame_hurdle$ame %>% 
  quantile(probs = c(0.5, 0.025, 0.975)) %>% 
  scales::dollar()
#>            50%           2.5%          97.5% 
#> "$331,114,209"  "$49,817,603" "$615,229,868"

ggplot(tibble(x = ame_hurdle$ame), aes(x = x)) +
  geom_density(fill = "grey50", color = NA, alpha = 0.8) +
  geom_vline(xintercept = 0) + 
  scale_x_continuous(labels = scales::dollar)

^{Created on 2021-05-25 by the reprex package (v1.0.0)}