Skip to content

Instantly share code, notes, and snippets.

@ericnovik
Created February 18, 2026 23:48
Show Gist options
  • Select an option

  • Save ericnovik/54c9e9ebd7afff2485cb80931d67f6f8 to your computer and use it in GitHub Desktop.

Select an option

Save ericnovik/54c9e9ebd7afff2485cb80931d67f6f8 to your computer and use it in GitHub Desktop.
library(cmdstanr)
library(posterior)
library(dplyr)
library(readr)
library(purrr)
library(tidyr)
library(ggplot2)
# -------------------------------------------------------------------------
# 1. Ensure Model Exists (NCP Version for Sparse Data)
# -------------------------------------------------------------------------
# Compile the non-centered-parameterization binomial meta-analysis Stan
# model (cmdstanr reuses a cached executable if the .stan file is unchanged).
# Path is relative to the current working directory — run from project root.
mod <- cmdstan_model("meta-analysis/binom_bma.stan")
# -------------------------------------------------------------------------
# 2. Define Fitting Function (Single Endpoint)
# -------------------------------------------------------------------------

# Fit the hierarchical binomial meta-analysis model to one endpoint.
#
# @param data_subset Data frame holding the rows for a single End_Point,
#   with columns Study, End_Point, Control_Events_n/N and
#   Intervention_Events_n/N.
# @param model_obj A compiled CmdStanModel (see `mod` above).
# @return A list with two elements:
#   * summary — tibble of posterior mean and 95% interval (lo/hi) for each
#     study OR, the pooled mean_OR ("Overall"), and the predictive pred_OR
#     ("Predicted New Study"), tagged with Study/Type/End_Point.
#   * fit — the CmdStanMCMC fit object, kept for later draw extraction.
fit_single_endpoint <- function(data_subset, model_obj) {
  endpoint_name <- unique(data_subset$End_Point)
  message(paste("Fitting model for:", endpoint_name))

  # Prepare data: drop the pooled "Overall" row and any study with a
  # missing intervention event count.
  clean_data <- data_subset |>
    filter(Study != "Overall") |>
    filter(!is.na(Intervention_Events_n))

  stan_data <- list(
    J = nrow(clean_data),
    r_c = as.integer(clean_data$Control_Events_n),
    n_c = as.integer(clean_data$Control_Events_N),
    r_t = as.integer(clean_data$Intervention_Events_n),
    n_t = as.integer(clean_data$Intervention_Events_N)
  )

  # Fit with high adapt_delta to prevent divergences on sparse data.
  fit <- model_obj$sample(
    data = stan_data,
    seed = 123,
    chains = 4,
    parallel_chains = 4,
    refresh = 0,
    adapt_delta = 0.99
  )
  draws <- fit$draws(format = "df")

  # Helper: posterior mean and central 95% interval for one named column
  # of the draws data frame, labelled for the output table.
  summarize_param <- function(param_col, study_label, type_label) {
    draws |>
      summarise(
        mean = mean(.data[[param_col]]),
        lo = quantile(.data[[param_col]], 0.025),
        hi = quantile(.data[[param_col]], 0.975)
      ) |>
      mutate(
        Study = study_label,
        Type = type_label,
        End_Point = endpoint_name
      )
  }

  # 1. Per-study odds ratios. seq_len() (not 1:nrow()) so a zero-row
  # clean_data yields an empty result instead of iterating over c(1, 0).
  study_res <- map_dfr(seq_len(nrow(clean_data)), function(i) {
    summarize_param(paste0("study_OR[", i, "]"), clean_data$Study[i], "Study")
  })
  # 2. Pooled (population mean) odds ratio
  overall_res <- summarize_param("mean_OR", "Overall", "Summary")
  # 3. Posterior predictive OR for a hypothetical new study
  pred_res <- summarize_param("pred_OR", "Predicted New Study", "Prediction")

  list(
    summary = bind_rows(study_res, overall_res, pred_res),
    fit = fit
  )
}
# -------------------------------------------------------------------------
# 3. Main Execution
# -------------------------------------------------------------------------
raw_data <- read_csv("meta-analysis/8f64aaa2-e295-4fcb-b6a2-ccc2b102b6dd-result.csv")

# A. Fit one hierarchical model per endpoint
output_list <- raw_data |>
  group_split(End_Point) |>
  map(\(df) fit_single_endpoint(df, mod))

# B. Name each list element after its endpoint so downstream extraction
#    stays aligned regardless of group_split ordering
output_list <- set_names(
  output_list,
  map_chr(output_list, \(x) unique(x$summary$End_Point))
)

# C. Separate the posterior summary tables from the raw fit objects
results_all <- map_dfr(output_list, "summary")
fits_all <- map(output_list, "fit")
# -------------------------------------------------------------------------
# 4. Calculate Probabilities (Pr(OR < 1))
# -------------------------------------------------------------------------
# For each endpoint, the probability of benefit is the posterior mass of
# the pooled odds ratio falling below 1.
probs_df <- map_dfr(
  fits_all,
  function(fit) {
    mean_or_draws <- fit$draws(variables = "mean_OR", format = "matrix")
    tibble(Prob_Benefit = mean(mean_or_draws < 1))
  },
  .id = "End_Point"
) |>
  mutate(Prob_Label = scales::percent(Prob_Benefit, accuracy = 0.1))

print("Probabilities of Benefit (mean_OR < 1):")
print(probs_df)
# -------------------------------------------------------------------------
# 5. Plotting: Frequentist vs Bayesian
# -------------------------------------------------------------------------
# Reported (frequentist) point estimates and CIs straight from the source
# table; the pooled "Overall" row is tagged as the summary estimate.
freq_data <- raw_data |>
  select(Study, End_Point, mean = HR, lo = CI_Low, hi = CI_High) |>
  filter(!is.na(mean)) |>
  mutate(
    Method = "Frequentist (Reported)",
    Type = if_else(Study != "Overall", "Study", "Summary")
  )

# Posterior summaries from the hierarchical fits
bayes_data <- results_all |>
  select(Study, End_Point, mean, lo, hi, Type) |>
  mutate(Method = "Bayesian (Posterior)")

# Stack both sources; Sort_Order drives row placement on the y axis
# (Prediction = 1, Summary = 2, individual studies = 3)
plot_data <- bind_rows(freq_data, bayes_data) |>
  mutate(
    Sort_Order = case_when(
      Type == "Prediction" ~ 1,
      Type == "Summary" ~ 2,
      TRUE ~ 3
    ),
    Method = factor(
      Method,
      levels = c("Bayesian (Posterior)", "Frequentist (Reported)")
    )
  )
# Generate Plot: dodged forest-style comparison, one facet per endpoint
ggplot(
  plot_data,
  aes(
    y = reorder(Study, Sort_Order),
    x = mean, xmin = lo, xmax = hi,
    color = Method
  )
) +
  # Reference line at HR = 1 (null effect)
  geom_vline(xintercept = 1, linetype = "dashed", color = "gray50") +
  # Dodge the two methods so their intervals sit side by side per study
  geom_pointrange(
    aes(shape = Type, size = Type, alpha = Type),
    position = position_dodge(width = 0.6)
  ) +
  facet_wrap(~End_Point, scales = "free", ncol = 2) +
  scale_x_log10() +
  scale_color_manual(
    values = c(
      "Bayesian (Posterior)" = "#0072B2",
      "Frequentist (Reported)" = "#D55E00"
    )
  ) +
  # Visual hierarchy by row type; size/alpha legends are redundant with
  # shape, so they are suppressed via guide = "none"
  scale_shape_manual(
    values = c("Study" = 16, "Summary" = 18, "Prediction" = 5)
  ) +
  scale_size_manual(
    values = c("Study" = 0.6, "Summary" = 1, "Prediction" = 0.8),
    guide = "none"
  ) +
  scale_alpha_manual(
    values = c("Study" = 0.7, "Summary" = 1, "Prediction" = 0.6),
    guide = "none"
  ) +
  labs(
    title = "Comparison of Frequentist vs. Bayesian Estimates",
    subtitle = "Bayesian Hierarchical Model (NCP) with Prediction Intervals",
    x = "Hazard Ratio (log scale)",
    y = NULL
  ) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    strip.text = element_text(face = "bold"),
    axis.text.y = element_text(face = "bold"),
    panel.grid.minor = element_blank()
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment