Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save arthur-albuquerque/5d50771dcfdb5cb700c11f1fcf5ed4a9 to your computer and use it in GitHub Desktop.
Save arthur-albuquerque/5d50771dcfdb5cb700c11f1fcf5ed4a9 to your computer and use it in GitHub Desktop.
PO model rmsb vs. brms
pacman::p_load(dplyr,
brms,
patchwork,
ggplot2)
# Install rmsb v0.2 to use cmdstan
pacman::p_load_gh("harrelfe/rmsb")
# Data
a <- c(rep(0,0), rep(1,3), rep(2,5), rep(3,5), rep(4,25), rep(5,40), rep(6,24))
b <- c(rep(0,2), rep(1,3), rep(2,9), rep(3,17), rep(4,33), rep(5,18), rep(6,18))
x <- c(rep('medical', length(a)), rep('endovascular', length(b)))
y <- c(a, b)
d1 = data.frame(x, y, study = "RESCUE-Japan LIMIT")
a <- c(rep(0,0), rep(1,3), rep(2,9), rep(3,20), rep(4,36), rep(5,32), rep(6,71))
b <- c(rep(0,2), rep(1,9), rep(2,25), rep(3,31), rep(4,27), rep(5,15), rep(6,68))
x <- c(rep('medical', length(a)), rep('endovascular', length(b)))
y <- c(a, b)
d2 = data.frame(x, y, study = "SELECT2")
a <- c(rep(0,0), rep(1,8), rep(2,18), rep(3,49), rep(4,60), rep(5,45), rep(6,45))
b <- c(rep(0,9), rep(1,19), rep(2,41), rep(3,39), rep(4,45), rep(5,27), rep(6,50))
x <- c(rep('medical', length(a)), rep('endovascular', length(b)))
y <- c(a, b)
d3 = data.frame(x, y, study = "ANGEL-ASPECT")
d = rbind(d1, d2, d3)
d$y <-factor(d$y, ordered = T)
d$x<-factor(d$x, levels = c("endovascular", "medical"))
dd <- datadist(d); options(datadist='dd')
# Proportional Odds Model----
# brms
# Check default priors in brms
get_prior(family = cumulative("logit"),
formula = y ~ x + (1|study) ,
data = d)
PO_brms <- brms::brm(
family = cumulative("logit"), # PO model
formula = y ~ x + (1|study) ,
prior =
prior(normal(0, 1.5), class = "b") +
prior(student_t(4, 0, 2.5), class = "sd"),
iter = 2000,
chain = 4,
cores = parallel::detectCores(),
control = list(adapt_delta = .99),
backend = "cmdstanr",
seed = 123,
data = d
)
# Check priors actually used
PO_brms$prior
# rmsb
PO_rmsb <- rmsb::blrm(y ~ x + cluster(study),
# intercept parameters prior: t-distribution with 3 d.f.
iprior = 2,
# scale parameter for the t-distribution for priors for the intercepts
ascale = 2.5,
# normal(0, 1.5) prior for "x"
priorsd = 1.5,
# random effect prior: half-t distribution with 4 d.f.
psigma = 1,
# assumed mean of the prior distribution of the RE SD
rsdmean = 0,
# assumed scale for the half t distribution for the RE SD
rsdsd = 2.5,
iter = 2000,
chains = 4,
parallel_chains = parallel::detectCores(),
adapt_delta = .99,
backend = "cmdstan",
seed = 123,
data = d)
# PPC rmsb custom function ----
# Source: https://hbiostat.org/R/rmsb/rmsbGraphics.html
pp_check.blrm <- function(modblrm,
type,
ndraws = NULL,
group = NULL) {
## posterior predictive checks using \pkg{bayesplot} package
## codes adapted from brms::pp_check.R (all mistakes, however, are clearly mine)
## https://github.com/paul-buerkner/brms/blob/master/R/pp_check.R
if (!any(class(modblrm) %in% Cs(blrm, rmsb))) {
stop("rms object must be of class blrm",
call. = FALSE)
}
if (missing(type)) {
type <- "dens_overlay"
}
# from brms::pp_check.r
valid_types <- as.character(bayesplot::available_ppc(""))
valid_types <- sub("^ppc_", "", valid_types)
if (!type %in% valid_types) {
stop("Type '", type, "' is not a valid ppc type. ",
"Valid types are:\n", paste(valid_types, collapse = " , "))
}
ppc_fun <- get(paste0("ppc_", type), asNamespace("bayesplot"))
newdata = eval(modblrm$call$data)[all.vars(modblrm$sformula)] %>% data.frame
if (anyNA(newdata)) {
warning("NA responses in sample") ## issue warnings about NAs
newdata <- newdata[complete.cases(newdata), ] ## remove NAs (if any)
}
valid_vars <- modblrm$Design$name
## codes from pp_check.brms
if ("group" %in% names(formals(ppc_fun))) {
if (is.null(group)) {
stop("Argument 'group' is required for ppc type '", type, "'.")
}
if (!group %in% valid_vars) {
stop("Variable '", group, "' could not be found in the data.")
}
}
## Y and Yrep
y <- newdata [ , all.vars(modblrm$sformula)[1] ]
y_length = length(unique(y))
if(is.null(ndraws)) {ndraws = 20} ## 20 draws to save time
if(y_length == 2) {
## binary response variable
pred_binary <- predict(modblrm, newdata, fun=plogis, funint=FALSE, posterior.summary="all")
yrep_alldraws <- apply(pred_binary, c(1,2), function (x) rbinom(1,1,x))
yrep <- yrep_alldraws[c(1:ndraws), ]
} else {
## >=3 level ordinal/continuous response variable
pred_ordinal <- predict(modblrm, newdata, type="fitted.ind", posterior.summary = "all")
yrep_alldraws <- apply(pred_ordinal, c(1,2), function (x) {
myvec = unlist(rmultinom(1,1,x))
myvec_names = modblrm$ylevels ## get unique levels of Y
return(myvec_names[myvec==1])
})
yrep_alldraws <- apply(yrep_alldraws, c(1,2), as.numeric)
yrep <- yrep_alldraws[c(1:ndraws), ]
}
##### # I had to modify from "y" to "array(y) - 1" because there was a bug
class(y) = "numeric"
ppc_args <- list(array(y) - 1, yrep)
if (!is.null(group)) {
ppc_args$group <- newdata[[group]]
}
do.call(ppc_fun, ppc_args)
}
# PPC plots ----
p1 = pp_check(PO_brms, group = "x", type = "bars_grouped", ndraws = 2000) +
labs(title = "brms") + coord_cartesian(ylim = c(0, 210))
p2 = pp_check(PO_rmsb, group = "x", "bars_grouped", ndraws = 2000) +
labs(title = "rmsb") + coord_cartesian(ylim = c(0, 210))
p1 + p2 + patchwork::plot_layout(guides = 'collect')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment