arthur-albuquerque/gist:5d50771dcfdb5cb700c11f1fcf5ed4a9

## gistfile1.txt

pacman::p_load(dplyr,
               brms,
               patchwork,
               ggplot2)

# Install rmsb v0.2 to use cmdstan
pacman::p_load_gh("harrelfe/rmsb")

# Data
a <- c(rep(0,0), rep(1,3), rep(2,5), rep(3,5), rep(4,25), rep(5,40), rep(6,24))
b <- c(rep(0,2), rep(1,3), rep(2,9), rep(3,17), rep(4,33), rep(5,18), rep(6,18))
x <- c(rep('medical', length(a)), rep('endovascular', length(b)))
y <- c(a, b)
d1 = data.frame(x, y, study = "RESCUE-Japan LIMIT")


a <- c(rep(0,0), rep(1,3), rep(2,9), rep(3,20), rep(4,36), rep(5,32), rep(6,71))
b <- c(rep(0,2), rep(1,9), rep(2,25), rep(3,31), rep(4,27), rep(5,15), rep(6,68))
x <- c(rep('medical', length(a)), rep('endovascular', length(b)))
y <- c(a, b)
d2 = data.frame(x, y, study = "SELECT2")

a <- c(rep(0,0), rep(1,8), rep(2,18), rep(3,49), rep(4,60), rep(5,45), rep(6,45))
b <- c(rep(0,9), rep(1,19), rep(2,41), rep(3,39), rep(4,45), rep(5,27), rep(6,50))
x <- c(rep('medical', length(a)), rep('endovascular', length(b)))
y <- c(a, b)
d3 = data.frame(x, y, study = "ANGEL-ASPECT")

d = rbind(d1, d2, d3)

d$y <-factor(d$y, ordered = T)
d$x<-factor(d$x, levels = c("endovascular", "medical"))
dd <- datadist(d); options(datadist='dd')


# Proportional Odds Model----
# brms

# Check default priors in brms
get_prior(family = cumulative("logit"),
          formula = y ~ x + (1|study) ,
          data = d)

PO_brms <- brms::brm(
  family = cumulative("logit"), # PO model
  formula = y ~ x + (1|study) ,

  prior =
    prior(normal(0, 1.5), class = "b") +
    prior(student_t(4, 0, 2.5), class = "sd"),


  iter = 2000,
  chain = 4,
  cores = parallel::detectCores(),
  control = list(adapt_delta = .99),
  backend = "cmdstanr",
  seed = 123,
  data = d
)

# Check priors actually used
PO_brms$prior


# rmsb
PO_rmsb <- rmsb::blrm(y ~ x + cluster(study),
                       # intercept parameters prior: t-distribution with 3 d.f.
                       iprior = 2,
                       # scale parameter for the t-distribution for priors for the intercepts
                       ascale = 2.5,
                       # normal(0, 1.5) prior for "x"
                       priorsd = 1.5,
                       # random effect prior: half-t distribution with 4 d.f.
                       psigma = 1,
                       # assumed mean of the prior distribution of the RE SD
                       rsdmean = 0,
                       # assumed scale for the half t distribution for the RE SD
                       rsdsd = 2.5,

                       iter = 2000,
                       chains = 4,
                       parallel_chains = parallel::detectCores(),
                       adapt_delta = .99,
                       backend = "cmdstan",
                       seed = 123,
                       data = d)

# PPC rmsb custom function ----

# Source: https://hbiostat.org/R/rmsb/rmsbGraphics.html
pp_check.blrm <- function(modblrm,
                          type,
                          ndraws = NULL,
                          group = NULL) {

  ## posterior predictive checks using \pkg{bayesplot} package
  ## codes adapted from brms::pp_check.R (all mistakes, however, are clearly mine)
  ## https://github.com/paul-buerkner/brms/blob/master/R/pp_check.R


  if (!any(class(modblrm) %in% Cs(blrm, rmsb))) {
    stop("rms object must be of class blrm",
         call. = FALSE)
  }

  if (missing(type)) {
    type <- "dens_overlay"
  }

  # from brms::pp_check.r
  valid_types <- as.character(bayesplot::available_ppc(""))
  valid_types <- sub("^ppc_", "", valid_types)
  if (!type %in% valid_types) {
    stop("Type '", type, "' is not a valid ppc type. ",
         "Valid types are:\n", paste(valid_types, collapse = " , "))
  }

  ppc_fun <- get(paste0("ppc_", type), asNamespace("bayesplot"))

  newdata = eval(modblrm$call$data)[all.vars(modblrm$sformula)] %>% data.frame
  if (anyNA(newdata)) {
    warning("NA responses in sample")              ## issue warnings about NAs
    newdata <- newdata[complete.cases(newdata), ]  ## remove NAs (if any)
  }

  valid_vars  <- modblrm$Design$name
  ## codes from pp_check.brms
  if ("group" %in% names(formals(ppc_fun))) {
    if (is.null(group)) {
      stop("Argument 'group' is required for ppc type '", type, "'.")
    }
    if (!group %in% valid_vars) {
      stop("Variable '", group, "' could not be found in the data.")
    }
  }

  ## Y and Yrep
  y <- newdata [ , all.vars(modblrm$sformula)[1] ]
  y_length = length(unique(y))

  if(is.null(ndraws)) {ndraws = 20}  ## 20 draws to save time

  if(y_length == 2) {

    ## binary response variable
    pred_binary <- predict(modblrm, newdata, fun=plogis, funint=FALSE, posterior.summary="all")
    yrep_alldraws <- apply(pred_binary, c(1,2), function (x) rbinom(1,1,x))
    yrep <- yrep_alldraws[c(1:ndraws), ]

  } else {

    ## >=3 level ordinal/continuous response variable
    pred_ordinal <- predict(modblrm, newdata, type="fitted.ind", posterior.summary = "all")
    yrep_alldraws <-   apply(pred_ordinal, c(1,2), function (x) {
      myvec = unlist(rmultinom(1,1,x))
      myvec_names = modblrm$ylevels   ## get unique levels of Y
      return(myvec_names[myvec==1])
    })
    yrep_alldraws <- apply(yrep_alldraws, c(1,2), as.numeric)
    yrep <- yrep_alldraws[c(1:ndraws), ]

  }

  ##### # I had to modify from "y" to "array(y) - 1" because there was a bug
  class(y) = "numeric"
  ppc_args <- list(array(y) - 1, yrep)

  if (!is.null(group)) {
    ppc_args$group <- newdata[[group]]
  }

  do.call(ppc_fun, ppc_args)

}

# PPC plots ----

p1 = pp_check(PO_brms, group = "x", type = "bars_grouped", ndraws = 2000) +
  labs(title = "brms") + coord_cartesian(ylim = c(0, 210))

p2 = pp_check(PO_rmsb, group = "x", "bars_grouped", ndraws = 2000) +
  labs(title = "rmsb") + coord_cartesian(ylim = c(0, 210))

p1 + p2 + patchwork::plot_layout(guides = 'collect')

	pacman::p_load(dplyr,
	brms,
	patchwork,
	ggplot2)

	# Install rmsb v0.2 to use cmdstan
	pacman::p_load_gh("harrelfe/rmsb")

	# Data
	a <- c(rep(0,0), rep(1,3), rep(2,5), rep(3,5), rep(4,25), rep(5,40), rep(6,24))
	b <- c(rep(0,2), rep(1,3), rep(2,9), rep(3,17), rep(4,33), rep(5,18), rep(6,18))
	x <- c(rep('medical', length(a)), rep('endovascular', length(b)))
	y <- c(a, b)
	d1 = data.frame(x, y, study = "RESCUE-Japan LIMIT")


	a <- c(rep(0,0), rep(1,3), rep(2,9), rep(3,20), rep(4,36), rep(5,32), rep(6,71))
	b <- c(rep(0,2), rep(1,9), rep(2,25), rep(3,31), rep(4,27), rep(5,15), rep(6,68))
	x <- c(rep('medical', length(a)), rep('endovascular', length(b)))
	y <- c(a, b)
	d2 = data.frame(x, y, study = "SELECT2")

	a <- c(rep(0,0), rep(1,8), rep(2,18), rep(3,49), rep(4,60), rep(5,45), rep(6,45))
	b <- c(rep(0,9), rep(1,19), rep(2,41), rep(3,39), rep(4,45), rep(5,27), rep(6,50))
	x <- c(rep('medical', length(a)), rep('endovascular', length(b)))
	y <- c(a, b)
	d3 = data.frame(x, y, study = "ANGEL-ASPECT")

	d = rbind(d1, d2, d3)

	d$y <-factor(d$y, ordered = T)
	d$x<-factor(d$x, levels = c("endovascular", "medical"))
	dd <- datadist(d); options(datadist='dd')


	# Proportional Odds Model----
	# brms

	# Check default priors in brms
	get_prior(family = cumulative("logit"),
	formula = y ~ x + (1\|study) ,
	data = d)

	PO_brms <- brms::brm(
	family = cumulative("logit"), # PO model
	formula = y ~ x + (1\|study) ,

	prior =
	prior(normal(0, 1.5), class = "b") +
	prior(student_t(4, 0, 2.5), class = "sd"),


	iter = 2000,
	chain = 4,
	cores = parallel::detectCores(),
	control = list(adapt_delta = .99),
	backend = "cmdstanr",
	seed = 123,
	data = d
	)

	# Check priors actually used
	PO_brms$prior



	# rmsb
	PO_rmsb <- rmsb::blrm(y ~ x + cluster(study),
	# intercept parameters prior: t-distribution with 3 d.f.
	iprior = 2,
	# scale parameter for the t-distribution for priors for the intercepts
	ascale = 2.5,
	# normal(0, 1.5) prior for "x"
	priorsd = 1.5,
	# random effect prior: half-t distribution with 4 d.f.
	psigma = 1,
	# assumed mean of the prior distribution of the RE SD
	rsdmean = 0,
	# assumed scale for the half t distribution for the RE SD
	rsdsd = 2.5,

	iter = 2000,
	chains = 4,
	parallel_chains = parallel::detectCores(),
	adapt_delta = .99,
	backend = "cmdstan",
	seed = 123,
	data = d)

	# PPC rmsb custom function ----

	# Source: https://hbiostat.org/R/rmsb/rmsbGraphics.html
	pp_check.blrm <- function(modblrm,
	type,
	ndraws = NULL,
	group = NULL) {

	## posterior predictive checks using \pkg{bayesplot} package
	## codes adapted from brms::pp_check.R (all mistakes, however, are clearly mine)
	## https://github.com/paul-buerkner/brms/blob/master/R/pp_check.R


	if (!any(class(modblrm) %in% Cs(blrm, rmsb))) {
	stop("rms object must be of class blrm",
	call. = FALSE)
	}

	if (missing(type)) {
	type <- "dens_overlay"
	}

	# from brms::pp_check.r
	valid_types <- as.character(bayesplot::available_ppc(""))
	valid_types <- sub("^ppc_", "", valid_types)
	if (!type %in% valid_types) {
	stop("Type '", type, "' is not a valid ppc type. ",
	"Valid types are:\n", paste(valid_types, collapse = " , "))
	}

	ppc_fun <- get(paste0("ppc_", type), asNamespace("bayesplot"))

	newdata = eval(modblrm$call$data)[all.vars(modblrm$sformula)] %>% data.frame
	if (anyNA(newdata)) {
	warning("NA responses in sample") ## issue warnings about NAs
	newdata <- newdata[complete.cases(newdata), ] ## remove NAs (if any)
	}

	valid_vars <- modblrm$Design$name
	## codes from pp_check.brms
	if ("group" %in% names(formals(ppc_fun))) {
	if (is.null(group)) {
	stop("Argument 'group' is required for ppc type '", type, "'.")
	}
	if (!group %in% valid_vars) {
	stop("Variable '", group, "' could not be found in the data.")
	}
	}

	## Y and Yrep
	y <- newdata [ , all.vars(modblrm$sformula)[1] ]
	y_length = length(unique(y))

	if(is.null(ndraws)) {ndraws = 20} ## 20 draws to save time

	if(y_length == 2) {

	## binary response variable
	pred_binary <- predict(modblrm, newdata, fun=plogis, funint=FALSE, posterior.summary="all")
	yrep_alldraws <- apply(pred_binary, c(1,2), function (x) rbinom(1,1,x))
	yrep <- yrep_alldraws[c(1:ndraws), ]

	} else {

	## >=3 level ordinal/continuous response variable
	pred_ordinal <- predict(modblrm, newdata, type="fitted.ind", posterior.summary = "all")
	yrep_alldraws <- apply(pred_ordinal, c(1,2), function (x) {
	myvec = unlist(rmultinom(1,1,x))
	myvec_names = modblrm$ylevels ## get unique levels of Y
	return(myvec_names[myvec==1])
	})
	yrep_alldraws <- apply(yrep_alldraws, c(1,2), as.numeric)
	yrep <- yrep_alldraws[c(1:ndraws), ]

	}

	##### # I had to modify from "y" to "array(y) - 1" because there was a bug
	class(y) = "numeric"
	ppc_args <- list(array(y) - 1, yrep)

	if (!is.null(group)) {
	ppc_args$group <- newdata[[group]]
	}

	do.call(ppc_fun, ppc_args)

	}

	# PPC plots ----

	p1 = pp_check(PO_brms, group = "x", type = "bars_grouped", ndraws = 2000) +
	labs(title = "brms") + coord_cartesian(ylim = c(0, 210))

	p2 = pp_check(PO_rmsb, group = "x", "bars_grouped", ndraws = 2000) +
	labs(title = "rmsb") + coord_cartesian(ylim = c(0, 210))

	p1 + p2 + patchwork::plot_layout(guides = 'collect')