stonegold546/generic_reg_ord_brms.R

## generic_reg_ord_brms.R
### Block 02 -- Load packages

library(ordinalCont)  # Just to check frequentist
library(splines2)
library(brms)

### Block 03 -- Load helper functions

source("helper_file_ord_generic.R")

### Block 04 -- Create demo dataset

# For lack of a better example -- creating fake Likert, 6 levels???
head(dat <- lavaan::HolzingerSwineford1939)
summary(dat[, paste0("x", 1:9)])
dat <- cbind(dat[, 1:6], apply(dat[, paste0("x", 1:9)], 2, function (xs) {
  as.integer(cut(xs, breaks = c(-1, 3, 5, 7:10), ordered_result = TRUE))
}))
summary(dat)
# Compute average score
dat$mean_score <- rowMeans(dat[, paste0("x", 1:9)])
head(dat)
hist(dat$mean_score)

### Block 05 -- Provide information

user_input <- list(
  min = 1,  # Theoretical minimum of each item
  max = 6,  # Theoretical maximum of each item
  n_items = 9  # Number of averaged items
)

### Block 06 -- Data pre-processing

# Create ordinal outcome
dat$y_ord <- helper$create_stan_data(dat$mean_score, user_input)
table(dat$y_ord)

summary(dat)

### Block 07 -- Example model

# Some random model, set priors below as needed
fit_0 <- brm(
  y_ord ~ factor(sex) + I(ageyr - 11) + factor(school), dat,
  family = helper$cont_cumul,  # Custom log-likelihood defined in helper file
  seed = 12345, cores = 4,
  stanvars = helper$make_sv(user_input),  # Additional variables/params for custom LL
  prior = c(prior(student_t(3, 0, 2.5), class = "Intercept")))
print(fit_0, digits = 4)

# Just to check with ordinalCont (frequentist approach)
# ocm does not reverse the sign of coefficients
summary(ocm(
  mean_score ~ factor(sex) + I(ageyr - 11) + factor(school),
  dat, scale = c(1, 5)))

# Another random model, set priors below as needed
fit_1 <- brm(
  y_ord ~ factor(sex) + factor(school) + (1 | ageyr), dat,
  family = helper$cont_cumul, seed = 12345, cores = 4,
  stanvars = helper$make_sv(user_input),
  prior = c(prior(student_t(3, 0, 2.5), class = "Intercept")))
print(fit_1, digits = 4)

summary(ocm(
  mean_score ~ factor(sex) + factor(school) + (1 | ageyr),
  dat, scale = c(1, 5)))


## helper_file_ord_generic.R
helper <- list()

helper$points <- function (user_inp) {
  with(user_inp, seq(min * n_items, max * n_items) / n_items)
}

helper$create_splines <- function (user_inp) {
  N_cuts <- length(helper$points(user_inp)) - 1
  # df = number of internal cuts + degree + intercept
  df <- min(15 + 2 + 1, N_cuts - 2)
  Spl <- -.5 + splines2::iSpline(
    1:N_cuts, df = df, degree = 2, intercept = TRUE)
  return(Spl)
}

helper$create_stan_data <- function (y, user_inp) {
  points <- helper$points(user_inp)
  as.integer(round(user_inp$n_items * (y - user_inp$min) + 1))
}

# To use vectorized log-likelihood:
#   Call helper$cont_cumul with loop = FALSE;
#   Call helper$make_sv with weighted = FALSE;
#   This is useful if trying to extend model to include covariates.
helper$cont_cumul <- function (loop = FALSE) {
  brms::custom_family(
    "cont_Cumul", dpars = c("mu"), links = c("identity"),
    type = "int", vars = c("cuts"), loop = loop)
}
helper$make_sv <- function (user_inp, weighted = FALSE) {
  if (weighted) {
    stan_funs <- "
  real cont_Cumul_lpmf(int x, real mu, vector cuts) {
    return ordered_logistic_lpmf(x | mu, cuts);
  }
"
  } else {
    stan_funs <- "
  real cont_Cumul_lpmf(int[] x, vector mu, vector cuts) {
    return ordered_logistic_lpmf(x | mu, cuts);
  }
"
  }

  create_cuts <- "
  vector[N_cuts] cuts = Spl * gamma_Xtra_0;
"
  N_cuts <- as.integer(user_inp$n_items * (user_inp$max - user_inp$min))
  Spl <- helper$create_splines(user_inp)
  res <- c(
    stanvar(scode = stan_funs, block = "functions"),
    stanvar(N_cuts, name = "N_cuts", block = "data"),
    stanvar(ncol(Spl), name = "N_gamma", block = "data"),
    stanvar(Spl, name = "Spl", block = "data"),
    stanvar(scode = "vector<lower = 0>[N_gamma] gamma_Xtra_0;", block = "parameters"),
    stanvar(scode = create_cuts, block = "model"),
    stanvar(scode = "gamma_Xtra_0 ~ std_normal();", block = "model"))
  return(res)
}
	### Block 02 -- Load packages

	library(ordinalCont) # Just to check frequentist
	library(splines2)
	library(brms)

	### Block 03 -- Load helper functions

	source("helper_file_ord_generic.R")

	### Block 04 -- Create demo dataset

	# For lack of a better example -- creating fake Likert, 6 levels???
	head(dat <- lavaan::HolzingerSwineford1939)
	summary(dat[, paste0("x", 1:9)])
	dat <- cbind(dat[, 1:6], apply(dat[, paste0("x", 1:9)], 2, function (xs) {
	as.integer(cut(xs, breaks = c(-1, 3, 5, 7:10), ordered_result = TRUE))
	}))
	summary(dat)
	# Compute average score
	dat$mean_score <- rowMeans(dat[, paste0("x", 1:9)])
	head(dat)
	hist(dat$mean_score)

	### Block 05 -- Provide information

	user_input <- list(
	min = 1, # Theoretical minimum of each item
	max = 6, # Theoretical maximum of each item
	n_items = 9 # Number of averaged items
	)

	### Block 06 -- Data pre-processing

	# Create ordinal outcome
	dat$y_ord <- helper$create_stan_data(dat$mean_score, user_input)
	table(dat$y_ord)

	summary(dat)

	### Block 07 -- Example model

	# Some random model, set priors below as needed
	fit_0 <- brm(
	y_ord ~ factor(sex) + I(ageyr - 11) + factor(school), dat,
	family = helper$cont_cumul, # Custom log-likelihood defined in helper file
	seed = 12345, cores = 4,
	stanvars = helper$make_sv(user_input), # Additional variables/params for custom LL
	prior = c(prior(student_t(3, 0, 2.5), class = "Intercept")))
	print(fit_0, digits = 4)

	# Just to check with ordinalCont (frequentist approach)
	# ocm does not reverse the sign of coefficients
	summary(ocm(
	mean_score ~ factor(sex) + I(ageyr - 11) + factor(school),
	dat, scale = c(1, 5)))

	# Another random model, set priors below as needed
	fit_1 <- brm(
	y_ord ~ factor(sex) + factor(school) + (1 \| ageyr), dat,
	family = helper$cont_cumul, seed = 12345, cores = 4,
	stanvars = helper$make_sv(user_input),
	prior = c(prior(student_t(3, 0, 2.5), class = "Intercept")))
	print(fit_1, digits = 4)

	summary(ocm(
	mean_score ~ factor(sex) + factor(school) + (1 \| ageyr),
	dat, scale = c(1, 5)))
	helper <- list()

	helper$points <- function (user_inp) {
	with(user_inp, seq(min * n_items, max * n_items) / n_items)
	}

	helper$create_splines <- function (user_inp) {
	N_cuts <- length(helper$points(user_inp)) - 1
	# df = number of internal cuts + degree + intercept
	df <- min(15 + 2 + 1, N_cuts - 2)
	Spl <- -.5 + splines2::iSpline(
	1:N_cuts, df = df, degree = 2, intercept = TRUE)
	return(Spl)
	}

	helper$create_stan_data <- function (y, user_inp) {
	points <- helper$points(user_inp)
	as.integer(round(user_inp$n_items * (y - user_inp$min) + 1))
	}

	# To use vectorized log-likelihood:
	# Call helper$cont_cumul with loop = FALSE;
	# Call helper$make_sv with weighted = FALSE;
	# This is useful if trying to extend model to include covariates.
	helper$cont_cumul <- function (loop = FALSE) {
	brms::custom_family(
	"cont_Cumul", dpars = c("mu"), links = c("identity"),
	type = "int", vars = c("cuts"), loop = loop)
	}
	helper$make_sv <- function (user_inp, weighted = FALSE) {
	if (weighted) {
	stan_funs <- "
	real cont_Cumul_lpmf(int x, real mu, vector cuts) {
	return ordered_logistic_lpmf(x \| mu, cuts);
	}
	"
	} else {
	stan_funs <- "
	real cont_Cumul_lpmf(int[] x, vector mu, vector cuts) {
	return ordered_logistic_lpmf(x \| mu, cuts);
	}
	"
	}

	create_cuts <- "
	vector[N_cuts] cuts = Spl * gamma_Xtra_0;
	"
	N_cuts <- as.integer(user_inp$n_items * (user_inp$max - user_inp$min))
	Spl <- helper$create_splines(user_inp)
	res <- c(
	stanvar(scode = stan_funs, block = "functions"),
	stanvar(N_cuts, name = "N_cuts", block = "data"),
	stanvar(ncol(Spl), name = "N_gamma", block = "data"),
	stanvar(Spl, name = "Spl", block = "data"),
	stanvar(scode = "vector<lower = 0>[N_gamma] gamma_Xtra_0;", block = "parameters"),
	stanvar(scode = create_cuts, block = "model"),
	stanvar(scode = "gamma_Xtra_0 ~ std_normal();", block = "model"))
	return(res)
	}