Continuous data ordinal regression using monotonic splines to estimate thresholds
### Block 02 -- Load packages
library(ordinalCont) # only used as a frequentist cross-check
library(splines2)
library(brms)
### Block 03 -- Load helper functions
source("helper_file_ord_generic.R")
### Block 04 -- Create demo dataset
# For lack of a better example -- create fake Likert items with 6 levels
head(dat <- lavaan::HolzingerSwineford1939)
summary(dat[, paste0("x", 1:9)])
dat <- cbind(dat[, 1:6], apply(dat[, paste0("x", 1:9)], 2, function (xs) {
  as.integer(cut(xs, breaks = c(-1, 3, 5, 7:10), ordered_result = TRUE))
}))
summary(dat)
# Compute average score
dat$mean_score <- rowMeans(dat[, paste0("x", 1:9)])
head(dat)
hist(dat$mean_score)
### Block 05 -- Provide information
user_input <- list(
  min = 1,     # theoretical minimum of each item
  max = 6,     # theoretical maximum of each item
  n_items = 9  # number of averaged items
)
### Block 06 -- Data pre-processing
# Create ordinal outcome
dat$y_ord <- helper$create_stan_data(dat$mean_score, user_input)
table(dat$y_ord)
summary(dat)
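# A quick sanity check (not in the original gist): helper$create_stan_data maps
# y_ord = round(n_items * (mean_score - min) + 1), so categories run from
# 1 (all items at the minimum) to n_items * (max - min) + 1 = 46 (all at the maximum)
stopifnot(all(dat$y_ord == round(9 * (dat$mean_score - 1) + 1)))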
### Block 07 -- Example model
# Some random model, set priors below as needed
fit_0 <- brm(
  y_ord ~ factor(sex) + I(ageyr - 11) + factor(school), dat,
  family = helper$cont_cumul, # custom log-likelihood defined in helper file
  seed = 12345, cores = 4,
  stanvars = helper$make_sv(user_input), # additional variables/parameters for the custom LL
  prior = c(prior(student_t(3, 0, 2.5), class = "Intercept")))
print(fit_0, digits = 4)
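# Hedged sketch (not in the original gist): recover the posterior thresholds
# implied by the spline, cuts = Spl %*% gamma_Xtra_0, using the same basis
# helper$make_sv passed to Stan as data
Spl <- helper$create_splines(user_input)
draws <- posterior::as_draws_matrix(fit_0)
gamma_draws <- draws[, grepl("^gamma_Xtra_0\\[", colnames(draws)), drop = FALSE]
cuts_draws <- gamma_draws %*% t(Spl) # one row per draw, one column per threshold
plot(colMeans(cuts_draws), type = "b",
     xlab = "cut index", ylab = "posterior mean threshold")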
# Just to check with ordinalCont (frequentist approach)
# ocm does not reverse the sign of coefficients
summary(ocm(
  mean_score ~ factor(sex) + I(ageyr - 11) + factor(school),
  dat, scale = c(1, 6))) # boundaries of the outcome: the theoretical range of the mean score
# Another random model, set priors below as needed
fit_1 <- brm(
  y_ord ~ factor(sex) + factor(school) + (1 | ageyr), dat,
  family = helper$cont_cumul, seed = 12345, cores = 4,
  stanvars = helper$make_sv(user_input),
  prior = c(prior(student_t(3, 0, 2.5), class = "Intercept")))
print(fit_1, digits = 4)
summary(ocm(
  mean_score ~ factor(sex) + factor(school) + (1 | ageyr),
  dat, scale = c(1, 6)))
### helper_file_ord_generic.R
helper <- list()
# All achievable mean scores given the item range and number of averaged items
helper$points <- function (user_inp) {
  with(user_inp, seq(min * n_items, max * n_items) / n_items)
}
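# e.g. (illustrative, not in the gist): with min = 1, max = 6, n_items = 9 this
# returns the 46 achievable mean scores 1, 1.111, ..., 6:
#   helper$points(list(min = 1, max = 6, n_items = 9))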
# Monotonic (I-spline) basis evaluated at the interior cut-point indices
helper$create_splines <- function (user_inp) {
  N_cuts <- length(helper$points(user_inp)) - 1
  # df = number of internal knots + degree + intercept,
  # capped so the basis stays below the number of cut-points
  df <- min(15 + 2 + 1, N_cuts - 2)
  # the -0.5 shift is constant across rows, so it lets the lowest
  # threshold be negative without breaking monotonicity
  Spl <- -.5 + splines2::iSpline(
    1:N_cuts, df = df, degree = 2, intercept = TRUE)
  return(Spl)
}
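# Why this guarantees ordered thresholds (illustrative check, not in the gist):
# every column of an I-spline basis is non-decreasing in its argument, and the
# -0.5 shift is constant, so any non-negative weight vector gives
# non-decreasing cuts:
#   Spl <- helper$create_splines(list(min = 1, max = 6, n_items = 9))
#   all(diff(Spl %*% abs(rnorm(ncol(Spl)))) >= 0) # TRUE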
# Map each mean score onto its ordinal category index, 1..(N_cuts + 1)
helper$create_stan_data <- function (y, user_inp) {
  as.integer(round(user_inp$n_items * (y - user_inp$min) + 1))
}
# To use the vectorized log-likelihood:
#   call helper$cont_cumul with loop = FALSE;
#   call helper$make_sv with weighted = FALSE.
# This is useful when extending the model to include covariates.
helper$cont_cumul <- function (loop = FALSE) {
  brms::custom_family(
    "cont_Cumul", dpars = c("mu"), links = c("identity"),
    type = "int", vars = c("cuts"), loop = loop)
}
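# Usage note (pairing inferred from the signatures below, not stated in the
# gist): the `loop` flag must match the `weighted` flag of helper$make_sv --
#   helper$cont_cumul(loop = TRUE)  pairs with helper$make_sv(..., weighted = TRUE)
#   helper$cont_cumul(loop = FALSE) pairs with helper$make_sv(..., weighted = FALSE)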
helper$make_sv <- function (user_inp, weighted = FALSE) {
  if (weighted) {
    # scalar signature, used when brms loops over observations
    stan_funs <- "
      real cont_Cumul_lpmf(int x, real mu, vector cuts) {
        return ordered_logistic_lpmf(x | mu, cuts);
      }
    "
  } else {
    # vectorized signature, used when brms evaluates all observations at once
    stan_funs <- "
      real cont_Cumul_lpmf(int[] x, vector mu, vector cuts) {
        return ordered_logistic_lpmf(x | mu, cuts);
      }
    "
  }
  # thresholds are a non-negative combination of the monotone basis columns
  create_cuts <- "
    vector[N_cuts] cuts = Spl * gamma_Xtra_0;
  "
  N_cuts <- as.integer(user_inp$n_items * (user_inp$max - user_inp$min))
  Spl <- helper$create_splines(user_inp)
  res <- c(
    stanvar(scode = stan_funs, block = "functions"),
    stanvar(N_cuts, name = "N_cuts", block = "data"),
    stanvar(ncol(Spl), name = "N_gamma", block = "data"),
    stanvar(Spl, name = "Spl", block = "data"),
    stanvar(scode = "vector<lower = 0>[N_gamma] gamma_Xtra_0;", block = "parameters"),
    stanvar(scode = create_cuts, block = "model"),
    stanvar(scode = "gamma_Xtra_0 ~ std_normal();", block = "model"))
  return(res)
}
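# Hedged sketch (not part of the original gist): inspect the Stan code brms
# generates from these stanvars before fitting, e.g.
#   brms::make_stancode(y_ord ~ factor(sex), dat,
#                       family = helper$cont_cumul(),
#                       stanvars = helper$make_sv(user_input))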
stonegold546 commented May 24, 2022

  • This assumes no missing data in the original items that were averaged; it may be better to drop such cases first.
  • Splines are used to model the thresholds:
    • the spline weights must be non-negative;
    • the weights have a standard half-normal prior -- something more thoughtful is possible (see line 57 of the helper file, and the sketch after this list);
    • thresholds are estimated at the theoretical cut-points (implied by the Likert items), not the observed cut-points.
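
A minimal sketch of swapping in a different prior on the spline weights: replace the std_normal() stanvar line in helper$make_sv with another sampling statement (the exponential rate below is an arbitrary illustration, not from the gist):

    stanvar(scode = "gamma_Xtra_0 ~ exponential(2);", block = "model")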
