Last active
May 24, 2022 20:33
-
-
Save stonegold546/9a01f0ce1757c5a0cfb59d0055e19eb2 to your computer and use it in GitHub Desktop.
Continuous data ordinal regression using monotonic splines to estimate thresholds
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Block 02 -- Load packages | |
library(ordinalCont) # Just to check frequentist | |
library(splines2) | |
library(brms) | |
### Block 03 -- Load helper functions | |
source("helper_file_ord_generic.R") | |
### Block 04 -- Create demo dataset | |
# For lack of a better example -- creating fake Likert, 6 levels??? | |
head(dat <- lavaan::HolzingerSwineford1939) | |
summary(dat[, paste0("x", 1:9)]) | |
dat <- cbind(dat[, 1:6], apply(dat[, paste0("x", 1:9)], 2, function (xs) { | |
as.integer(cut(xs, breaks = c(-1, 3, 5, 7:10), ordered_result = TRUE)) | |
})) | |
summary(dat) | |
# Compute average score | |
dat$mean_score <- rowMeans(dat[, paste0("x", 1:9)]) | |
head(dat) | |
hist(dat$mean_score) | |
### Block 05 -- Provide information | |
user_input <- list( | |
min = 1, # Theoretical minimum of each item | |
max = 6, # Theoretical maximum of each item | |
n_items = 9 # Number of averaged items | |
) | |
### Block 06 -- Data pre-processing | |
# Create ordinal outcome | |
dat$y_ord <- helper$create_stan_data(dat$mean_score, user_input) | |
table(dat$y_ord) | |
summary(dat) | |
### Block 07 -- Example model | |
# Some random model, set priors below as needed | |
fit_0 <- brm( | |
y_ord ~ factor(sex) + I(ageyr - 11) + factor(school), dat, | |
family = helper$cont_cumul, # Custom log-likelihood defined in helper file | |
seed = 12345, cores = 4, | |
stanvars = helper$make_sv(user_input), # Additional variables/params for custom LL | |
prior = c(prior(student_t(3, 0, 2.5), class = "Intercept"))) | |
print(fit_0, digits = 4) | |
# Just to check with ordinalCont (frequentist approach) | |
# ocm does not reverse the sign of coefficients | |
summary(ocm( | |
mean_score ~ factor(sex) + I(ageyr - 11) + factor(school), | |
dat, scale = c(1, 5))) | |
# Another random model, set priors below as needed | |
fit_1 <- brm( | |
y_ord ~ factor(sex) + factor(school) + (1 | ageyr), dat, | |
family = helper$cont_cumul, seed = 12345, cores = 4, | |
stanvars = helper$make_sv(user_input), | |
prior = c(prior(student_t(3, 0, 2.5), class = "Intercept"))) | |
print(fit_1, digits = 4) | |
summary(ocm( | |
mean_score ~ factor(sex) + factor(school) + (1 | ageyr), | |
dat, scale = c(1, 5))) | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
helper <- list() | |
helper$points <- function (user_inp) { | |
with(user_inp, seq(min * n_items, max * n_items) / n_items) | |
} | |
helper$create_splines <- function (user_inp) { | |
N_cuts <- length(helper$points(user_inp)) - 1 | |
# df = number of internal cuts + degree + intercept | |
df <- min(15 + 2 + 1, N_cuts - 2) | |
Spl <- -.5 + splines2::iSpline( | |
1:N_cuts, df = df, degree = 2, intercept = TRUE) | |
return(Spl) | |
} | |
helper$create_stan_data <- function (y, user_inp) { | |
points <- helper$points(user_inp) | |
as.integer(round(user_inp$n_items * (y - user_inp$min) + 1)) | |
} | |
# To use vectorized log-likelihood: | |
# Call helper$cont_cumul with loop = FALSE; | |
# Call helper$make_sv with weighted = FALSE; | |
# This is useful if trying to extend model to include covariates. | |
helper$cont_cumul <- function (loop = FALSE) { | |
brms::custom_family( | |
"cont_Cumul", dpars = c("mu"), links = c("identity"), | |
type = "int", vars = c("cuts"), loop = loop) | |
} | |
helper$make_sv <- function (user_inp, weighted = FALSE) { | |
if (weighted) { | |
stan_funs <- " | |
real cont_Cumul_lpmf(int x, real mu, vector cuts) { | |
return ordered_logistic_lpmf(x | mu, cuts); | |
} | |
" | |
} else { | |
stan_funs <- " | |
real cont_Cumul_lpmf(int[] x, vector mu, vector cuts) { | |
return ordered_logistic_lpmf(x | mu, cuts); | |
} | |
" | |
} | |
create_cuts <- " | |
vector[N_cuts] cuts = Spl * gamma_Xtra_0; | |
" | |
N_cuts <- as.integer(user_inp$n_items * (user_inp$max - user_inp$min)) | |
Spl <- helper$create_splines(user_inp) | |
res <- c( | |
stanvar(scode = stan_funs, block = "functions"), | |
stanvar(N_cuts, name = "N_cuts", block = "data"), | |
stanvar(ncol(Spl), name = "N_gamma", block = "data"), | |
stanvar(Spl, name = "Spl", block = "data"), | |
stanvar(scode = "vector<lower = 0>[N_gamma] gamma_Xtra_0;", block = "parameters"), | |
stanvar(scode = create_cuts, block = "model"), | |
stanvar(scode = "gamma_Xtra_0 ~ std_normal();", block = "model")) | |
return(res) | |
} |
Author
stonegold546
commented
May 24, 2022
•
- This assumes no missing data in original items that were summed. May be better to delete any such cases first(?)
- Use splines to model the thresholds
- weights must be non-negative
- weights have standard half-normal prior, something more thoughtful is possible, see line 57 of helper file
- thresholds are estimated for theoretically cut-points (based on Likert items) not observed cut-points.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment