Skip to content

Instantly share code, notes, and snippets.

@strengejacke
Last active September 26, 2022 09:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save strengejacke/eb607b015b8041f378d56e844d5d38cf to your computer and use it in GitHub Desktop.
Save strengejacke/eb607b015b8041f378d56e844d5d38cf to your computer and use it in GitHub Desktop.
Example showing different ways of modelling ordinal predictors, and how their model summaries and predictions look like.
library(ggeffects)
library(parameters)
library(brms)
income_options <- c("below_20", "20_to_40", "40_to_100", "greater_100")
income <- factor(sample(income_options, 100, TRUE),
levels = income_options, ordered = TRUE)
mean_ls <- c(30, 60, 70, 75)
ls <- mean_ls[income] + rnorm(100, sd = 7)
dat <- data.frame(income, ls)
dat$income2 <- dat$income
contrasts(dat$income2) <- MASS::contr.sdif
dat$income3 <- factor(dat$income, ordered = FALSE)
dat$income_n <- as.numeric(dat$income)
# fit a simple monotonic model
fit1 <- brm(ls ~ mo(income), data = dat, refresh = 0, iter = 200, chains = 2)
fit2 <- lm(ls ~ income, data = dat)
fit3 <- lm(ls ~ income2, data = dat)
fit4 <- lm(ls ~ income3, data = dat)
fit5 <- lm(ls ~ poly(income_n, 3), data = dat)
model_parameters(fit1)
#> # Fixed effects
#>
#> Parameter | Median | 95% CI | pd | % in ROPE | Rhat | ESS
#> ------------------------------------------------------------------------
#> (Intercept) | 27.34 | [23.35, 29.78] | 100% | 0% | 0.991 | 51.00
#> moincome | 15.76 | [14.76, 17.36] | 100% | 0% | 0.995 | 64.00
#>
#> # Fixed effects (monotonic effects)
#>
#> Parameter | Median | 95% CI | pd | % in ROPE | Rhat | ESS
#> ----------------------------------------------------------------------
#> income1[1] | 0.72 | [0.66, 0.80] | 100% | 100% | 1.005 | 187.00
#> income1[2] | 0.21 | [0.11, 0.30] | 100% | 100% | 0.994 | 151.00
#> income1[3] | 0.06 | [0.00, 0.12] | 100% | 100% | 0.996 | 107.00
#>
#> Uncertainty intervals (highest-density) and p values (two-tailed) computed
#> using a MCMC distribution approximation.
compare_parameters("ordinal" = fit2, "successive diff" = fit3,
"categorical" = fit4, "poly" = fit5)
#> Parameter | ordinal | successive diff | categorical | poly
#> -----------------------------------------------------------------------------------------------------------------------
#> (Intercept) | 58.85 ( 57.46, 60.24) | 58.85 (57.46, 60.24) | 27.21 (24.45, 29.96) | 60.30 ( 58.97, 61.62)
#> income (linear) | 34.03 ( 31.50, 36.57) | | |
#> income (quadratic) | -15.92 (-18.69, -13.15) | | |
#> income (cubic) | 3.81 ( 0.81, 6.80) | | |
#> income2 (2-1) | | 34.55 (30.65, 38.45) | |
#> income2 (3-2) | | 10.12 ( 5.81, 14.42) | |
#> income2 (4-3) | | 2.70 (-1.24, 6.64) | |
#> income3 (20 to 40) | | | 34.55 (30.65, 38.45) |
#> income3 (40 to 100) | | | 44.66 (40.36, 48.97) |
#> income3 (greater 100) | | | 47.36 (43.87, 50.86) |
#> income n (1st degree) | | | | 169.61 (156.39, 182.84)
#> income n (2nd degree) | | | | -77.89 (-91.11, -64.67)
#> income n (3rd degree) | | | | 16.82 ( 3.60, 30.04)
#> -----------------------------------------------------------------------------------------------------------------------
#> Observations | 100 | 100 | 100 | 100
# brms
ggpredict(fit1, "income")
#> # Predicted values of ls
#>
#> income | Predicted | 95% CI
#> ----------------------------------------
#> below_20 | 27.34 | [23.74, 30.13]
#> 20_to_40 | 61.65 | [58.90, 64.53]
#> 40_to_100 | 71.72 | [68.14, 74.71]
#> greater_100 | 74.68 | [72.39, 76.82]
# raw ordinal
ggpredict(fit2, "income")
#> # Predicted values of ls
#>
#> income | Predicted | 95% CI
#> ----------------------------------------
#> below_20 | 27.21 | [24.48, 29.93]
#> 20_to_40 | 61.75 | [59.03, 64.48]
#> 40_to_100 | 71.87 | [68.61, 75.13]
#> greater_100 | 74.57 | [72.45, 76.69]
# ordinal, successive differences
ggpredict(fit3, "income2")
#> # Predicted values of ls
#>
#> income2 | Predicted | 95% CI
#> ----------------------------------------
#> below_20 | 27.21 | [24.48, 29.93]
#> 20_to_40 | 61.75 | [59.03, 64.48]
#> 40_to_100 | 71.87 | [68.61, 75.13]
#> greater_100 | 74.57 | [72.45, 76.69]
# categorical
ggpredict(fit4, "income3")
#> # Predicted values of ls
#>
#> income3 | Predicted | 95% CI
#> ----------------------------------------
#> below_20 | 27.21 | [24.48, 29.93]
#> 20_to_40 | 61.75 | [59.03, 64.48]
#> 40_to_100 | 71.87 | [68.61, 75.13]
#> greater_100 | 74.57 | [72.45, 76.69]
# poly
ggpredict(fit5, "income_n")
#> # Predicted values of ls
#>
#> income_n | Predicted | 95% CI
#> -------------------------------------
#> 1 | 27.21 | [24.48, 29.93]
#> 2 | 61.75 | [59.03, 64.48]
#> 3 | 71.87 | [68.61, 75.13]
#> 4 | 74.57 | [72.45, 76.69]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment