Last active
October 3, 2024 20:46
-
-
Save saudiwin/7358a98560c7bd7f78d14c7b1e5b40aa to your computer and use it in GitHub Desktop.
How to calculate the probability of the discrete end points of a response in an ordbetareg model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# calculate Pr(Y=1) and Pr(Y=0) (i.e. the discrete categories of the scale) | |
# for different groups in the data | |
# useful for treatment/control comparisons | |
library(ordbetareg) | |
library(dplyr) | |
library(tidyr) | |
library(posterior) | |
library(tidybayes) | |
library(ggplot2) | |
data("pew") | |
# prepare data | |
model_data <- select(pew,therm, | |
education="F_EDUCCAT2_FINAL", | |
region="F_CREGION_FINAL", | |
income="F_INCOME_FINAL") | |
# we will look at probabilities of y=1 or y=0 for different levels of education | |
table(model_data$education) | |
ord_fit_mean <- ordbetareg(formula=therm ~ education + income + | |
(1|region), | |
data=model_data, | |
cores=2,chains=2) | |
# function that will return tibble with Pr(y=1) and Pr(y=0) | |
return_pr_01 <- function(object) { | |
# get linear prediction | |
ord_pred <- add_linpred_draws(newdata=object$data, | |
object=object,ndraws=200) | |
# need top and bottom cutpoints | |
cut_draws <- spread_draws(object,cutzero,cutone) | |
# merge with linear prediction | |
ord_pred <- left_join(ord_pred, | |
distinct(select(cut_draws,.draw, | |
cutzero,cutone)), by=c(".draw")) | |
# calculate pr_0 and pr_1 | |
ord_pred <- group_by(ord_pred, .draw) %>% | |
mutate(pr_y_1=plogis(.linpred - cutone), | |
pr_y_0=1 - plogis(.linpred - cutzero)) | |
return(ungroup(ord_pred)) | |
} | |
# what we want is the difference in Pr(Y=1) between two groups in the data: | |
# education = Less than high school and education = Postgraduate | |
# collapse to this quantity for each posterior draw (group by .draw) | |
ord_pry1y0 <- return_pr_01(ord_fit_mean) | |
change_groups_y1 <- group_by(ord_pry1y0, .draw) %>% | |
summarize(group_diff=mean(pr_y_1[education=="Postgraduate"]) - mean(pr_y_1[education=="Less than high school"])) | |
# look at the estimate of the difference in terms of posterior draws: | |
hist(change_groups_y1$group_diff) | |
summary(change_groups_y1$group_diff) | |
# difference is mostly positive (more 1s in the postgraduate group relative to low-education group) | |
# test with 5% - 95% interval | |
quantile(change_groups_y1$group_diff, prob=c(.05, .95)) | |
# this interval is only positive values | |
# also plot with ggplot & tidybayes | |
change_groups_y1 %>% | |
ggplot(aes(x=group_diff)) + | |
stat_halfeye() | |
# now the same for Pr(y=0) | |
change_groups_y0 <- group_by(ord_pry1y0, .draw) %>% | |
summarize(group_diff=mean(pr_y_0[education=="Postgraduate"]) - mean(pr_y_0[education=="Less than high school"])) | |
# look at the estimate of the difference in terms of posterior draws: | |
hist(change_groups_y0$group_diff) | |
summary(change_groups_y0$group_diff) | |
# difference is more negative (more 0s in the postgraduate group relative to low-education group) | |
# test with 5% - 95% interval | |
quantile(change_groups_y0$group_diff, prob=c(.05, .95)) | |
# this interval includes only negative values | |
# also plot with ggplot & tidybayes | |
change_groups_y0 %>% | |
ggplot(aes(x=group_diff)) + | |
stat_halfeye() | |
# combined plot | |
bind_rows(list(`Pr(Y=1)`=change_groups_y1, | |
`Pr(Y=0)`=change_groups_y0), | |
.id="Outcome Type") %>% | |
ggplot(aes(x=group_diff)) + | |
stat_halfeye(aes(fill=`Outcome Type`), | |
alpha=0.5) + | |
labs(x="Difference in Probability of Outcome from Low to High Education", | |
y="") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment