library(tidyverse)
library(brms)
library(tidybayes)
data_thing <- tribble(
~answer, ~regime_type, ~n,
"Very familiar", "Democracy", 117,
"Very familiar", "Autocracy", 88,
"Somewhat familiar", "Democracy", 169,
"Somewhat familiar", "Autocracy", 80,
"Not familiar at all", "Democracy", 66,
"Not familiar at all", "Autocracy", 19
) %>%
group_by(answer) %>%
mutate(total = sum(n)) %>%
ungroup() %>%
mutate(prop = n / total)
data_thing
#> # A tibble: 6 × 5
#> answer regime_type n total prop
#> <chr> <chr> <dbl> <dbl> <dbl>
#> 1 Very familiar Democracy 117 205 0.571
#> 2 Very familiar Autocracy 88 205 0.429
#> 3 Somewhat familiar Democracy 169 249 0.679
#> 4 Somewhat familiar Autocracy 80 249 0.321
#> 5 Not familiar at all Democracy 66 85 0.776
#> 6 Not familiar at all Autocracy 19 85 0.224
# Response proportions across regime types
data_thing %>%
ggplot(aes(x = prop, y = answer, fill = regime_type)) +
geom_col(position = position_dodge())
# Main question: is there a substantial difference in proportions of possible
# answers (not familiar vs. somewhat familiar; somewhat familiar vs. very
# familiar) for respondents working in autocracies?
#
# Can't just compare percentages (0.224 vs. 0.321; 0.321 vs. 0.429) because
# denominators are different across answers. So we have to use binomial family
# with n | trials(total) to account for denominators
mod <- brm(
bf(n | trials(total) ~ 0 + answer),
data = filter(data_thing, regime_type == "Autocracy"),
family = binomial(link = "identity"),
prior = c(prior(beta(2, 2), class = b, ub = 1, lb = 0)),
backend = "cmdstanr"
)
# ↓ These are the percentages of each category (neat!)
mod
#> Family: binomial
#> Links: mu = identity
#> Formula: n | trials(total) ~ 0 + answer
#> Data: filter(data_thing, regime_type == "Autocracy") (Number of observations: 3)
#> Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
#> total post-warmup draws = 4000
#>
#> Population-Level Effects:
#> Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
#> answerNotfamiliaratall 0.24 0.04 0.15 0.33 1.00 3758
#> answerSomewhatfamiliar 0.32 0.03 0.27 0.38 1.00 4586
#> answerVeryfamiliar 0.43 0.03 0.37 0.50 1.00 4394
#>
#> Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
#> and Tail_ESS are effective sample size measures, and Rhat is the potential
#> scale reduction factor on split chains (at convergence, Rhat = 1).
# To find difference between not familiar vs. somewhat familiar and somewhat
# familiar vs. very familiar, we can find pairwise diffs from posterior
pairwise_diffs <- mod %>%
gather_draws(`^b_answer.*`, regex = TRUE) %>%
ungroup() %>%
compare_levels(.value, by = .variable)
# Median differences
# - not familiar vs. somewhat familiar = 0.0892ish and definitely not 0
# - somewhat familiar vs. very familiar = 0.106ish and definitely not 0
pairwise_diffs %>%
summarize(post_median = median_qi(.value, .width = 0.95),
p_greater_0 = sum(.value > 0) / n()) %>%
unnest(post_median)
#> # A tibble: 3 × 8
#> .variable y ymin ymax .width .point .interval p_greater_0
#> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <dbl>
#> 1 b_answerSomewhatfami… 0.0888 -0.0219 0.189 0.95 median qi 0.948
#> 2 b_answerVeryfamiliar… 0.195 0.0831 0.299 0.95 median qi 1.00
#> 3 b_answerVeryfamiliar… 0.105 0.0208 0.194 0.95 median qi 0.990
# Plot
pairwise_diffs %>%
ggplot(aes(x = .value, fill = .variable)) +
stat_halfeye() +
guides(fill = "none") +
facet_wrap(vars(.variable), ncol = 1)
Created on 2023-05-09 with reprex v2.0.2