Skip to content

Instantly share code, notes, and snippets.

@andrewheiss
Created May 10, 2023 01:52
Show Gist options
  • Save andrewheiss/a17fdd198e0ec67349e916f9234f31e2 to your computer and use it in GitHub Desktop.
Save andrewheiss/a17fdd198e0ec67349e916f9234f31e2 to your computer and use it in GitHub Desktop.
library(tidyverse)
library(brms)
library(tidybayes)


data_thing <- tribble(
  ~answer,               ~regime_type,  ~n,
  "Very familiar",       "Democracy",   117,
  "Very familiar",       "Autocracy",   88,
  "Somewhat familiar",   "Democracy",   169,
  "Somewhat familiar",   "Autocracy",   80,
  "Not familiar at all", "Democracy",   66,
  "Not familiar at all", "Autocracy",   19
) %>% 
  group_by(answer) %>% 
  mutate(total = sum(n)) %>% 
  ungroup() %>%
  mutate(prop = n / total)
data_thing
#> # A tibble: 6 × 5
#>   answer              regime_type     n total  prop
#>   <chr>               <chr>       <dbl> <dbl> <dbl>
#> 1 Very familiar       Democracy     117   205 0.571
#> 2 Very familiar       Autocracy      88   205 0.429
#> 3 Somewhat familiar   Democracy     169   249 0.679
#> 4 Somewhat familiar   Autocracy      80   249 0.321
#> 5 Not familiar at all Democracy      66    85 0.776
#> 6 Not familiar at all Autocracy      19    85 0.224

# Response proportions across regime types
data_thing %>% 
  ggplot(aes(x = prop, y = answer, fill = regime_type)) +
  geom_col(position = position_dodge())

# Main question: is there a substantial difference in proportions of possible
# answers (not familiar vs. somewhat familiar; somewhat familiar vs. very
# familiar) for respondents working in autocracies?
#
# Can't just compare percentages (0.224 vs. 0.321; 0.321 vs. 0.429) because
# denominators are different across answers. So we have to use binomial family
# with n | trials(total) to account for denominators

mod <- brm(
  bf(n | trials(total) ~ 0 + answer), 
  data = filter(data_thing, regime_type == "Autocracy"),
  family = binomial(link = "identity"),
  prior = c(prior(beta(2, 2), class = b, ub = 1, lb = 0)),
  backend = "cmdstanr"
)

# ↓ These are the percentages of each category (neat!)
mod
#>  Family: binomial 
#>   Links: mu = identity 
#> Formula: n | trials(total) ~ 0 + answer 
#>    Data: filter(data_thing, regime_type == "Autocracy") (Number of observations: 3) 
#>   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
#>          total post-warmup draws = 4000
#> 
#> Population-Level Effects: 
#>                        Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
#> answerNotfamiliaratall     0.24      0.04     0.15     0.33 1.00     3758
#> answerSomewhatfamiliar     0.32      0.03     0.27     0.38 1.00     4586
#> answerVeryfamiliar         0.43      0.03     0.37     0.50 1.00     4394
#> 
#> Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
#> and Tail_ESS are effective sample size measures, and Rhat is the potential
#> scale reduction factor on split chains (at convergence, Rhat = 1).

# To find difference between not familiar vs. somewhat familiar and somewhat
# familiar vs. very familiar, we can find pairwise diffs from posterior
pairwise_diffs <- mod %>% 
  gather_draws(`^b_answer.*`, regex = TRUE) %>% 
  ungroup() %>% 
  compare_levels(.value, by = .variable)

# Median differences
# - not familiar vs. somewhat familiar = 0.0892ish and definitely not 0
# - somewhat familiar vs. very familiar = 0.106ish and definitely not 0
pairwise_diffs %>% 
  summarize(post_median = median_qi(.value, .width = 0.95),
            p_greater_0 = sum(.value > 0) / n()) %>% 
  unnest(post_median)
#> # A tibble: 3 × 8
#>   .variable                  y    ymin  ymax .width .point .interval p_greater_0
#>   <chr>                  <dbl>   <dbl> <dbl>  <dbl> <chr>  <chr>           <dbl>
#> 1 b_answerSomewhatfami… 0.0888 -0.0219 0.189   0.95 median qi              0.948
#> 2 b_answerVeryfamiliar… 0.195   0.0831 0.299   0.95 median qi              1.00 
#> 3 b_answerVeryfamiliar… 0.105   0.0208 0.194   0.95 median qi              0.990

# Plot
pairwise_diffs %>% 
  ggplot(aes(x = .value, fill = .variable)) +
  stat_halfeye() +
  guides(fill = "none") +
  facet_wrap(vars(.variable), ncol = 1)

Created on 2023-05-09 with reprex v2.0.2

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment