Skip to content

Instantly share code, notes, and snippets.

@jcblsn
Created October 14, 2025 23:49
Show Gist options
  • Select an option

  • Save jcblsn/41cc277699da0d89dced5d5310423331 to your computer and use it in GitHub Desktop.

Select an option

Save jcblsn/41cc277699da0d89dced5d5310423331 to your computer and use it in GitHub Desktop.
library(tidyverse)
library(survey)
# Inputs: the survey data export and its codebook, downloaded manually from
# https://rankings.thefire.org/explore?demo=all&year=2025
# Column names of both files are passed through janitor::clean_names() so the
# rest of the script can refer to them in a consistent cleaned form.
df <- janitor::clean_names(
  read_csv("~/Downloads/fire_survey_data_all.csv")
)
codebook <- janitor::clean_names(
  read_csv("~/Downloads/cfsr_megafile_codebook.csv")
)
# Codebook rows that describe the `gender_bin` variable (one row per coded
# value). `variable` is filled downward first -- presumably the codebook only
# names the variable on the first row of each group; confirm against the file.
gender_bin_key <- codebook |>
  fill(variable, .direction = "down") |>
  filter(variable == "gender_bin") |>
  filter(!is.na(value))

# Restrict the key to the three response categories that are analysed; the
# resulting values/labels are used as factor levels/labels for `gender_bin`.
gender_bin_key_nonmissing <- filter(
  gender_bin_key,
  value_label %in% c("Male", "Female", "Gender non-conforming")
)
# Attach a labelled version of `gender_bin` to the survey data.
# Only the codes present in `gender_bin_key_nonmissing` are declared as
# levels, so any other code in `gender_bin` becomes NA in the new column.
df_clean <- mutate(
  df,
  gender_bin_labeled = factor(
    gender_bin,
    levels = gender_bin_key_nonmissing[["value"]],
    labels = gender_bin_key_nonmissing[["value_label"]]
  )
)
# Design object that applies the `weight` column of the data; `ids = ~1`
# declares no clustering.
survey_design <- survey::svydesign(
  data = df_clean,
  ids = ~1,
  weights = ~weight
)

# Same data with every row weighted equally (`weights = ~1`), kept only to
# illustrate how much the weighting changes the estimates.
survey_design_unweighted_illustration <- survey::svydesign(
  data = df_clean,
  ids = ~1,
  weights = ~1
)
# Mean of each `gender_bin_labeled` indicator (i.e. the share of each level)
# within every `survey_year` for the given design, with standard errors and
# confidence intervals. Rows where the factor is NA are dropped by
# `na.rm = TRUE`. `estimate_type` is stored in a `type` column so that results
# from several designs can be stacked and told apart.
estimate_gender_share <- function(design, estimate_type) {
  shares <- survey::svyby(
    formula = ~gender_bin_labeled,
    by = ~survey_year,
    design = design,
    FUN = survey::svymean,
    na.rm = TRUE,
    vartype = c("se", "ci")
  )
  mutate(shares, type = estimate_type)
}

est_by_year <- estimate_gender_share(survey_design, "weighted")

est_by_year_unweighted <- estimate_gender_share(
  survey_design_unweighted_illustration,
  "unweighted"
)
# Tidy table of estimates: one row per survey year x estimate type x gender
# level, with one column per metric (`estimate`, `se`, `ci_l`, `ci_u`).
#
# The svyby output is wide, with column names built around the variable name:
# the bare estimate is `gender_bin_labeled<level>` and the other metrics carry
# a prefix ending in a dot (e.g. `ci_l.gender_bin_labeled<level>`). Splitting
# each name on "gender_bin_labeled" therefore yields the metric prefix on the
# left (empty for the estimate itself) and the level on the right.
survey_estimates <- bind_rows(est_by_year, est_by_year_unweighted) |>
  pivot_longer(cols = -c(survey_year, type)) |>
  separate(
    col = name,
    sep = "gender_bin_labeled",
    into = c("metric", "level"),
    remove = TRUE
  ) |>
  mutate(
    metric = case_when(
      # An empty prefix marks the point estimate.
      metric == "" ~ "estimate",
      # Otherwise strip the dot that joined prefix and variable name.
      TRUE ~ str_remove(metric, "\\.")
    )
  ) |>
  pivot_wider(
    id_cols = c(survey_year, type, level),
    names_from = metric,
    values_from = value
  ) |>
  mutate(
    # Percentage label, only for the "Gender non-conforming" series; all other
    # rows get a missing label. NA_character_ keeps both branches of if_else()
    # the same type, which older dplyr versions require.
    estimate_str = if_else(
      level == "Gender non-conforming",
      str_c(round(estimate * 100, 0), "%"),
      NA_character_
    ),
    # Fix the level order used for legends and line types.
    type = fct_relevel(type, c("weighted", "unweighted")),
    level = fct_relevel(level, c("Female", "Male", "Gender non-conforming"))
  )
# Colour for each gender level, keyed by the level label. The order of the
# names is also the order in which the levels are listed in the colour legend.
pal <- setNames(
  c("#B56576", "#6576B5", "#76B565"),
  c("Female", "Male", "Gender non-conforming")
)
# Quick look at the "Gender non-conforming" rows (weighted and unweighted);
# the result is not stored, only shown when the script is run.
filter(survey_estimates, level == "Gender non-conforming")
# Line chart of the estimated share of each gender level by survey year.
# Colour distinguishes the gender level, line type distinguishes weighted from
# unweighted estimates, and error bars show the confidence interval bounds
# (`ci_l`, `ci_u`). Percentage labels are drawn only where `estimate_str` is
# set, i.e. for the "Gender non-conforming" series.
chart <- survey_estimates |>
  # x and y are shared by every layer, so they are mapped once here.
  ggplot(aes(x = survey_year, y = estimate)) +
  geom_errorbar(
    aes(ymin = ci_l, ymax = ci_u, color = level, linetype = type),
    width = 0.05
  ) +
  geom_line(aes(color = level, linetype = type)) +
  geom_text(
    # Colour and line type are deliberately not mapped here so the labels stay
    # in the default text colour and do not add glyphs to the legends.
    aes(group = str_c(level, type), label = estimate_str),
    # NOTE(review): presumably targets ~8 pt text (5/14 approximating the
    # pt-to-mm conversion ggplot2 uses for `size`) -- confirm.
    size = 5 / 14 * 8,
    hjust = 0.5,
    vjust = -0.75,
    # Labels are intentionally NA for the other series; drop those rows
    # silently instead of emitting a "removed rows" warning on every render.
    na.rm = TRUE
  ) +
  xlab(NULL) +
  scale_y_continuous(
    name = "Proportion of college students",
    labels = scales::percent,
    # Anchor the axis at zero; the upper limit is left to the data.
    limits = c(0, NA)
  ) +
  scale_linetype_discrete(name = "Estimate type") +
  scale_color_manual(
    name = NULL,
    values = pal,
    breaks = names(pal)
  ) +
  labs(caption = "Jacob Eliason | data via FIRE") +
  theme_bw()
# Write the chart to a PNG in the working directory. Width and height are
# given as 16 x 9 and multiplied by `scale`; the background is forced to white.
ggsave(
  filename = "2025-10-14-weighted_vs_unweighted_estimates.png",
  plot = chart,
  width = 16,
  height = 9,
  scale = 0.5,
  bg = "white"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment