Skip to content

Instantly share code, notes, and snippets.

@ryanburge
Created February 4, 2025 02:25
Show Gist options
  • Save ryanburge/43f0520d8a03415eb0f9e1ab59b90bb6 to your computer and use it in GitHub Desktop.
Save ryanburge/43f0520d8a03415eb0f9e1ab59b90bb6 to your computer and use it in GitHub Desktop.
library(rio)
library(janitor)
cen <- import("E://data/pulse42.sas7bdat") %>% clean_names()
cen %>% ct(support3)
cen <- cen %>%
mutate(state = frcode(
est_st == "01" ~ "Alabama",
est_st == "02" ~ "Alaska",
est_st == "04" ~ "Arizona",
est_st == "05" ~ "Arkansas",
est_st == "06" ~ "California",
est_st == "08" ~ "Colorado",
est_st == "09" ~ "Connecticut",
est_st == "10" ~ "Delaware",
est_st == "11" ~ "District of Columbia",
est_st == "12" ~ "Florida",
est_st == "13" ~ "Georgia",
est_st == "15" ~ "Hawaii",
est_st == "16" ~ "Idaho",
est_st == "17" ~ "Illinois",
est_st == "18" ~ "Indiana",
est_st == "19" ~ "Iowa",
est_st == "20" ~ "Kansas",
est_st == "21" ~ "Kentucky",
est_st == "22" ~ "Louisiana",
est_st == "23" ~ "Maine",
est_st == "24" ~ "Maryland",
est_st == "25" ~ "Massachusetts",
est_st == "26" ~ "Michigan",
est_st == "27" ~ "Minnesota",
est_st == "28" ~ "Mississippi",
est_st == "29" ~ "Missouri",
est_st == "30" ~ "Montana",
est_st == "31" ~ "Nebraska",
est_st == "32" ~ "Nevada",
est_st == "33" ~ "New Hampshire",
est_st == "34" ~ "New Jersey",
est_st == "35" ~ "New Mexico",
est_st == "36" ~ "New York",
est_st == "37" ~ "North Carolina",
est_st == "38" ~ "North Dakota",
est_st == "39" ~ "Ohio",
est_st == "40" ~ "Oklahoma",
est_st == "41" ~ "Oregon",
est_st == "42" ~ "Pennsylvania",
est_st == "44" ~ "Rhode Island",
est_st == "45" ~ "South Carolina",
est_st == "46" ~ "South Dakota",
est_st == "47" ~ "Tennessee",
est_st == "48" ~ "Texas",
est_st == "49" ~ "Utah",
est_st == "50" ~ "Vermont",
est_st == "51" ~ "Virginia",
est_st == "53" ~ "Washington",
est_st == "54" ~ "West Virginia",
est_st == "55" ~ "Wisconsin",
est_st == "56" ~ "Wyoming"
))
gg_data <- cen %>%
mutate(ff = support3) %>%
mutate(ff = frcode(ff == 1 ~ "Zero",
ff == 2 | ff == 3 ~ "1 to 11",
ff == 4 ~ "12+")) %>%
group_by(state) %>%
ct(ff, wt = pweight, show_na = FALSE) %>%
ungroup() %>%
mutate(state = factor(state, levels = cur_data() %>% # Correct reference
filter(ff == "12+") %>%
arrange(desc(pct)) %>%
pull(state)))
gg_data %>%
mutate(lab = round(pct, 2)) %>%
ggplot(., aes(x = 1, y = pct, fill = fct_rev(ff))) +
geom_col(color = "black") +
coord_flip() +
facet_wrap(~ state, ncol =1, strip.position = "left") +
theme_rb() +
scale_fill_manual(values = c("#4E79A7", "#F28E2B", "#E15759", "#76B7B2", "#59A14F")) + # New custom five-color palette
theme(legend.position = "bottom") +
scale_y_continuous(labels = percent) +
theme(strip.text.y.left = element_text(angle=0)) +
guides(fill = guide_legend(reverse=T, nrow = 1)) +
theme(axis.title.y=element_blank(), axis.text.y=element_blank(), axis.ticks.y=element_blank()) +
theme(panel.grid.minor.y=element_blank(), panel.grid.major.y=element_blank()) +
geom_text(aes(label = ifelse(pct >.05, paste0(lab*100, '%'), '')), position = position_stack(vjust = 0.5), size = 4, family = "font", color = "white") +
# geom_text(aes(label = ifelse(age2 == "18-35", paste0(lab*100, '%'), '')), position = position_stack(vjust = 0.5), size = 4, family = "font", color = "white") +
# geom_text(aes(label = ifelse(age2 == "36-44", paste0(lab*100, '%'), '')), position = position_stack(vjust = 0.5), size = 4, family = "font", color = "white") +
theme(plot.title = element_text(size = 16)) +
theme(strip.text.y.left = element_text(angle = 0, hjust = 1)) +
labs(x = "", y = "", title = "Religious Attendance Over the Prior Year by State", caption = "@ryanburge\nData: Household Pulse Survey, Phase 4.2, 2024")
save("pulse_attend_state_level.png", wd = 9, ht = 15)
mapp <- gg %>% filter(ff == "12+")
mapp$id <- tolower(mapp$state)
library(fiftystater)
data("fifty_states")
mapp <- left_join(fifty_states, mapp, by = c("id")) %>% as_tibble()
map <- mapp %>%
mutate(bins = frcode(pct >= .30 ~ "30%+",
pct >= .25 ~ "25%-30%",
pct >= .20 ~ "20%-25%",
pct >= 0 ~ "0%-20%"))
centers <- read_csv("D://state_centers_inset.csv") %>% rename(id = full)
centers$id <- tolower(centers$id)
centers <- left_join(map, centers, by = c("id")) %>% as_tibble()
centers <- centers %>% distinct(id, .keep_all = TRUE)
centers <- centers %>%
mutate(mean = round(pct, 2)) %>%
mutate(lab = mean*100) %>%
mutate(lab = paste0(lab, '%')) %>%
mutate(label = paste(state.y, lab, sep = ": "))
map %>%
ggplot(., mapping = aes(x = long, y = lat, group = group, fill = bins)) +
geom_polygon(color = "black", linewidth = 0.1) +
coord_map(projection = "albers", lat0 = 41, lat1 = 44) +
scale_fill_manual(
values = c("#ce1256", "#df65b0", "#d4b9da", "#f7f4f9"), # Deep purple to light pink
name = "Nones (%)"
) +
ggrepel::geom_label_repel(data = centers,
aes(x = longitude, y = latitude, label = label, group = group), size = 2.5, fill = "white",
seed = 1002, family = "font", force = .001, show.legend = FALSE) +
theme_rb() +
theme(axis.line = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.title = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid = element_blank(),
panel.spacing = unit(0, "lines"),
plot.background = element_blank(),
legend.position = "bottom",
plot.title = element_text(size = 24),
strip.text = element_text(size = 24)) +
guides(fill = guide_legend(reverse=T)) +
labs(title = " Share Who Attend at Least 12 Times Per Year", caption = "@ryanburge\nData: Household Pulse Survey, Phase 4.2, 2024")
save("pulse_monthly_attend_map.png")
gg <- cen %>%
mutate(age = 2024 - tbirth_year) %>%
mutate(ff = support3) %>%
mutate(hi = case_when(ff == 4 ~ 1,
ff <= 3 ~ 0)) %>%
mutate(age2 = frcode(age <= 39 ~ "18-\n39",
age >= 40 & age <= 64 ~ "40-\n64",
age >= 65 ~ "65+")) %>%
group_by(state, age2) %>%
mean_ci(hi, wt = pweight, ci = .84)
library(geofacet)
gg %>%
mutate(lab = round(mean, 2)) %>%
ggplot(., aes(x = age2, y = mean, fill = age2)) +
geom_col(color = "black") +
facet_geo(~ state) +
theme_rb() +
scale_fill_gdocs() + # Using a different color scale that is included with ggplot2
scale_y_continuous(labels = percent, limits = c(0, .65)) +
geom_text(aes(y = mean + .07, label = paste0(lab*100, '%')), position = position_dodge(width = .9), size = 4, family = "font") +
theme(axis.text.x = element_text(size = 10)) +
theme(strip.text = element_text(size = 10)) +
theme(plot.title = element_text(size = 26)) +
labs(x = "", y = "", title = "Share Who Attend at Least 12 Times Per Year", caption = "@ryanburge\nData: Household Pulse Survey, Phase 4.2, 2024")
save("geo_facet_pulse_attend.png", wd = 12, ht = 10)
gg <- cen %>%
mutate(ed = frcode(eeduc == 1 | eeduc == 2 ~ "No HS",
eeduc == 3 ~ "HS Grad",
eeduc == 4 ~ "Some College",
eeduc == 5 ~ "2 Yr.",
eeduc == 6 ~ "4 Yr.",
eeduc == 7 ~ "Grad. Degree")) %>%
mutate(age = 2024 - tbirth_year) %>%
mutate(age2 = frcode(age >= 18 & age <= 35 ~ "18-35",
age >= 36 & age <= 44 ~ "36-44",
age >= 45 & age <= 54 ~ "45-54",
age >= 55 & age <= 64 ~ "55-64",
age >= 65 ~ "65+")) %>%
mutate(ff = support3) %>%
mutate(hi = case_when(ff == 4 ~ 1,
ff <= 3 ~ 0)) %>%
group_by(age2, ed) %>%
mean_ci(hi, wt = pweight)
gg <- gg %>% mutate(bins = frcode(
mean >= 0.30 ~ "30%+",
mean >= 0.25 ~ "25 to 30%",
mean >= 0.20 ~ "20 to 25%",
TRUE ~ "Less than 20%"
))
gg %>%
mutate(lab = round(mean, 2)) %>%
ggplot(., aes(x = age2, y = ed, fill = bins)) +
geom_tile(color = "black") +
scale_fill_manual(values = c(
"30%+" = "#08306B", # Darkest Blue
"25 to 30%" = "#2171B5", # Medium Dark Blue
"20 to 25%" = "#6BAED6", # Medium Light Blue
"Less than 20%" = "#C6DBEF" # Lightest Blue
)) +
theme_rb() +
geom_text(aes(x = age2, y = ed, label = paste0(lab*100, '%')), size = 8, family = "font") +
geom_text(aes(x = age2, y = ed, label = ifelse(mean > .25, paste0(lab*100, '%'), "")), size = 8, family = "font", color = "white") +
labs(x= "Age", y = "Education", title = "Attending at Least 12x Per Year by Age and Education",
caption = "@ryanburge\nData: Household Pulse Survey, Phase 4.2, 2024")
save("pulse_heat_ed_attend.png", wd = 6, ht = 6)
one <- cen %>%
mutate(age = 2024 - tbirth_year) %>%
filter(age <= 45) %>%
mutate(kid1 = case_when(kids_lt1y == 1 ~ 1,
TRUE ~ 0)) %>%
mutate(kid4 = case_when(kids_1_4y == 1 ~ 1,
TRUE ~ 0)) %>%
mutate(kid11 = case_when(kids_5_11y == 1 ~ 1,
TRUE ~ 0)) %>%
mutate(kid17 = case_when(kids_12_17y == 1 ~ 1,
TRUE ~ 0)) %>%
mutate(kid = kid1 + kid4 + kid11 + kid17) %>%
mutate(kid = frcode(kid >= 1 ~ "Yes",
kid == 0 ~ "No")) %>%
mutate(ff = support3) %>%
mutate(ff = frcode(ff == 1 ~ "Zero",
ff == 2 | ff == 3 ~ "1 to 11",
ff == 4 ~ "12+")) %>%
group_by(kid) %>%
ct(ff, wt = pweight, show_na = FALSE) %>%
mutate(type = "Any Children")
two <- cen %>%
mutate(age = 2024 - tbirth_year) %>%
filter(age <= 45) %>%
mutate(kid1 = case_when(kids_lt1y == 1 ~ 1,
TRUE ~ 0)) %>%
mutate(kid = frcode(kid1 >= 1 ~ "Yes",
kid1 == 0 ~ "No")) %>%
mutate(ff = support3) %>%
mutate(ff = frcode(ff == 1 ~ "Zero",
ff == 2 | ff == 3 ~ "1 to 11",
ff == 4 ~ "12+")) %>%
group_by(kid) %>%
ct(ff, wt = pweight, show_na = FALSE) %>%
mutate(type = "Children Under 1 Year Old")
three <- cen %>%
mutate(age = 2024 - tbirth_year) %>%
filter(age <= 45) %>%
mutate(kid4 = case_when(kids_1_4y == 1 ~ 1,
TRUE ~ 0)) %>%
mutate(kid = frcode(kid4 >= 1 ~ "Yes",
kid4 == 0 ~ "No")) %>%
mutate(ff = support3) %>%
mutate(ff = frcode(ff == 1 ~ "Zero",
ff == 2 | ff == 3 ~ "1 to 11",
ff == 4 ~ "12+")) %>%
group_by(kid) %>%
ct(ff, wt = pweight, show_na = FALSE) %>%
mutate(type = "Children 1-4 Years Old")
four <- cen %>%
mutate(age = 2024 - tbirth_year) %>%
filter(age <= 45) %>%
mutate(kid4 = case_when(kids_5_11y == 1 ~ 1,
TRUE ~ 0)) %>%
mutate(kid = frcode(kid4 >= 1 ~ "Yes",
kid4 == 0 ~ "No")) %>%
mutate(ff = support3) %>%
mutate(ff = frcode(ff == 1 ~ "Zero",
ff == 2 | ff == 3 ~ "1 to 11",
ff == 4 ~ "12+")) %>%
group_by(kid) %>%
ct(ff, wt = pweight, show_na = FALSE) %>%
mutate(type = "Children 5-11 Years Old")
five <- cen %>%
mutate(age = 2024 - tbirth_year) %>%
filter(age <= 45) %>%
mutate(kid4 = case_when(kids_12_17y == 1 ~ 1,
TRUE ~ 0)) %>%
mutate(kid = frcode(kid4 >= 1 ~ "Yes",
kid4 == 0 ~ "No")) %>%
mutate(ff = support3) %>%
mutate(ff = frcode(ff == 1 ~ "Zero",
ff == 2 | ff == 3 ~ "1 to 11",
ff == 4 ~ "12+")) %>%
group_by(kid) %>%
ct(ff, wt = pweight, show_na = FALSE) %>%
mutate(type = "Children 12-17 Years Old")
all <- bind_rows(one, two, three, four, five)
all <- all %>%
mutate(type = fct_relevel(type,
"Any Children",
"Children Under 1 Year Old",
"Children 1-4 Years Old",
"Children 5-11 Years Old",
"Children 12-17 Years Old"
))
all %>%
mutate(lab = round(pct, 2)) %>%
ggplot(., aes(x = fct_rev(kid), y = pct, fill = fct_rev(ff))) +
geom_col(color = "black") +
coord_flip() +
facet_wrap(~ type, ncol = 1, strip.position = "top") +
theme_rb() +
scale_fill_manual(values = c("#4E79A7", "#F28E2B", "#E15759", "#76B7B2", "#59A14F")) + # New custom five-color palette
theme(legend.position = "bottom") +
scale_y_continuous(labels = scales::percent) +
guides(fill = guide_legend(reverse = TRUE, nrow = 1)) +
theme(
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_blank(),
plot.title = element_text(size = 18), # Increase title size slightly
strip.text = element_text(size = 22, face = "bold"), # Increase facet label size
axis.text.y = element_text(size = 28, face = "bold") # **Increase left-side text size**
) +
geom_text(aes(label = ifelse(pct > .05, paste0(lab * 100, '%'), '')),
position = position_stack(vjust = 0.5), size = 8, family = "font", color = "black") +
labs(
x = "",
y = "",
title = "Religious Attendance Among 18-45 Year Olds By Parenting Status",
caption = "@ryanburge\nData: Household Pulse Survey, Phase 4.2, 2024"
)
save("pulse_attend_kids_types.png", wd = 9, ht = 8)
regg <- cen %>%
mutate(ed = eeduc) %>%
mutate(age = 2024 - tbirth_year) %>%
mutate(ff = support3) %>%
mutate(hi = case_when(ff == 4 ~ 1,
ff <= 3 ~ 0)) %>%
mutate(male = case_when(egenid_birth == 1 ~ 1,
egenid_birth == 2 ~ 0)) %>%
mutate(white = case_when(rrace == 1 ~ 1,
TRUE ~ 0)) %>%
mutate(kids = case_when(thhld_numkid >= 1 ~ 1,
TRUE ~ 0)) %>%
mutate(inc = income) %>%
select(ed, age, hi, male, white, kids, inc)
regg <- regg %>%
mutate(across(where(is.numeric), ~ rescale(.x, to = c(0, 1))))
coef_names <- c("Education" = "ed",
"Age" = "age",
"Male" = "male",
"Income" = "inc",
"White" = "white",
"Has Children" = "kids")
# Run the logistic regression with HC3 robust standard errors
reg1 <- glm(hi ~ ., family = "binomial", data = regg)
gg <- plot_summs(reg1, scale = TRUE, robust = "HC3", coefs = coef_names)
gg +
labs(title = "What Factors Predict a Greater Likelihood of Attending Religious Services Monthly or More?",
x = "Odds Ratio",
y = "",
caption = "@ryanburge\nData: Household Pulse Survey, Phase 4.2, 2024") +
theme_rb()
save("regress_pulse_attend.png", ht = 4, wd = 10)
### Interaction ###
regg <- cen %>%
mutate(ed = frcode(eeduc == 1 | eeduc == 2 ~ "No HS",
eeduc == 3 ~ "HS Grad",
eeduc == 4 ~ "Some College",
eeduc == 5 ~ "2 Yr.",
eeduc == 6 ~ "4 Yr.",
eeduc == 7 ~ "Grad. Degree")) %>%
mutate(age = 2024 - tbirth_year) %>%
filter(age <= 45) %>%
mutate(ff = support3) %>%
mutate(hi = case_when(ff == 4 ~ 1,
ff <= 3 ~ 0)) %>%
mutate(male = case_when(egenid_birth == 1 ~ 1,
egenid_birth == 2 ~ 0)) %>%
mutate(white = case_when(rrace == 1 ~ 1,
TRUE ~ 0)) %>%
mutate(kids = frcode(thhld_numkid >= 1 ~ "Has Children",
TRUE ~ "No Children")) %>%
mutate(inc = income) %>%
select(ed, age, hi, male, white, kids, inc)
# Run the logistic regression with HC3 robust standard errors
out <- glm(hi ~ kids*ed + ., family = "binomial", data = regg)
library(interactions)
library(ggplot2)
library(ggeffects) # Needed for predicted probabilities
# Generate predicted probabilities for interaction effect
preds <- ggpredict(out, terms = c("ed", "kids")) # Extract predictions
# Define dodge width to be consistent
dodge_width <- 0.9 # Adjusted to ensure bars are properly spaced
# Create a bar chart with manually controlled error bars
gg <- ggplot(preds, aes(x = x, y = predicted, fill = factor(group))) +
geom_col(position = position_dodge(width = dodge_width), width = .8, color = "black") + # Adjust bar width
geom_errorbar(aes(ymin = conf.low, ymax = conf.high),
width = 0.2, # **Make error bars thinner**
position = position_dodge(width = dodge_width)) + # Keep bars aligned
geom_text(aes(label = paste0(round(predicted * 100), "%"), y = 0.025), # **Fix all labels at y = 0.05**
position = position_dodge(width = dodge_width),
size = 7, family = "font", color = "white") + # White text for visibility
scale_fill_calc() +
scale_color_calc() +
theme(legend.position = "bottom") +
theme(legend.title = element_blank()) +
y_pct() +
theme(plot.title = element_text(size = 10)) +
theme_rb(legend = TRUE) +
labs(
x = "",
y = "",
title = "The Impact of Education and Parental Status on Weekly Religious Attendance",
subtitle = "Among 18-45 Year Olds",
caption = "@ryanburge\nData: Household Pulse Survey, Phase 4.2, 2024"
)
# Save the plot
save("cat_plot_pulse_ed_kids.png", wd = 8)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment