-
-
Save ryanburge/43f0520d8a03415eb0f9e1ab59b90bb6 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(rio) | |
library(janitor) | |
cen <- import("E://data/pulse42.sas7bdat") %>% clean_names() | |
cen %>% ct(support3) | |
cen <- cen %>% | |
mutate(state = frcode( | |
est_st == "01" ~ "Alabama", | |
est_st == "02" ~ "Alaska", | |
est_st == "04" ~ "Arizona", | |
est_st == "05" ~ "Arkansas", | |
est_st == "06" ~ "California", | |
est_st == "08" ~ "Colorado", | |
est_st == "09" ~ "Connecticut", | |
est_st == "10" ~ "Delaware", | |
est_st == "11" ~ "District of Columbia", | |
est_st == "12" ~ "Florida", | |
est_st == "13" ~ "Georgia", | |
est_st == "15" ~ "Hawaii", | |
est_st == "16" ~ "Idaho", | |
est_st == "17" ~ "Illinois", | |
est_st == "18" ~ "Indiana", | |
est_st == "19" ~ "Iowa", | |
est_st == "20" ~ "Kansas", | |
est_st == "21" ~ "Kentucky", | |
est_st == "22" ~ "Louisiana", | |
est_st == "23" ~ "Maine", | |
est_st == "24" ~ "Maryland", | |
est_st == "25" ~ "Massachusetts", | |
est_st == "26" ~ "Michigan", | |
est_st == "27" ~ "Minnesota", | |
est_st == "28" ~ "Mississippi", | |
est_st == "29" ~ "Missouri", | |
est_st == "30" ~ "Montana", | |
est_st == "31" ~ "Nebraska", | |
est_st == "32" ~ "Nevada", | |
est_st == "33" ~ "New Hampshire", | |
est_st == "34" ~ "New Jersey", | |
est_st == "35" ~ "New Mexico", | |
est_st == "36" ~ "New York", | |
est_st == "37" ~ "North Carolina", | |
est_st == "38" ~ "North Dakota", | |
est_st == "39" ~ "Ohio", | |
est_st == "40" ~ "Oklahoma", | |
est_st == "41" ~ "Oregon", | |
est_st == "42" ~ "Pennsylvania", | |
est_st == "44" ~ "Rhode Island", | |
est_st == "45" ~ "South Carolina", | |
est_st == "46" ~ "South Dakota", | |
est_st == "47" ~ "Tennessee", | |
est_st == "48" ~ "Texas", | |
est_st == "49" ~ "Utah", | |
est_st == "50" ~ "Vermont", | |
est_st == "51" ~ "Virginia", | |
est_st == "53" ~ "Washington", | |
est_st == "54" ~ "West Virginia", | |
est_st == "55" ~ "Wisconsin", | |
est_st == "56" ~ "Wyoming" | |
)) | |
gg_data <- cen %>% | |
mutate(ff = support3) %>% | |
mutate(ff = frcode(ff == 1 ~ "Zero", | |
ff == 2 | ff == 3 ~ "1 to 11", | |
ff == 4 ~ "12+")) %>% | |
group_by(state) %>% | |
ct(ff, wt = pweight, show_na = FALSE) %>% | |
ungroup() %>% | |
mutate(state = factor(state, levels = cur_data() %>% # Correct reference | |
filter(ff == "12+") %>% | |
arrange(desc(pct)) %>% | |
pull(state))) | |
gg_data %>% | |
mutate(lab = round(pct, 2)) %>% | |
ggplot(., aes(x = 1, y = pct, fill = fct_rev(ff))) + | |
geom_col(color = "black") + | |
coord_flip() + | |
facet_wrap(~ state, ncol =1, strip.position = "left") + | |
theme_rb() + | |
scale_fill_manual(values = c("#4E79A7", "#F28E2B", "#E15759", "#76B7B2", "#59A14F")) + # New custom five-color palette | |
theme(legend.position = "bottom") + | |
scale_y_continuous(labels = percent) + | |
theme(strip.text.y.left = element_text(angle=0)) + | |
guides(fill = guide_legend(reverse=T, nrow = 1)) + | |
theme(axis.title.y=element_blank(), axis.text.y=element_blank(), axis.ticks.y=element_blank()) + | |
theme(panel.grid.minor.y=element_blank(), panel.grid.major.y=element_blank()) + | |
geom_text(aes(label = ifelse(pct >.05, paste0(lab*100, '%'), '')), position = position_stack(vjust = 0.5), size = 4, family = "font", color = "white") + | |
# geom_text(aes(label = ifelse(age2 == "18-35", paste0(lab*100, '%'), '')), position = position_stack(vjust = 0.5), size = 4, family = "font", color = "white") + | |
# geom_text(aes(label = ifelse(age2 == "36-44", paste0(lab*100, '%'), '')), position = position_stack(vjust = 0.5), size = 4, family = "font", color = "white") + | |
theme(plot.title = element_text(size = 16)) + | |
theme(strip.text.y.left = element_text(angle = 0, hjust = 1)) + | |
labs(x = "", y = "", title = "Religious Attendance Over the Prior Year by State", caption = "@ryanburge\nData: Household Pulse Survey, Phase 4.2, 2024") | |
save("pulse_attend_state_level.png", wd = 9, ht = 15) | |
mapp <- gg %>% filter(ff == "12+") | |
mapp$id <- tolower(mapp$state) | |
library(fiftystater) | |
data("fifty_states") | |
mapp <- left_join(fifty_states, mapp, by = c("id")) %>% as_tibble() | |
map <- mapp %>% | |
mutate(bins = frcode(pct >= .30 ~ "30%+", | |
pct >= .25 ~ "25%-30%", | |
pct >= .20 ~ "20%-25%", | |
pct >= 0 ~ "0%-20%")) | |
centers <- read_csv("D://state_centers_inset.csv") %>% rename(id = full) | |
centers$id <- tolower(centers$id) | |
centers <- left_join(map, centers, by = c("id")) %>% as_tibble() | |
centers <- centers %>% distinct(id, .keep_all = TRUE) | |
centers <- centers %>% | |
mutate(mean = round(pct, 2)) %>% | |
mutate(lab = mean*100) %>% | |
mutate(lab = paste0(lab, '%')) %>% | |
mutate(label = paste(state.y, lab, sep = ": ")) | |
map %>% | |
ggplot(., mapping = aes(x = long, y = lat, group = group, fill = bins)) + | |
geom_polygon(color = "black", linewidth = 0.1) + | |
coord_map(projection = "albers", lat0 = 41, lat1 = 44) + | |
scale_fill_manual( | |
values = c("#ce1256", "#df65b0", "#d4b9da", "#f7f4f9"), # Deep purple to light pink | |
name = "Nones (%)" | |
) + | |
ggrepel::geom_label_repel(data = centers, | |
aes(x = longitude, y = latitude, label = label, group = group), size = 2.5, fill = "white", | |
seed = 1002, family = "font", force = .001, show.legend = FALSE) + | |
theme_rb() + | |
theme(axis.line = element_blank(), | |
axis.text = element_blank(), | |
axis.ticks = element_blank(), | |
axis.title = element_blank(), | |
panel.background = element_blank(), | |
panel.border = element_blank(), | |
panel.grid = element_blank(), | |
panel.spacing = unit(0, "lines"), | |
plot.background = element_blank(), | |
legend.position = "bottom", | |
plot.title = element_text(size = 24), | |
strip.text = element_text(size = 24)) + | |
guides(fill = guide_legend(reverse=T)) + | |
labs(title = " Share Who Attend at Least 12 Times Per Year", caption = "@ryanburge\nData: Household Pulse Survey, Phase 4.2, 2024") | |
save("pulse_monthly_attend_map.png") | |
gg <- cen %>% | |
mutate(age = 2024 - tbirth_year) %>% | |
mutate(ff = support3) %>% | |
mutate(hi = case_when(ff == 4 ~ 1, | |
ff <= 3 ~ 0)) %>% | |
mutate(age2 = frcode(age <= 39 ~ "18-\n39", | |
age >= 40 & age <= 64 ~ "40-\n64", | |
age >= 65 ~ "65+")) %>% | |
group_by(state, age2) %>% | |
mean_ci(hi, wt = pweight, ci = .84) | |
library(geofacet) | |
gg %>% | |
mutate(lab = round(mean, 2)) %>% | |
ggplot(., aes(x = age2, y = mean, fill = age2)) + | |
geom_col(color = "black") + | |
facet_geo(~ state) + | |
theme_rb() + | |
scale_fill_gdocs() + # Using a different color scale that is included with ggplot2 | |
scale_y_continuous(labels = percent, limits = c(0, .65)) + | |
geom_text(aes(y = mean + .07, label = paste0(lab*100, '%')), position = position_dodge(width = .9), size = 4, family = "font") + | |
theme(axis.text.x = element_text(size = 10)) + | |
theme(strip.text = element_text(size = 10)) + | |
theme(plot.title = element_text(size = 26)) + | |
labs(x = "", y = "", title = "Share Who Attend at Least 12 Times Per Year", caption = "@ryanburge\nData: Household Pulse Survey, Phase 4.2, 2024") | |
save("geo_facet_pulse_attend.png", wd = 12, ht = 10) | |
gg <- cen %>% | |
mutate(ed = frcode(eeduc == 1 | eeduc == 2 ~ "No HS", | |
eeduc == 3 ~ "HS Grad", | |
eeduc == 4 ~ "Some College", | |
eeduc == 5 ~ "2 Yr.", | |
eeduc == 6 ~ "4 Yr.", | |
eeduc == 7 ~ "Grad. Degree")) %>% | |
mutate(age = 2024 - tbirth_year) %>% | |
mutate(age2 = frcode(age >= 18 & age <= 35 ~ "18-35", | |
age >= 36 & age <= 44 ~ "36-44", | |
age >= 45 & age <= 54 ~ "45-54", | |
age >= 55 & age <= 64 ~ "55-64", | |
age >= 65 ~ "65+")) %>% | |
mutate(ff = support3) %>% | |
mutate(hi = case_when(ff == 4 ~ 1, | |
ff <= 3 ~ 0)) %>% | |
group_by(age2, ed) %>% | |
mean_ci(hi, wt = pweight) | |
gg <- gg %>% mutate(bins = frcode( | |
mean >= 0.30 ~ "30%+", | |
mean >= 0.25 ~ "25 to 30%", | |
mean >= 0.20 ~ "20 to 25%", | |
TRUE ~ "Less than 20%" | |
)) | |
gg %>% | |
mutate(lab = round(mean, 2)) %>% | |
ggplot(., aes(x = age2, y = ed, fill = bins)) + | |
geom_tile(color = "black") + | |
scale_fill_manual(values = c( | |
"30%+" = "#08306B", # Darkest Blue | |
"25 to 30%" = "#2171B5", # Medium Dark Blue | |
"20 to 25%" = "#6BAED6", # Medium Light Blue | |
"Less than 20%" = "#C6DBEF" # Lightest Blue | |
)) + | |
theme_rb() + | |
geom_text(aes(x = age2, y = ed, label = paste0(lab*100, '%')), size = 8, family = "font") + | |
geom_text(aes(x = age2, y = ed, label = ifelse(mean > .25, paste0(lab*100, '%'), "")), size = 8, family = "font", color = "white") + | |
labs(x= "Age", y = "Education", title = "Attending at Least 12x Per Year by Age and Education", | |
caption = "@ryanburge\nData: Household Pulse Survey, Phase 4.2, 2024") | |
save("pulse_heat_ed_attend.png", wd = 6, ht = 6) | |
one <- cen %>% | |
mutate(age = 2024 - tbirth_year) %>% | |
filter(age <= 45) %>% | |
mutate(kid1 = case_when(kids_lt1y == 1 ~ 1, | |
TRUE ~ 0)) %>% | |
mutate(kid4 = case_when(kids_1_4y == 1 ~ 1, | |
TRUE ~ 0)) %>% | |
mutate(kid11 = case_when(kids_5_11y == 1 ~ 1, | |
TRUE ~ 0)) %>% | |
mutate(kid17 = case_when(kids_12_17y == 1 ~ 1, | |
TRUE ~ 0)) %>% | |
mutate(kid = kid1 + kid4 + kid11 + kid17) %>% | |
mutate(kid = frcode(kid >= 1 ~ "Yes", | |
kid == 0 ~ "No")) %>% | |
mutate(ff = support3) %>% | |
mutate(ff = frcode(ff == 1 ~ "Zero", | |
ff == 2 | ff == 3 ~ "1 to 11", | |
ff == 4 ~ "12+")) %>% | |
group_by(kid) %>% | |
ct(ff, wt = pweight, show_na = FALSE) %>% | |
mutate(type = "Any Children") | |
two <- cen %>% | |
mutate(age = 2024 - tbirth_year) %>% | |
filter(age <= 45) %>% | |
mutate(kid1 = case_when(kids_lt1y == 1 ~ 1, | |
TRUE ~ 0)) %>% | |
mutate(kid = frcode(kid1 >= 1 ~ "Yes", | |
kid1 == 0 ~ "No")) %>% | |
mutate(ff = support3) %>% | |
mutate(ff = frcode(ff == 1 ~ "Zero", | |
ff == 2 | ff == 3 ~ "1 to 11", | |
ff == 4 ~ "12+")) %>% | |
group_by(kid) %>% | |
ct(ff, wt = pweight, show_na = FALSE) %>% | |
mutate(type = "Children Under 1 Year Old") | |
three <- cen %>% | |
mutate(age = 2024 - tbirth_year) %>% | |
filter(age <= 45) %>% | |
mutate(kid4 = case_when(kids_1_4y == 1 ~ 1, | |
TRUE ~ 0)) %>% | |
mutate(kid = frcode(kid4 >= 1 ~ "Yes", | |
kid4 == 0 ~ "No")) %>% | |
mutate(ff = support3) %>% | |
mutate(ff = frcode(ff == 1 ~ "Zero", | |
ff == 2 | ff == 3 ~ "1 to 11", | |
ff == 4 ~ "12+")) %>% | |
group_by(kid) %>% | |
ct(ff, wt = pweight, show_na = FALSE) %>% | |
mutate(type = "Children 1-4 Years Old") | |
four <- cen %>% | |
mutate(age = 2024 - tbirth_year) %>% | |
filter(age <= 45) %>% | |
mutate(kid4 = case_when(kids_5_11y == 1 ~ 1, | |
TRUE ~ 0)) %>% | |
mutate(kid = frcode(kid4 >= 1 ~ "Yes", | |
kid4 == 0 ~ "No")) %>% | |
mutate(ff = support3) %>% | |
mutate(ff = frcode(ff == 1 ~ "Zero", | |
ff == 2 | ff == 3 ~ "1 to 11", | |
ff == 4 ~ "12+")) %>% | |
group_by(kid) %>% | |
ct(ff, wt = pweight, show_na = FALSE) %>% | |
mutate(type = "Children 5-11 Years Old") | |
five <- cen %>% | |
mutate(age = 2024 - tbirth_year) %>% | |
filter(age <= 45) %>% | |
mutate(kid4 = case_when(kids_12_17y == 1 ~ 1, | |
TRUE ~ 0)) %>% | |
mutate(kid = frcode(kid4 >= 1 ~ "Yes", | |
kid4 == 0 ~ "No")) %>% | |
mutate(ff = support3) %>% | |
mutate(ff = frcode(ff == 1 ~ "Zero", | |
ff == 2 | ff == 3 ~ "1 to 11", | |
ff == 4 ~ "12+")) %>% | |
group_by(kid) %>% | |
ct(ff, wt = pweight, show_na = FALSE) %>% | |
mutate(type = "Children 12-17 Years Old") | |
all <- bind_rows(one, two, three, four, five) | |
all <- all %>% | |
mutate(type = fct_relevel(type, | |
"Any Children", | |
"Children Under 1 Year Old", | |
"Children 1-4 Years Old", | |
"Children 5-11 Years Old", | |
"Children 12-17 Years Old" | |
)) | |
all %>% | |
mutate(lab = round(pct, 2)) %>% | |
ggplot(., aes(x = fct_rev(kid), y = pct, fill = fct_rev(ff))) + | |
geom_col(color = "black") + | |
coord_flip() + | |
facet_wrap(~ type, ncol = 1, strip.position = "top") + | |
theme_rb() + | |
scale_fill_manual(values = c("#4E79A7", "#F28E2B", "#E15759", "#76B7B2", "#59A14F")) + # New custom five-color palette | |
theme(legend.position = "bottom") + | |
scale_y_continuous(labels = scales::percent) + | |
guides(fill = guide_legend(reverse = TRUE, nrow = 1)) + | |
theme( | |
panel.grid.minor.y = element_blank(), | |
panel.grid.major.y = element_blank(), | |
plot.title = element_text(size = 18), # Increase title size slightly | |
strip.text = element_text(size = 22, face = "bold"), # Increase facet label size | |
axis.text.y = element_text(size = 28, face = "bold") # **Increase left-side text size** | |
) + | |
geom_text(aes(label = ifelse(pct > .05, paste0(lab * 100, '%'), '')), | |
position = position_stack(vjust = 0.5), size = 8, family = "font", color = "black") + | |
labs( | |
x = "", | |
y = "", | |
title = "Religious Attendance Among 18-45 Year Olds By Parenting Status", | |
caption = "@ryanburge\nData: Household Pulse Survey, Phase 4.2, 2024" | |
) | |
save("pulse_attend_kids_types.png", wd = 9, ht = 8) | |
regg <- cen %>% | |
mutate(ed = eeduc) %>% | |
mutate(age = 2024 - tbirth_year) %>% | |
mutate(ff = support3) %>% | |
mutate(hi = case_when(ff == 4 ~ 1, | |
ff <= 3 ~ 0)) %>% | |
mutate(male = case_when(egenid_birth == 1 ~ 1, | |
egenid_birth == 2 ~ 0)) %>% | |
mutate(white = case_when(rrace == 1 ~ 1, | |
TRUE ~ 0)) %>% | |
mutate(kids = case_when(thhld_numkid >= 1 ~ 1, | |
TRUE ~ 0)) %>% | |
mutate(inc = income) %>% | |
select(ed, age, hi, male, white, kids, inc) | |
regg <- regg %>% | |
mutate(across(where(is.numeric), ~ rescale(.x, to = c(0, 1)))) | |
coef_names <- c("Education" = "ed", | |
"Age" = "age", | |
"Male" = "male", | |
"Income" = "inc", | |
"White" = "white", | |
"Has Children" = "kids") | |
# Run the logistic regression with HC3 robust standard errors | |
reg1 <- glm(hi ~ ., family = "binomial", data = regg) | |
gg <- plot_summs(reg1, scale = TRUE, robust = "HC3", coefs = coef_names) | |
gg + | |
labs(title = "What Factors Predict a Greater Likelihood of Attending Religious Services Monthly or More?", | |
x = "Odds Ratio", | |
y = "", | |
caption = "@ryanburge\nData: Household Pulse Survey, Phase 4.2, 2024") + | |
theme_rb() | |
save("regress_pulse_attend.png", ht = 4, wd = 10) | |
### Interaction ### | |
regg <- cen %>% | |
mutate(ed = frcode(eeduc == 1 | eeduc == 2 ~ "No HS", | |
eeduc == 3 ~ "HS Grad", | |
eeduc == 4 ~ "Some College", | |
eeduc == 5 ~ "2 Yr.", | |
eeduc == 6 ~ "4 Yr.", | |
eeduc == 7 ~ "Grad. Degree")) %>% | |
mutate(age = 2024 - tbirth_year) %>% | |
filter(age <= 45) %>% | |
mutate(ff = support3) %>% | |
mutate(hi = case_when(ff == 4 ~ 1, | |
ff <= 3 ~ 0)) %>% | |
mutate(male = case_when(egenid_birth == 1 ~ 1, | |
egenid_birth == 2 ~ 0)) %>% | |
mutate(white = case_when(rrace == 1 ~ 1, | |
TRUE ~ 0)) %>% | |
mutate(kids = frcode(thhld_numkid >= 1 ~ "Has Children", | |
TRUE ~ "No Children")) %>% | |
mutate(inc = income) %>% | |
select(ed, age, hi, male, white, kids, inc) | |
# Run the logistic regression with HC3 robust standard errors | |
out <- glm(hi ~ kids*ed + ., family = "binomial", data = regg) | |
library(interactions) | |
library(ggplot2) | |
library(ggeffects) # Needed for predicted probabilities | |
# Generate predicted probabilities for interaction effect | |
preds <- ggpredict(out, terms = c("ed", "kids")) # Extract predictions | |
# Define dodge width to be consistent | |
dodge_width <- 0.9 # Adjusted to ensure bars are properly spaced | |
# Create a bar chart with manually controlled error bars | |
gg <- ggplot(preds, aes(x = x, y = predicted, fill = factor(group))) + | |
geom_col(position = position_dodge(width = dodge_width), width = .8, color = "black") + # Adjust bar width | |
geom_errorbar(aes(ymin = conf.low, ymax = conf.high), | |
width = 0.2, # **Make error bars thinner** | |
position = position_dodge(width = dodge_width)) + # Keep bars aligned | |
geom_text(aes(label = paste0(round(predicted * 100), "%"), y = 0.025), # **Fix all labels at y = 0.05** | |
position = position_dodge(width = dodge_width), | |
size = 7, family = "font", color = "white") + # White text for visibility | |
scale_fill_calc() + | |
scale_color_calc() + | |
theme(legend.position = "bottom") + | |
theme(legend.title = element_blank()) + | |
y_pct() + | |
theme(plot.title = element_text(size = 10)) + | |
theme_rb(legend = TRUE) + | |
labs( | |
x = "", | |
y = "", | |
title = "The Impact of Education and Parental Status on Weekly Religious Attendance", | |
subtitle = "Among 18-45 Year Olds", | |
caption = "@ryanburge\nData: Household Pulse Survey, Phase 4.2, 2024" | |
) | |
# Save the plot | |
save("cat_plot_pulse_ed_kids.png", wd = 8) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment