Skip to content

Instantly share code, notes, and snippets.

@ryanburge
Created March 16, 2025 14:08
Show Gist options
  • Select an option

  • Save ryanburge/d48dfc5927218c83eb89e0b8edcf56da to your computer and use it in GitHub Desktop.

Select an option

Save ryanburge/d48dfc5927218c83eb89e0b8edcf56da to your computer and use it in GitHub Desktop.
library(rio)
ess <- import("E://data/ess23.dta")
country_mapping <- tibble(
cntry = c("AL", "AT", "BE", "BG", "CH", "CY", "CZ", "DE", "DK", "EE",
"ES", "FI", "FR", "GB", "GE", "GR", "HR", "HU", "IE", "IS",
"IL", "IT", "LT", "LU", "LV", "ME", "MK", "NL", "NO", "PL",
"PT", "RO", "RS", "RU", "SE", "SI", "SK", "TR", "UA", "XK"),
country = c("Albania", "Austria", "Belgium", "Bulgaria", "Switzerland", "Cyprus", "Czechia",
"Germany", "Denmark", "Estonia", "Spain", "Finland", "France", "United Kingdom",
"Georgia", "Greece", "Croatia", "Hungary", "Ireland", "Iceland", "Israel", "Italy",
"Lithuania", "Luxembourg", "Latvia", "Montenegro", "North Macedonia", "Netherlands",
"Norway", "Poland", "Portugal", "Romania", "Serbia", "Russian Federation", "Sweden",
"Slovenia", "Slovakia", "Turkey", "Ukraine", "Kosovo")
)
ess <- ess %>%
left_join(country_mapping, by = "cntry")
ess %>%
filter(!rlgatnd %in% c(77, 88, 99)) %>% # Remove refusal, DK, no answer
mutate(rlgatnd_binary = ifelse(rlgatnd %in% c(1, 2, 3), 1, 0)) %>%
filter(!edulvlb %in% c(5555, 7777, 8888, 9999)) %>% # Remove missing values
mutate(edulvlb_simplified = case_when(
edulvlb == 0 ~ 1, # No formal education
edulvlb == 113 ~ 2, # Primary education
edulvlb %in% c(129, 221) ~ 3, # Lower vocational
edulvlb %in% c(212, 213, 222, 223) ~ 4, # Lower secondary
edulvlb %in% c(229, 311, 312, 313) ~ 5, # Upper secondary general
edulvlb %in% c(321, 322, 323) ~ 6, # Upper secondary vocational
edulvlb %in% c(412, 413, 421, 422, 423) ~ 7, # Post-secondary non-tertiary
edulvlb %in% c(510, 520) ~ 8, # Short-cycle tertiary
edulvlb %in% c(610, 620) ~ 9, # Bachelor's or equivalent
edulvlb %in% c(710, 720) ~ 10, # Master's or equivalent
edulvlb == 800 ~ 11 # Doctoral degree
)) %>% ct(edulvlb)
gg <- ess %>%
filter(!rlgatnd %in% c(77, 88, 99)) %>% # Remove refusal, DK, no answer
mutate(wk = ifelse(rlgatnd %in% c(1, 2, 3), 1, 0)) %>%
filter(!edulvlb %in% c(5555, 7777, 8888, 9999)) %>% # Remove missing values
mutate(edulvlb_simplified = case_when(
edulvlb == 0 ~ 1, # No formal education
edulvlb == 113 ~ 2, # Primary education
edulvlb %in% c(129, 221) ~ 3, # Lower vocational
edulvlb %in% c(212, 213, 222, 223) ~ 4, # Lower secondary
edulvlb %in% c(229, 311, 312, 313) ~ 5, # Upper secondary general
edulvlb %in% c(321, 322, 323) ~ 6, # Upper secondary vocational
edulvlb %in% c(412, 413, 421, 422, 423) ~ 7, # Post-secondary non-tertiary
edulvlb %in% c(510, 520) ~ 8, # Short-cycle tertiary
edulvlb %in% c(610, 620) ~ 9, # Bachelor's or equivalent
edulvlb %in% c(710, 720) ~ 10, # Master's or equivalent
edulvlb == 800 ~ 11 # Doctoral degree
)) %>%
mutate(edulvlb_factor = factor(edulvlb_simplified, levels = 1:11,
labels = c("No formal education",
"Primary education",
"Lower vocational",
"Lower secondary",
"Upper secondary general",
"Upper secondary vocational",
"Post-secondary non-tertiary",
"Short-cycle tertiary",
"Bachelor’s or equivalent",
"Master’s or equivalent",
"Doctoral degree"))) %>%
mutate(edulvlb_binned = case_when(
edulvlb_factor %in% c("No formal education", "Primary education") ~ "No formal/\nPrimary",
edulvlb_factor == "Lower secondary" ~ "Lower\nsecondary",
edulvlb_factor %in% c("Upper secondary general", "Upper secondary vocational") ~ "Upper\nsecondary",
edulvlb_factor %in% c("Post-secondary non-tertiary", "Short-cycle tertiary") ~ "Post-sec",
edulvlb_factor == "Bachelor’s or equivalent" ~ "Bachelor’s",
edulvlb_factor %in% c("Master’s or equivalent", "Doctoral degree") ~ "Grad.\nSchool"
) %>%
factor(levels = c("No formal/\nPrimary", "Lower\nsecondary", "Upper\nsecondary",
"Post-sec", "Bachelor’s", "Grad.\nSchool"))) %>%
group_by(edulvlb_binned) %>%
mean_ci(wk, wt = dweight, ci = .84) %>% na.omit()
gg %>%
mutate(lab = round(mean, 2)) %>%
ggplot(., aes(x = edulvlb_binned, y = mean, fill = edulvlb_binned)) +
geom_col(color = 'black') +
theme_rb() +
y_pct() +
error_bar() +
scale_fill_manual(values = c(
"No formal/\nPrimary" = "#08306B",
"Lower\nsecondary" = "#2171B5",
"Upper\nsecondary" = "#4292C6",
"Post-sec" = "#6BAED6",
"Bachelor’s" = "#9ECAE1",
"Grad.\nSchool" = "#C6DBEF"
)) +
geom_text(aes(y = .025, label = paste0(lab*100, '%'),
color = ifelse(edulvlb_binned %in% c("No formal/\nPrimary", "Lower\nsecondary"), "white", "black")),
position = position_dodge(width = .9), size = 9, family = "font", fontface = "bold") +
scale_color_identity() + # Use color mapping from geom_text
labs(x = "Highest Level of Education", y = "",
title = "What's the Relationship Between Education and\nWeekly Religious Attendance in Europe?",
caption = "@ryanburge\nData: European Social Survey 11, 2023")
save("ess_wk_attend_educ.png", wd = 5)
cces_educ <- function(df, var){
df %>%
mutate(educ = frcode({{var}} == 1 ~ "No HS",
{{var}} == 2 ~ "HS Grad",
{{var}} == 3 ~ "Some\nCollege",
{{var}} == 4 ~ "2 Yr.",
{{var}} == 5 ~ "4 Yr.",
{{var}} == 6 ~ "Post-\nGrad"))
}
gg <- cces %>%
filter(year >= 2022) %>%
cces_educ(educ) %>%
mutate(wk = case_when(pew_attendance == 1 | pew_attendance == 2 ~ 1,
pew_attendance <= 6 ~ 0)) %>%
group_by(educ) %>%
mean_ci(wk, wt = weight, ci = .84)
gg %>%
mutate(lab = round(mean, 2)) %>%
ggplot(., aes(x = educ, y = mean, fill = educ)) +
geom_col(color = 'black') +
theme_rb() +
y_pct() +
error_bar() +
scale_fill_manual(values = c(
"No HS" = "#67000D", # Dark red
"HS Grad" = "#A50F15", # Medium-dark red
"Some\nCollege" = "#CB181D", # Medium red
"2 Yr." = "#EF3B2C", # Lighter red
"4 Yr." = "#FC9272", # Pale red
"Post-\nGrad" = "#FEE0D2" # Very light red
)) +
geom_text(aes(y = .025, label = paste0(lab*100, '%'),
color = ifelse(educ %in% c("No HS", "HS Grad"), "white", "black")),
position = position_dodge(width = .9), size = 9, family = "font", fontface = "bold") +
scale_color_identity() + # Use color mapping from geom_text
labs(x = "Highest Level of Education", y = "",
title = "What's the Relationship Between Education and\nWeekly Religious Attendance in the United States?",
caption = "@ryanburge\nData: Cooperative Election Study, 2022-2023")
save("ces_wk_attend_educ23.png", wd = 5.25)
gg <- ess %>%
filter(!rlgatnd %in% c(77, 88, 99)) %>% # Remove refusal, DK, no answer
mutate(wk = ifelse(rlgatnd %in% c(1, 2, 3), 1, 0)) %>%
filter(!edulvlb %in% c(5555, 7777, 8888, 9999)) %>% # Remove missing values
mutate(edulvlb_simplified = case_when(
edulvlb == 0 ~ 1, # No formal education
edulvlb == 113 ~ 2, # Primary education
edulvlb %in% c(129, 221) ~ 3, # Lower vocational
edulvlb %in% c(212, 213, 222, 223) ~ 4, # Lower secondary
edulvlb %in% c(229, 311, 312, 313) ~ 5, # Upper secondary general
edulvlb %in% c(321, 322, 323) ~ 6, # Upper secondary vocational
edulvlb %in% c(412, 413, 421, 422, 423) ~ 7, # Post-secondary non-tertiary
edulvlb %in% c(510, 520) ~ 8, # Short-cycle tertiary
edulvlb %in% c(610, 620) ~ 9, # Bachelor's or equivalent
edulvlb %in% c(710, 720) ~ 10, # Master's or equivalent
edulvlb == 800 ~ 11 # Doctoral degree
)) %>%
mutate(edulvlb_factor = factor(edulvlb_simplified, levels = 1:11,
labels = c("No formal education",
"Primary education",
"Lower vocational",
"Lower secondary",
"Upper secondary general",
"Upper secondary vocational",
"Post-secondary non-tertiary",
"Short-cycle tertiary",
"Bachelor’s or equivalent",
"Master’s or equivalent",
"Doctoral degree"))) %>%
mutate(edulvlb_binned = case_when(
edulvlb_factor %in% c("No formal education", "Primary education") ~ "No formal/\nPrimary",
edulvlb_factor == "Lower secondary" ~ "Lower\nsecondary",
edulvlb_factor %in% c("Upper secondary general", "Upper secondary vocational") ~ "Upper\nsecondary",
edulvlb_factor %in% c("Post-secondary non-tertiary", "Short-cycle tertiary") ~ "Post-sec",
edulvlb_factor == "Bachelor’s or equivalent" ~ "Bachelor’s",
edulvlb_factor %in% c("Master’s or equivalent", "Doctoral degree") ~ "Grad.\nSchool"
) %>%
factor(levels = c("No formal/\nPrimary", "Lower\nsecondary", "Upper\nsecondary",
"Post-sec", "Bachelor’s", "Grad.\nSchool"))) %>%
group_by(edulvlb_binned, country) %>%
mean_ci(wk, wt = dweight, ci = .84) %>% na.omit()
small <- gg %>%
filter(n > 50)
library(RColorBrewer)
# Generate enough pastel colors for all unique countries
num_countries <- length(unique(gg$country)) # Count unique countries
pastel_colors <- colorRampPalette(brewer.pal(9, "Pastel1"))(num_countries) # Expand palette
gg %>%
mutate(lab = round(mean, 2),
edulvlb_num = as.numeric(edulvlb_binned)) %>% # Convert education levels to numeric
ggplot(aes(x = edulvlb_num, y = mean, fill = country)) +
geom_col(color = 'black', position = position_dodge()) + # Bars
geom_smooth(aes(group = country), method = "lm", color = "black", linetype = "dashed", se = FALSE) + # Trend line
theme_rb() +
y_pct() +
scale_fill_manual(values = pastel_colors) + # Expanded pastel palette
scale_x_continuous(
breaks = c(1, 3, 5), # Ensuring at least 3 labels
labels = c("No\nformal", "Upper\nSec.", "4 Yr.")
) +
facet_wrap(~ country) +
labs(x = "Highest Level of Education", y = "",
title = "What's the Relationship Between Education and Weekly Religious Attendance in Europe?",
caption = "@ryanburge\nData: European Social Survey 11, 2023")
save("ess_wk_attend_educ_all_facets.png", ht = 12)
library(ggplot2)
library(RColorBrewer)
library(geofacet)
# Generate pastel colors for all countries
num_countries <- length(unique(gg$country))
pastel_colors <- colorRampPalette(brewer.pal(9, "Pastel1"))(num_countries)
gg %>%
mutate(lab = round(mean, 2),
edulvlb_num = as.numeric(edulvlb_binned)) %>% # Convert education levels to numeric
ggplot(aes(x = edulvlb_num, y = mean, fill = edulvlb_binned)) +
geom_col(color = 'black', position = position_dodge()) + # Bars
geom_smooth(aes(group = country), method = "lm", color = "black", linetype = "dashed", se = FALSE) + # Trend line
theme_rb() +
y_pct() +
error_bar() +
scale_fill_manual(values = pastel_colors) + # Apply pastel color fill
scale_x_continuous(
breaks = c(1, 3, 6), # Ensuring at least 3 labels
labels = c("No formal/Primary", "Upper Secondary", "Grad. School")
) +
facet_geo(~ country, grid = "eu_grid1") + # Use European grid
labs(x = "Highest Level of Education", y = "",
title = "What's the Relationship Between Education and\nWeekly Religious Attendance in Europe?",
caption = "@ryanburge\nData: European Social Survey 11, 2023") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) # Rotate x-axis labels for clarity
save("ess_wk_attend_educ_geofacet.png", ht = 12, wd = 10)
cces_educ2 <- function(df, var){
df %>%
mutate(educ = frcode({{var}} == 1 | {{var}} == 2~ "HS or\nLess",
{{var}} == 3 ~ "Some\nCollege",
{{var}} == 4 ~ "2 Yr.",
{{var}} == 5 ~ "4 Yr.",
{{var}} == 6 ~ "Post-\nGrad"))
}
gg <- cces %>%
filter(year >= 2022) %>%
cces_educ2(educ) %>%
mutate(wk = case_when(pew_attendance == 1 | pew_attendance == 2 ~ 1,
pew_attendance <= 6 ~ 0)) %>%
group_by(educ, state) %>%
mean_ci(wk, wt = weight, ci = .84)
library(geofacet)
library(dplyr)
# Create a lookup table for state abbreviations
state_lookup <- setNames(state.abb, state.name)
# Convert state names to abbreviations
gg <- gg %>%
mutate(state = dplyr::recode(state, !!!state_lookup))
library(ggplot2)
library(geofacet)
library(RColorBrewer)
# Generate a pastel color palette for education levels
pastel_colors <- brewer.pal(5, "Pastel1")
gg %>%
mutate(
lab = round(mean, 2),
educ_num = as.numeric(educ) # Convert factor to numeric for trend lines
) %>%
ggplot(aes(x = educ_num, y = mean, fill = educ)) +
geom_col(color = 'black', position = position_dodge()) + # Bars
geom_smooth(aes(group = state), method = "lm", color = "black", linetype = "dashed", se = FALSE) + # Trend line
theme_rb() +
scale_y_continuous(labels = scales::percent) + # Format y-axis as percent
scale_fill_manual(values = pastel_colors) + # Apply pastel color fill
facet_geo(~ state, grid = "us_state_grid1") +
scale_x_continuous(
breaks = c(1, 3, 5), # Ensuring at least 3 labels
labels = c("HS or Less", "Some College", "4 Yr.")
) +
labs(x = "Highest Level of Education", y = "",
title = "What's the Relationship Between Education and Weekly Religious Attendance in the United States?",
caption = "@ryanburge\nData: Cooperative Election Study, 2022-2023") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) # Rotate x-axis labels
# Save the plot
save("usa_wk_attend_educ_geofacet.png", ht = 12, wd = 12)
regg <- ess %>%
filter(!rlgatnd %in% c(77, 88, 99)) %>% # Remove refusal, DK, no answer
mutate(wk = ifelse(rlgatnd %in% c(1, 2, 3), 1, 0)) %>%
filter(!edulvlb %in% c(5555, 7777, 8888, 9999)) %>% # Remove missing values
mutate(edulvlb_simplified = case_when(
edulvlb == 0 ~ 1, # No formal education
edulvlb == 113 ~ 2, # Primary education
edulvlb %in% c(129, 221) ~ 3, # Lower vocational
edulvlb %in% c(212, 213, 222, 223) ~ 4, # Lower secondary
edulvlb %in% c(229, 311, 312, 313) ~ 5, # Upper secondary general
edulvlb %in% c(321, 322, 323) ~ 6, # Upper secondary vocational
edulvlb %in% c(412, 413, 421, 422, 423) ~ 7, # Post-secondary non-tertiary
edulvlb %in% c(510, 520) ~ 8, # Short-cycle tertiary
edulvlb %in% c(610, 620) ~ 9, # Bachelor's or equivalent
edulvlb %in% c(710, 720) ~ 10, # Master's or equivalent
edulvlb == 800 ~ 11 # Doctoral degree
)) %>%
mutate(male = case_when(gndr == 1 ~ 1,
gndr == 2 ~ 0)) %>%
mutate(married = case_when(marsts == 1 ~ 1,
marsts <= 6 ~ 0))
model <- glm(wk ~ edulvlb_simplified + male + hinctnta + agea + married,
data = regg, family = binomial())
summary(model)
# Generate predicted probabilities for education levels
pred_edu <- ggeffect(model, terms = "edulvlb_simplified")
# Define labels (keeping every other level)
edu_labels <- c(
"1" = "No formal/\nPrimary",
"3" = "Lower\nVocational",
"5" = "Upper\nSecondary",
"7" = "Post-Sec",
"9" = "Bachelor's",
"11" = "Doctorate"
)
# Plot the predictions
ggplot(pred_edu, aes(x = as.numeric(x), y = predicted, fill = predicted)) +
geom_col(color = "black") + # Bars with gradient fill
geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.2) + # Confidence intervals
geom_text(aes(y = .01, label = paste0(round(predicted * 100, 1), "%")), size = 6.5, family = "font") + # Percentage labels inside bars
theme_rb() +
labs(
x = "Highest Level of Education",
y = "Predicted Probability of Weekly Church Attendance",
title = "Effect of Education on Weekly Church Attendance in Europe",
caption = "@ryanburge\nData: European Social Survey 11, 2023"
) +
scale_fill_gradient(low = "#D6EAF8", high = "#2E86C1") + # Lighter blue gradient
scale_y_continuous(labels = scales::percent) + # Format y-axis as percentages
scale_x_continuous(
breaks = as.numeric(names(edu_labels)), # Show every other level
labels = edu_labels
)
save("ess_regression_educ_attend_new.png")
# Load necessary packages
library(dplyr)
library(ggplot2)
library(sjPlot)
ces <- cces23
# Recode weekly church attendance (1 = Weekly+, 0 = Less often)
ces <- ces %>%
mutate(weekly_att = ifelse(pew_churatd %in% c(1, 2), 1, 0)) %>%
filter(!is.na(weekly_att))
# Recode gender (1 = Female, 0 = Male)
ces <- ces %>%
mutate(female = ifelse(gender4 == 2, 1, 0))
# Recode race (1 = White, 0 = Non-White)
ces <- ces %>%
mutate(white = ifelse(race == 1, 1, 0))
# Create age variable
ces <- ces %>%
mutate(age = 2023 - birthyr)
# Run logistic regression
model_ces <- glm(weekly_att ~ educ + female + faminc_new + white + age + pid3,
data = ces, family = binomial())
# Display model summary
summary(model_ces)
# Create predicted probabilities for education levels
pred_edu_ces <- ggpredict(model_ces, terms = "educ")
# Plot the predictions
ggplot(pred_edu_ces, aes(x = as.numeric(x), y = predicted, fill = predicted)) +
geom_col(color = "black") + # Bars with gradient fill
geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.2) + # Confidence intervals
geom_text(aes(y = .02, label = paste0(round(predicted * 100, 0), "%")), size = 9, family = "font") + # Percentage labels inside bars
theme_rb() +
labs(
x = "Highest Level of Education",
y = "Predicted Probability of Weekly Church Attendance",
title = "Effect of Education on Weekly Church Attendance in the U.S.",
caption = "@ryanburge\nData: Cooperative Election Study 2023"
) +
scale_y_continuous(labels = scales::percent) + # Format y-axis as percentages
scale_x_continuous(
breaks = seq(1, 6, 1), # Show every other level
labels = c("HS or Less", "HS Grad", "Some College", "2 Yr.", "4 Yr.", "Grad. School")
) +
scale_fill_gradient(low = "#FEE5D9", high = "#A50F15") # Lighter red to dark red gradient
# Save the plot
save("ces_regression_educ_attend.png", wd = 6.5)
ess %>%
filter(!rlgatnd %in% c(77, 88, 99)) %>% # Remove refusal, DK, no answer
mutate(wk = ifelse(rlgatnd %in% c(1, 2, 3), 1, 0)) %>%
filter(!edulvlb %in% c(5555, 7777, 8888, 9999)) %>% # Remove missing values
mutate(edulvlb_simplified = case_when(
edulvlb == 0 ~ 1, # No formal education
edulvlb == 113 ~ 2, # Primary education
edulvlb %in% c(129, 221) ~ 3, # Lower vocational
edulvlb %in% c(212, 213, 222, 223) ~ 4, # Lower secondary
edulvlb %in% c(229, 311, 312, 313) ~ 5, # Upper secondary general
edulvlb %in% c(321, 322, 323) ~ 6, # Upper secondary vocational
edulvlb %in% c(412, 413, 421, 422, 423) ~ 7, # Post-secondary non-tertiary
edulvlb %in% c(510, 520) ~ 8, # Short-cycle tertiary
edulvlb %in% c(610, 620) ~ 9, # Bachelor's or equivalent
edulvlb %in% c(710, 720) ~ 10, # Master's or equivalent
edulvlb == 800 ~ 11 # Doctoral degree
)) %>%
mutate(edulvlb_factor = factor(edulvlb_simplified, levels = 1:11,
labels = c("No formal education",
"Primary education",
"Lower vocational",
"Lower secondary",
"Upper secondary general",
"Upper secondary vocational",
"Post-secondary non-tertiary",
"Short-cycle tertiary",
"Bachelor’s or equivalent",
"Master’s or equivalent",
"Doctoral degree"))) %>%
mutate(edulvlb_binned = case_when(
edulvlb_factor %in% c("No formal education", "Primary education") ~ "No formal/\nPrimary",
edulvlb_factor == "Lower secondary" ~ "Lower\nsecondary",
edulvlb_factor %in% c("Upper secondary general", "Upper secondary vocational") ~ "Upper\nsecondary",
edulvlb_factor %in% c("Post-secondary non-tertiary", "Short-cycle tertiary") ~ "Post-sec",
edulvlb_factor == "Bachelor’s or equivalent" ~ "Bachelor’s",
edulvlb_factor %in% c("Master’s or equivalent", "Doctoral degree") ~ "Grad.\nSchool"
) %>%
factor(levels = c("No formal/\nPrimary", "Lower\nsecondary", "Upper\nsecondary",
"Post-sec", "Bachelor’s", "Grad.\nSchool"))) %>%
ct(edulvlb_binned, wt = dweight, show_na = FALSE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment