Created
July 10, 2025 19:57
-
-
Save acbass49/77a5f1837d0c303d1089f861fbb5b386 to your computer and use it in GitHub Desktop.
12 The Mountain Effect
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| library(tidyverse) | |
| library(haven) | |
| library(binom) | |
| library(car) | |
| library(openxlsx) | |
| data <- haven::read_sav('') | |
| data$stayed_mormon <- dplyr::case_when( | |
| data$oth16 %in% c(64) & data$other %in% c(64) ~ 1, | |
| data$oth16 %in% c(64) & !(data$other %in% c(64)) ~ 0, | |
| TRUE ~ NA_real_ | |
| ) | |
| data$left <- dplyr::case_when( | |
| data$oth16 %in% c(64) & data$other %in% c(64) ~ 0, | |
| data$oth16 %in% c(64) & !(data$other %in% c(64)) ~ 1, | |
| TRUE ~ NA_real_ | |
| ) | |
| data$mtn16 <- ifelse(data$region %in% c(8), 1, 0) | |
| plot <- data |> | |
| group_by(mtn16) |> | |
| count(stayed_mormon, wt=wtssps) |> | |
| drop_na() |> | |
| mutate(prop = n / sum(n), total_n = sum(n)) |> | |
| mutate( | |
| lower = binom.confint(x = n, n = total_n, method = "wilson")$lower, | |
| upper = binom.confint(x = n, n = total_n, method = "wilson")$upper | |
| ) |> | |
| ggplot(aes(x = factor(mtn16), y = prop, fill = factor(stayed_mormon))) + | |
| geom_bar(stat = "identity", position = "dodge") + | |
| geom_errorbar(aes(ymin = lower, ymax = upper), position = position_dodge(0.9), width = 0.25) + | |
| scale_fill_manual( | |
| values = c("#1abc9c", "#4169e1"), | |
| labels = c("No longer LDS", "Still LDS") | |
| ) + | |
| scale_x_discrete( | |
| labels = c("0" = "Grew up Outside Mtn Region", "1" = "Inside Mtn Region") | |
| ) + | |
| scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + | |
| labs( | |
| x = "Mountain Region", | |
| y = "Proportion", | |
| fill = "Staying in Mormonism", | |
| title = "More Leave Who Grew Up Outside of Mtn Region", | |
| caption = "@mormon_metrics\nSource: General Social Survey (GSS) 1973-2022 (n=789)" | |
| ) + | |
| theme_minimal() + | |
| theme( | |
| axis.ticks = element_blank(), | |
| axis.title = element_text(size = 14), | |
| axis.text.y = element_text(size = 10), | |
| axis.text.x = element_text(size = 10), | |
| plot.background = element_rect(fill = "grey95"), # Change entire plot background color here | |
| panel.background = element_blank(), | |
| text = element_text(face = "bold", family = "Cairo"), | |
| plot.title = element_text(size = 15, face = "bold"), | |
| legend.title = element_blank(), | |
| legend.background = element_blank(), | |
| legend.box.background = element_blank(), | |
| legend.key = element_blank(), | |
| panel.grid = element_blank(), | |
| axis.title.x = element_blank(), | |
| legend.position = "top", | |
| panel.grid.major.y = element_line(color = "lightgrey", linetype = 'solid'), | |
| plot.caption = element_text(size = 8,family = "Cairo") | |
| ) | |
| ggsave("./images/12_the_utah_effect_1.png", plot, width = 1500, height = 1500, units = "px", dpi = 300) | |
| # now i need to build a model with marital status, race, age, decade, education, and gender | |
| data$age_rc <- factor(car::recode(as.numeric(data$age), | |
| "18:34 = '18-34'; | |
| 35:49 = '35-49'; | |
| 51:64 = '51-64'; | |
| 65:100 = '65+'" | |
| ), levels = c("18-34", "35-49", "51-64", "65+")) | |
| data$decade <- case_when( | |
| data$year %in% 1970:1989 ~ 1, | |
| data$year %in% 1990:1999 ~ 2, | |
| data$year %in% 2000:2009 ~ 3, | |
| data$year %in% 2010:2019 ~ 4, | |
| data$year %in% 2020:2029 ~ 5 | |
| ) | |
| data$educ_rc <- factor(car::recode(as.numeric(data$educ), | |
| "0:12 = 'HS Grad or less'; | |
| 13:15 = 'Some College'; | |
| 16 = 'College Grad'; | |
| 17:20 = 'Postgrad'" | |
| ), levels = c("HS Grad or less", "Some College", "College Grad", "Postgrad")) | |
| model_1 <- glm( | |
| "left ~ factor(marital) + factor(race) + age_rc + decade + educ_rc + sex + mtn16", | |
| data = data, | |
| family = binomial(), | |
| weights = wtssps | |
| ) | |
| summary(model_1) | |
| model_2 <- glm( | |
| "left ~ factor(marital) + factor(race) + age_rc + decade + educ_rc*sex + mtn16", | |
| data = data |> filter(cohort >= 1980), # filter out those born before 1940 | |
| family = binomial(), | |
| weights = wtssps | |
| ) | |
| summary(model_2) | |
| # now I need to get the predicted probabilities for mtn16 | |
| # Get the most common (or reference) values for each predictor | |
| reference_row <- data.frame( | |
| marital = as.factor(names(sort(table(data[data$oth16 %in% c(64),]$marital), decreasing = TRUE))[1]), | |
| race = as.factor(names(sort(table(data[data$oth16 %in% c(64),]$race), decreasing = TRUE))[1]), | |
| age_rc = factor("18-34", levels = levels(data[data$oth16 %in% c(64),]$age_rc)), # or use the most common | |
| decade = as.integer(names(sort(table(data[data$oth16 %in% c(64),]$decade), decreasing = TRUE))[1]), | |
| educ_rc = factor("HS Grad or less", levels = levels(data[data$oth16 %in% c(64),]$educ_rc)), # or use the most common | |
| sex = as.numeric(names(sort(table(data[data$oth16 %in% c(64),]$sex), decreasing = TRUE))[1]), | |
| mtn16 = c(0, 1) | |
| ) | |
| # Get predictions and standard errors on the link (logit) scale | |
| pred <- predict(model_1, newdata = reference_row, type = "link", se.fit = TRUE) | |
| # 95% confidence intervals on the link scale | |
| crit <- qnorm(0.975) | |
| link_lower <- pred$fit - crit * pred$se.fit | |
| link_upper <- pred$fit + crit * pred$se.fit | |
| # Convert to probability scale using inverse logit | |
| inv_logit <- function(x) exp(x) / (1 + exp(x)) | |
| prob_fit <- inv_logit(pred$fit) | |
| prob_lower <- inv_logit(link_lower) | |
| prob_upper <- inv_logit(link_upper) | |
| # Add to your reference_row | |
| reference_row$predicted_probability <- prob_fit | |
| reference_row$lower_95 <- prob_lower | |
| reference_row$upper_95 <- prob_upper | |
| print(reference_row) | |
| # Now I want to plot the results in a similar style | |
| plot <- ggplot(reference_row, aes(x = mtn16, y = predicted_probability)) + | |
| geom_point(size = 6) + | |
| geom_pointrange( | |
| aes(ymin = lower_95, ymax = upper_95), | |
| linewidth = 2 | |
| ) + | |
| labs( | |
| x = "Mountain Region", | |
| y = "Probability of Leaving LDS Church", | |
| fill = "Staying in Mormonism", | |
| subtitle = "Predicted Probabilities from Logistic Regression using controls\n for age, education, race, sex, decade, and marital status", | |
| title = "2X Higher Probability of Leaving LDS Church for\nThose Who Grew Up Outside the Mtn. Region", | |
| caption = "@mormon_metrics\nSource: General Social Survey (GSS) 1973-2022 (n=789)" | |
| ) + | |
| geom_text( | |
| aes(label = scales::percent(predicted_probability, accuracy = 1)), | |
| hjust = -.5, | |
| size = 5, | |
| family = "Cairo", | |
| fontface = "bold" | |
| ) + | |
| scale_x_continuous( | |
| breaks = 0:1, | |
| labels = c("Grew up Outside Mtn Region", "Inside Mtn Region"), | |
| limits = c(-0.5, 1.5) | |
| ) + | |
| scale_y_continuous(labels = scales::percent_format(accuracy = 1), breaks = seq(0, 0.5, by = 0.05), limits = c(0, 0.5)) + | |
| theme_minimal() + | |
| theme( | |
| axis.ticks = element_blank(), | |
| axis.title = element_text(size = 14), | |
| axis.text.y = element_text(size = 10), | |
| axis.text.x = element_text(size = 10), | |
| plot.background = element_rect(fill = "grey95"), # Change entire plot background color here | |
| panel.background = element_blank(), | |
| text = element_text(face = "bold", family = "Cairo"), | |
| plot.title = element_text(size = 15, face = "bold"), | |
| legend.title = element_blank(), | |
| legend.background = element_blank(), | |
| legend.box.background = element_blank(), | |
| legend.key = element_blank(), | |
| panel.grid = element_blank(), | |
| axis.title.x = element_blank(), | |
| legend.position = "top", | |
| panel.grid.major.y = element_line(color = "lightgrey", linetype = 'solid'), | |
| plot.caption = element_text(size = 8,family = "Cairo") | |
| ) | |
| ggsave("./images/12_the_utah_effect_2.png", plot, width = 1500, height = 1500, units = "px", dpi = 300) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment