Skip to content

Instantly share code, notes, and snippets.

@acbass49
Created July 10, 2025 19:57
Show Gist options
  • Select an option

  • Save acbass49/77a5f1837d0c303d1089f861fbb5b386 to your computer and use it in GitHub Desktop.

Select an option

Save acbass49/77a5f1837d0c303d1089f861fbb5b386 to your computer and use it in GitHub Desktop.
12 The Mountain Effect
library(tidyverse)
library(haven)
library(binom)
library(car)
library(openxlsx)
data <- haven::read_sav('')
data$stayed_mormon <- dplyr::case_when(
data$oth16 %in% c(64) & data$other %in% c(64) ~ 1,
data$oth16 %in% c(64) & !(data$other %in% c(64)) ~ 0,
TRUE ~ NA_real_
)
data$left <- dplyr::case_when(
data$oth16 %in% c(64) & data$other %in% c(64) ~ 0,
data$oth16 %in% c(64) & !(data$other %in% c(64)) ~ 1,
TRUE ~ NA_real_
)
data$mtn16 <- ifelse(data$region %in% c(8), 1, 0)
plot <- data |>
group_by(mtn16) |>
count(stayed_mormon, wt=wtssps) |>
drop_na() |>
mutate(prop = n / sum(n), total_n = sum(n)) |>
mutate(
lower = binom.confint(x = n, n = total_n, method = "wilson")$lower,
upper = binom.confint(x = n, n = total_n, method = "wilson")$upper
) |>
ggplot(aes(x = factor(mtn16), y = prop, fill = factor(stayed_mormon))) +
geom_bar(stat = "identity", position = "dodge") +
geom_errorbar(aes(ymin = lower, ymax = upper), position = position_dodge(0.9), width = 0.25) +
scale_fill_manual(
values = c("#1abc9c", "#4169e1"),
labels = c("No longer LDS", "Still LDS")
) +
scale_x_discrete(
labels = c("0" = "Grew up Outside Mtn Region", "1" = "Inside Mtn Region")
) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
labs(
x = "Mountain Region",
y = "Proportion",
fill = "Staying in Mormonism",
title = "More Leave Who Grew Up Outside of Mtn Region",
caption = "@mormon_metrics\nSource: General Social Survey (GSS) 1973-2022 (n=789)"
) +
theme_minimal() +
theme(
axis.ticks = element_blank(),
axis.title = element_text(size = 14),
axis.text.y = element_text(size = 10),
axis.text.x = element_text(size = 10),
plot.background = element_rect(fill = "grey95"), # Change entire plot background color here
panel.background = element_blank(),
text = element_text(face = "bold", family = "Cairo"),
plot.title = element_text(size = 15, face = "bold"),
legend.title = element_blank(),
legend.background = element_blank(),
legend.box.background = element_blank(),
legend.key = element_blank(),
panel.grid = element_blank(),
axis.title.x = element_blank(),
legend.position = "top",
panel.grid.major.y = element_line(color = "lightgrey", linetype = 'solid'),
plot.caption = element_text(size = 8,family = "Cairo")
)
ggsave("./images/12_the_utah_effect_1.png", plot, width = 1500, height = 1500, units = "px", dpi = 300)
# now i need to build a model with marital status, race, age, decade, education, and gender
data$age_rc <- factor(car::recode(as.numeric(data$age),
"18:34 = '18-34';
35:49 = '35-49';
51:64 = '51-64';
65:100 = '65+'"
), levels = c("18-34", "35-49", "51-64", "65+"))
data$decade <- case_when(
data$year %in% 1970:1989 ~ 1,
data$year %in% 1990:1999 ~ 2,
data$year %in% 2000:2009 ~ 3,
data$year %in% 2010:2019 ~ 4,
data$year %in% 2020:2029 ~ 5
)
data$educ_rc <- factor(car::recode(as.numeric(data$educ),
"0:12 = 'HS Grad or less';
13:15 = 'Some College';
16 = 'College Grad';
17:20 = 'Postgrad'"
), levels = c("HS Grad or less", "Some College", "College Grad", "Postgrad"))
model_1 <- glm(
"left ~ factor(marital) + factor(race) + age_rc + decade + educ_rc + sex + mtn16",
data = data,
family = binomial(),
weights = wtssps
)
summary(model_1)
model_2 <- glm(
"left ~ factor(marital) + factor(race) + age_rc + decade + educ_rc*sex + mtn16",
data = data |> filter(cohort >= 1980), # filter out those born before 1940
family = binomial(),
weights = wtssps
)
summary(model_2)
# now I need to get the predicted probabilities for mtn16
# Get the most common (or reference) values for each predictor
reference_row <- data.frame(
marital = as.factor(names(sort(table(data[data$oth16 %in% c(64),]$marital), decreasing = TRUE))[1]),
race = as.factor(names(sort(table(data[data$oth16 %in% c(64),]$race), decreasing = TRUE))[1]),
age_rc = factor("18-34", levels = levels(data[data$oth16 %in% c(64),]$age_rc)), # or use the most common
decade = as.integer(names(sort(table(data[data$oth16 %in% c(64),]$decade), decreasing = TRUE))[1]),
educ_rc = factor("HS Grad or less", levels = levels(data[data$oth16 %in% c(64),]$educ_rc)), # or use the most common
sex = as.numeric(names(sort(table(data[data$oth16 %in% c(64),]$sex), decreasing = TRUE))[1]),
mtn16 = c(0, 1)
)
# Get predictions and standard errors on the link (logit) scale
pred <- predict(model_1, newdata = reference_row, type = "link", se.fit = TRUE)
# 95% confidence intervals on the link scale
crit <- qnorm(0.975)
link_lower <- pred$fit - crit * pred$se.fit
link_upper <- pred$fit + crit * pred$se.fit
# Convert to probability scale using inverse logit
inv_logit <- function(x) exp(x) / (1 + exp(x))
prob_fit <- inv_logit(pred$fit)
prob_lower <- inv_logit(link_lower)
prob_upper <- inv_logit(link_upper)
# Add to your reference_row
reference_row$predicted_probability <- prob_fit
reference_row$lower_95 <- prob_lower
reference_row$upper_95 <- prob_upper
print(reference_row)
# Now I want to plot the results in a similar style
plot <- ggplot(reference_row, aes(x = mtn16, y = predicted_probability)) +
geom_point(size = 6) +
geom_pointrange(
aes(ymin = lower_95, ymax = upper_95),
linewidth = 2
) +
labs(
x = "Mountain Region",
y = "Probability of Leaving LDS Church",
fill = "Staying in Mormonism",
subtitle = "Predicted Probabilities from Logistic Regression using controls\n for age, education, race, sex, decade, and marital status",
title = "2X Higher Probability of Leaving LDS Church for\nThose Who Grew Up Outside the Mtn. Region",
caption = "@mormon_metrics\nSource: General Social Survey (GSS) 1973-2022 (n=789)"
) +
geom_text(
aes(label = scales::percent(predicted_probability, accuracy = 1)),
hjust = -.5,
size = 5,
family = "Cairo",
fontface = "bold"
) +
scale_x_continuous(
breaks = 0:1,
labels = c("Grew up Outside Mtn Region", "Inside Mtn Region"),
limits = c(-0.5, 1.5)
) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1), breaks = seq(0, 0.5, by = 0.05), limits = c(0, 0.5)) +
theme_minimal() +
theme(
axis.ticks = element_blank(),
axis.title = element_text(size = 14),
axis.text.y = element_text(size = 10),
axis.text.x = element_text(size = 10),
plot.background = element_rect(fill = "grey95"), # Change entire plot background color here
panel.background = element_blank(),
text = element_text(face = "bold", family = "Cairo"),
plot.title = element_text(size = 15, face = "bold"),
legend.title = element_blank(),
legend.background = element_blank(),
legend.box.background = element_blank(),
legend.key = element_blank(),
panel.grid = element_blank(),
axis.title.x = element_blank(),
legend.position = "top",
panel.grid.major.y = element_line(color = "lightgrey", linetype = 'solid'),
plot.caption = element_text(size = 8,family = "Cairo")
)
ggsave("./images/12_the_utah_effect_2.png", plot, width = 1500, height = 1500, units = "px", dpi = 300)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment