Skip to content

Instantly share code, notes, and snippets.

@acbass49
Last active March 21, 2025 19:12
Show Gist options
  • Select an option

  • Save acbass49/d7975d28a960904330b8d209a982190e to your computer and use it in GitHub Desktop.

Select an option

Save acbass49/d7975d28a960904330b8d209a982190e to your computer and use it in GitHub Desktop.
Gender Diff In Older Singles
# Title: Exploring Singles
# Author; Alex Bass
# Date: 2025-03-08
# load packages
library(tidyverse)
library(haven)
library(sjlabelled)
# load data
data_07 <- read_sav('./data/rls/rls2007.sav')
data_14 <- read_sav('./data/rls/rls2014.sav')
data_24 <- read_sav('./data/rls/rls2024.sav')
# Step 1: Get variables to align as best as possible
vars_to_keep <- c(
'disaffiliated',
'active',
'inactive',
'inactive_or_disaffiliated',
'still_id',
'age_rc',
'marital_rc',
'year',
'female'
)
# 2007
data_07$age_rc <- factor(car::recode(
as.numeric(data_07$age),
"1:35='18-35';36:45='36-45';46:65='46-65';66:97='66+';99=NA", as.numeric = FALSE),
levels = c("18-35", "36-45", "46-65", "66+")
)
data_07$marital_rc <- as.numeric(car::recode(data_07$marital, "4:5=2;6=4;9=NA"))
data_07$female <- as.numeric(car::recode(data_07$sex, "1=0;2=1;9=NA"))
data_07$attend <- as.numeric(car::recode(data_07$q20, "1:2=1;else=0"))
data_07$mormon <- ifelse(data_07$reltrad == 20000, 1, 0)
data_07_lds <- data_07 |>
filter(q50 == 3) |>
mutate(
disaffiliated = ifelse(q50 == 3 & reltrad != 20000, 1, 0),
active = ifelse(q50 == 3 & reltrad == 20000 & q20 %in% c(1,2), 1, 0),
inactive = ifelse(q50 == 3 & reltrad == 20000 & !q20 %in% c(1,2), 1, 0),
inactive_or_disaffiliated = ifelse(disaffiliated == 1 | inactive == 1, 1, 0),
still_id = ifelse(q50 == 3 & reltrad == 20000, 1, 0),
year = 2007
) |>
select(all_of(vars_to_keep))
# 2014
data_14$age_rc <- factor(car::recode(as.numeric(data_14$agerec),
"1:3='18-35';4:5='36-45';6:9='46-65';10:15='66+';99=NA", as.numeric = FALSE),
levels = c("18-35", "36-45", "46-65", "66+"))
data_14$marital_rc <- as.numeric(car::recode(data_14$marital, "4:5=2;6=4;9=NA"))
data_14$female <- as.numeric(car::recode(data_14$SEX, "1=0;2=1"))
data_14$attend <- as.numeric(car::recode(data_14$attend, "1:2=1;else=0"))
data_14$mormon <- ifelse(data_14$RELTRAD == 20000, 1, 0)
data_14_lds <- data_14 |>
filter(qj1 == 3) |>
mutate(
disaffiliated = ifelse(qj1 == 3 & RELTRAD != 20000, 1, 0),
active = ifelse(qj1 == 3 & RELTRAD == 20000 & attend %in% c(1,2), 1, 0),
inactive = ifelse(qj1 == 3 & RELTRAD == 20000 & !attend %in% c(1,2), 1, 0),
inactive_or_disaffiliated = ifelse(disaffiliated == 1 | inactive == 1, 1, 0),
still_id = ifelse(qj1 == 3 & RELTRAD == 20000, 1, 0),
year = 2014
) |>
select(all_of(vars_to_keep))
# 2024
data_24$age_rc <- factor(car::recode(
as.numeric(data_24$BIRTHDECADE),
"6:7='18-35';5='36-45';3:4='46-65';1:2='66+';99=NA", as.numeric = FALSE),
levels = c(
"18-35", "36-45", "46-65", "66+")
)
data_24$marital_rc <- as.numeric(car::recode(data_24$MARITAL, "4:5=2;6=4;99=NA"))
data_24$female <- as.numeric(car::recode(data_24$GENDER, "1=0;2=1;3=NA;99=NA"))
data_24$attend <- as.numeric(car::recode(data_24$ATTNDPERRLS, "1:2=1;else=0"))
data_24$mormon <- ifelse(data_24$RELTRAD == 20000, 1, 0)
data_24_lds <- data_24 |>
filter(FRMREL == 20000) |>
mutate(
disaffiliated = ifelse(FRMREL == 20000 & RELTRAD != 20000, 1, 0),
active = ifelse(FRMREL == 20000 & RELTRAD == 20000 & ATTNDPERRLS %in% c(1,2), 1, 0),
inactive = ifelse(FRMREL == 20000 & RELTRAD == 20000 & !ATTNDPERRLS %in% c(1,2), 1, 0),
inactive_or_disaffiliated = ifelse(disaffiliated == 1 | inactive == 1, 1, 0),
still_id = ifelse(FRMREL == 20000 & RELTRAD == 20000, 1, 0),
year = 2024
) |>
select(all_of(vars_to_keep))
# combine data
data <- bind_rows(data_07_lds, data_14_lds, data_24_lds) |>
drop_na() |>
mutate(year = as.factor(year))
# singles by year
data |>
filter(marital_rc == 4) |>
group_by(year) |>
count(still_id)
# This is really not very large Nsizes 😡
data |>
filter(marital_rc == 4) |>
count(still_id)
sum(data$marital_rc == 4) # total of 248 singles
# Visualize age distribution by inactive_or_disaffiliated
data |>
group_by(still_id) |>
count(age_rc) |>
mutate(prop = n / sum(n)) |>
ggplot(aes(x = age_rc, y=prop, group=still_id, fill=still_id)) +
geom_col(position = 'dodge') +
labs(title = "Age Distribution of Everyone", x = "Age", y = "Count")
# Visualize single age distribution by inactive_or_disaffiliated
data |>
filter(marital_rc %in% c(4)) |>
group_by(still_id, marital_rc) |>
count(age_rc) |>
mutate(prop = n / sum(n)) |>
ggplot(aes(x = age_rc, y=prop, fill=still_id, group=still_id)) +
geom_col(position = 'dodge') +
labs(title = "Age Distribution of Everyone", x = "Age", y = "Count")
# Visualize age distribution by inactive_or_disaffiliated - stacked
data |>
filter(marital_rc %in% c(4) & still_id == 1) |>
group_by(female, marital_rc) |>
count(age_rc) |>
mutate(prop = n / sum(n)) |>
ggplot(aes(x = age_rc, y=prop, fill=female, group=female)) +
geom_col(position = 'dodge') +
labs(title = "Age Distribution of Everyone", x = "Age", y = "Count")
data |>
filter(marital_rc %in% c(4) & still_id == 0) |>
group_by(female, marital_rc) |>
count(age_rc) |>
mutate(prop = n / sum(n)) |>
ggplot(aes(x = age_rc, y=prop, fill=female, group=female)) +
geom_col(position = 'dodge') +
labs(title = "Age Distribution of Everyone", x = "Age", y = "Count")
# Checking to see the age distribution of never married singles
vars_to_keep <- c(
'age_rc',
'marital_rc',
'female',
'attend',
'mormon'
)
all_data <- data_07 |>
select(all_of(vars_to_keep)) |>
bind_rows(data_14 |> select(all_of(vars_to_keep))) |>
bind_rows(data_24 |> select(all_of(vars_to_keep))) |>
drop_na()
#overall population age distribution
all_data |>
count(age_rc) |>
mutate(prop = n / sum(n)) |>
ggplot(aes(x = age_rc, y=prop)) +
geom_col(position = 'dodge') +
labs(title = "Age Distribution of Everyone", x = "Age", y = "Count")
#never married singles age distribution
all_data |>
filter(marital_rc == 4) |>
count(age_rc) |>
mutate(prop = n / sum(n)) |>
ggplot(aes(x = age_rc, y=prop)) +
geom_col(position = 'dodge') +
labs(title = "Age Distribution of Never Married Singles", x = "Age", y = "Count")
#never married singles age distribution by gender
all_data |>
filter(marital_rc %in% c(4)) |>
group_by(female, marital_rc) |>
count(age_rc) |>
mutate(prop = n / sum(n)) |>
ggplot(aes(x = age_rc, y=prop, fill=female, group=female)) +
geom_col(position = 'dodge') +
labs(title = "Age Distribution of Everyone", x = "Age", y = "Count")
plot_3 <- all_data |>
filter(marital_rc == 4) |>
filter(mormon == 1) |>
mutate(age_rc = ifelse(age_rc == "66+"|age_rc == "46-65"|age_rc=="36-45", "2", age_rc)) |>
mutate(age_rc = ifelse(age_rc == "1", '18-35','36+')) |>
mutate(sex = ifelse(female == 1, "Female", "Male")) |>
mutate(attend_church = ifelse(attend == 1, "Weekly+", "Less Than Weekly")) |>
group_by(age_rc,sex) |>
count(attend_church) |>
mutate(prop = n / sum(n)) |>
ggplot() +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + # Format y-axis as percentages
labs(x = "Age Group",
y = "Proportion",
caption = "data: Pew Religious Landscape Study 2007,14,24\n@mormon_metrics")+
geom_col(aes(x = age_rc, y = prop, fill = attend_church), position = 'stack') +
facet_wrap(~sex) +
theme_minimal(base_family = "Cairo", base_size = 15) +
theme(legend.position = "top",plot.background = element_rect(fill = "grey95")) +
ggtitle("Mormon Single Church Attendance by Age X Gender (n=189)")
ggsave("./images/0_1_older_singles_4.png", plot = plot_3, width = 8, height = 8, units = "in", dpi = 300)
all_data |>
filter(marital_rc == 4) |>
filter(mormon == 1) |>
mutate(age_rc = ifelse(age_rc == "66+"|age_rc == "46-65"|age_rc=="36-45", "2", age_rc)) |>
mutate(age_rc = ifelse(age_rc == "1", '18-35','36+')) |>
mutate(sex = ifelse(female == 1, "Female", "Male")) |>
mutate(attend_church = ifelse(attend == 1, "Weekly+", "Less Than Weekly")) |>
group_by(age_rc,sex) |>
count(attend_church) |>
mutate(prop = n / sum(n)) |>
ungroup() |>
summarise(sum(n))
#gist: https://gist.github.com/acbass49/d7975d28a960904330b8d209a982190e
# Adding divorce analysis
data |>
filter(marital_rc == 3) |>
count(still_id)
# there are 96 disaffiliated divorced individuals
# 78 divorced active individuals
# 174 total divorced individuals
plot_2 <- data |>
filter(marital_rc == 3) |>
mutate(
sex = ifelse(female == 1, "female", "male"),
type = ifelse(still_id == 1, "mormon", "disaffiliated")
) |>
group_by(type) |>
count(sex) |>
mutate(prop = n / sum(n)) |>
ggplot(aes(x = type, y = prop, fill = sex)) +
geom_col(position = 'stack') +
labs(
title = "Sex Breakdown of Divorced By Active + Disaffiliated",
x = "Sex",
y = "Proportion",
caption = "data: Pew Religious Landscape Study 2007,14,24\n@mormon_metrics"
) +
scale_fill_manual(values = c("pink", "lightblue")) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + # Format y-axis as percentages
theme_minimal(base_family = "Cairo", base_size = 15) +
theme(legend.position = "top",plot.background = element_rect(fill = "grey95"))
ggsave("./images/0_1_older_singles_2.png", plot = plot_2, width = 8, height = 8, units = "in", dpi = 300)
data |>
filter(marital_rc == 3 & still_id == 1) |>
group_by(female) |>
count(age_rc) |>
mutate(prop = n / sum(n))
data |>
filter(marital_rc == 3 & still_id == 0) |>
group_by(female) |>
count(age_rc) |>
mutate(prop = n / sum(n))
plot_1 <- data |>
filter(marital_rc == 3) |>
mutate(type = ifelse(still_id == 1, "Mormon", "Disaffiliated")) |>
group_by(age_rc) |>
count(type) |>
mutate(prop = n / sum(n)) |>
ggplot(aes(x = age_rc, y = prop, fill = type)) +
geom_col(position = 'stack') +
labs(
title = "Age Distribution of Divorced Individuals",
x = "Age",
y = "Proportion",
caption = "data: Pew Religious Landscape Study 2007,14,24\n@mormon_metrics"
) +
scale_fill_manual(values = c("orange", "#0000ffbe")) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + # Format y-axis as percentages
theme_minimal(base_family = "Cairo", base_size = 15) +
theme(legend.position = "top",plot.background = element_rect(fill = "grey95"))
ggsave("./images/0_1_older_singles_3.png", plot = plot_1, width = 8, height = 8, units = "in", dpi = 300)
# Adding CES analysis
library(patchwork)
# load data
data <- read_rds("./data/CES/until2023.rds")
data$mormon <- ifelse(data$religion == 3, "Mormon", "Non-Mormon")
data$attend <- ifelse(data$relig_church %in% 1:2, "Weekly+", "Less Than Weekly")
#men vs. women by age
data |>
mutate(age = car::recode(age, "18:25='18-25';26:35='26-35';36:45='36-45';46:55='46-55';56:65='56-65';66:120='66+'")) |>
mutate(age = factor(age, levels = c("18-25", "26-35", "36-45", "46-55", "56-65", "66+"))) |>
filter(marstat == 5) |>
filter(mormon == "Mormon") |>
group_by(age) |>
count(gender) |>
drop_na() |>
mutate(prop = n / sum(n))
#active men vs. women by age
data |>
mutate(age = car::recode(age, "18:25='18-25';26:35='26-35';36:45='36-45';46:55='46-55';56:65='56-65';66:120='66+'")) |>
mutate(age = factor(age, levels = c("18-25", "26-35", "36-45", "46-55", "56-65", "66+"))) |>
filter(marstat == 5) |>
filter(mormon == "Mormon") |>
filter(attend == "Weekly+") |>
group_by(age) |>
count(gender) |>
drop_na() |>
mutate(prop = n / sum(n))
data |>
filter(mormon == "Mormon") |>
filter(marstat == 5) |>
drop_na(age, attend, gender) |>
mutate(age = as.numeric(age), attend = factor(attend),gender = sjlabelled::as_character(gender)) |>
ggplot(aes(x = age, fill = attend)) +
geom_density(alpha=0.6) +
scale_fill_manual(values = c("orange", "blue")) +
xlim(18,80) +
ggtitle("Mormon Single Population By Church Attendance") +
theme_minimal(base_family = "Cairo") +
theme(legend.position = "top") +
facet_grid(~gender)
(plot_4 <- data |>
filter(marstat == 5) |>
filter(mormon == "Mormon") |>
drop_na(age, attend, gender) |>
mutate(age = as.numeric(age), attend = factor(attend),gender = sjlabelled::as_character(gender)) |>
mutate(age_rc = factor(car::recode(age, "1:35='18-35';36:120='36+'"),levels = c("18-35", "36+"))) |>
group_by(age_rc,gender) |>
count(attend) |>
mutate(prop = n / sum(n)) |>
ggplot() +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + # Format y-axis as percentages
labs(x = "Age Group",
y = "Proportion",
caption = "data: Cooperative Election Study 2007-23\n@mormon_metrics")+
geom_col(aes(x = age_rc, y = prop, fill = attend), position = 'stack') +
facet_wrap(~gender) +
theme_minimal(base_family = "Cairo", base_size = 15) +
theme(legend.position = "top",plot.background = element_rect(fill = "grey95")) +
ggtitle("Mormon Single Church Attendance by Age X Gender (n=1446)"))
ggsave("./images/0_1_older_singles_5.png", plot = plot_4, width = 8, height = 8, units = "in", dpi = 300)
p1 <- data |>
mutate(
age = as.numeric(data$age)
) |>
drop_na(age, mormon) |>
filter(marstat == 5) |>
ggplot(aes(x = age,fill=mormon)) +
geom_density(alpha=0.6, position = 'identity') +
scale_fill_manual(values = c("orange", "blue")) +
xlim(18,80) +
ggtitle("Age Distribution of Singles") +
theme_minimal(base_family = "Cairo") +
theme(legend.position = "top")
p2 <- data |>
mutate(
gender = sjlabelled::as_character(data$gender),
age = as.numeric(data$age)
) |>
drop_na(gender, age) |>
filter(marstat == 5) |>
ggplot(aes(x = age,fill=gender)) +
geom_density(alpha=0.6, position = 'identity') +
scale_fill_manual(values = c("pink", "lightblue")) +
xlim(18,80) +
theme_minimal(base_family = "Cairo") +
theme(legend.position = "top") +
ggtitle("US Single Population By Gender")
p3 <- data |>
mutate(
gender = sjlabelled::as_character(data$gender),
age = as.numeric(data$age)
) |>
drop_na(gender, age) |>
filter(marstat == 5 & religion == 3) |>
ggplot(aes(x = age,fill=gender)) +
geom_density(alpha=0.6, position = 'identity') +
scale_fill_manual(values = c("pink", "lightblue")) +
xlim(18,80) +
theme_minimal(base_family = "Cairo") +
ggtitle("Mormon Single Population By Gender") +
theme(legend.position = "top")
combined_plot <- p1 + p2 + p3 +
plot_annotation(
caption = "Data: Cooperative Election Study 2007-23\n@mormon_metrics",
theme = theme(
text = element_text(family = "Cairo"),
plot.background = element_rect(fill = "grey95")
)
)
ggsave("./images/0_1_older_singles_1.png", plot = combined_plot, width = 12, height = 8, units = "in", dpi = 300)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment