Skip to content

Instantly share code, notes, and snippets.

@ryanburge
Created February 14, 2025 22:49
Show Gist options
  • Save ryanburge/37272aa04b2dd1f4b00a689a611f98b3 to your computer and use it in GitHub Desktop.
Save ryanburge/37272aa04b2dd1f4b00a689a611f98b3 to your computer and use it in GitHub Desktop.
# Packages ----
library(stringr)

# Directory that holds the Stata (.dta) files
path <- "E:/mtf/new/all"

# Full paths of every file in `path` whose name ends in ".dta"
files <- list.files(path = path, pattern = "\\.dta$", full.names = TRUE)
# Rename one file so that the two-digit year in its name becomes a four-digit
# year.
#
# The first run of two digits in the base name is taken as the year: values up
# to 22 get the prefix "20", all others get "19". Only that first run is
# examined, so a name that already holds a four-digit year is still rewritten
# (e.g. "mtf1995.dta" becomes "mtf201995.dta").
# NOTE(review): the rest of this script looks up objects named "mtf20<year>",
# which matches that outcome, so this behaviour is intentionally unchanged.
#
# @param file_path Path to a single file.
# @return The path the file was renamed to, or `file_path` unchanged when the
#   base name contains no two-digit run.
rename_files <- function(file_path) {
  base_name <- basename(file_path)

  # First two-digit run in the name; zero-length when there is none
  short_year <- regmatches(base_name, regexpr("[0-9]{2}", base_name))
  if (length(short_year) == 0L) {
    # Without this guard the year would be NA and the file would end up
    # renamed to the literal name "NA"
    message("No two-digit year in '", base_name, "'; file left unchanged")
    return(file_path)
  }

  # Scalar decision, so plain if/else rather than ifelse()
  century <- if (as.numeric(short_year) <= 22) "20" else "19"
  full_year <- paste0(century, short_year)

  # Replace only the first occurrence, matched literally
  new_base_name <- sub(short_year, full_year, base_name, fixed = TRUE)

  # Stay in the file's own directory instead of relying on a global `path`
  new_file_path <- file.path(dirname(file_path), new_base_name)

  file.rename(file_path, new_file_path)
  new_file_path
}
# Apply the renaming function to all files. vapply() pins the result to a
# character vector; sapply() would hand back an empty list when `files` is
# empty.
new_files <- vapply(files, rename_files, character(1))
# Print the new file names
print(new_files)
# Set the path to the directory containing your .dta files
path <- "E:/mtf/new/all"
# List the .dta files again so `files` holds the names now on disk
files <- list.files(path, pattern = "\\.dta$", full.names = TRUE)
# Turn a file path into the name its dataset is stored under: the base name
# of the file with its extension removed. No further sanitising is done, so
# the result is only a syntactic R name if the file name already is one.
#
# @param filepath Path to a file.
# @return The file's base name without its extension.
make_var_name <- function(filepath) {
  tools::file_path_sans_ext(basename(filepath))
}
# Import each file and assign it to a variable in the global environment.
# Only the side effect matters here, so the list lapply() returns (one full
# dataset per file) is discarded with invisible() instead of being echoed.
# NOTE(review): import() is not defined or attached anywhere in this file --
# presumably rio::import(); confirm the package is loaded before this runs.
invisible(lapply(files, function(file) {
  assign(make_var_name(file), import(file), envir = .GlobalEnv)
}))
# Remove the object named `mtf` if the import created one; guarding the call
# avoids a warning when no such object exists.
if (exists("mtf", envir = .GlobalEnv, inherits = FALSE)) {
  rm(mtf, envir = .GlobalEnv)
}
# Define the years you want to process
years <- 1995:2011
# Weighted share of respondents whose V195 answer is 1 or 2, for one survey
# year in the V195 / V5 variable layout.
#
# @param year Survey year; the data are read from the object named
#   "mtf20<year>" (e.g. "mtf201995").
# @return The mean_ci() result with a character `year` column added, or NULL
#   when no object with that name exists.
process_mtf <- function(year) {
  dataset_name <- paste0("mtf20", year) # Construct dataset name
  # Skip years with no loaded dataset, matching the other process_* functions
  # in this script (previously this function stopped with an error instead)
  if (!exists(dataset_name)) return(NULL)
  dataset <- get(dataset_name) # Retrieve dataset
  dataset %>%
    filter(V195 > 0, V195 != -9) %>% # Keep positive codes only
    mutate(V195_recode = ifelse(V195 %in% c(1, 2), 1, 0)) %>% # 1 = code 1 or 2
    mean_ci(V195_recode, wt = V5, ci = .84) %>%
    mutate(year = as.character(year)) # Add year column
}
# Run each year in `years` through process_mtf() and row-bind the output
results1 <- years %>% map_dfr(process_mtf)
# Years handled by the process_mtf() definition that follows
years <- 2012:2021
# Weighted share of respondents whose V2195 answer is 1 or 2, for one survey
# year in the V2195 / ARCHIVE_WT variable layout.
#
# @param year Survey year; the data are read from the object "mtf20<year>".
# @return NULL when that object does not exist; otherwise the mean_ci()
#   result with a character `year` column added.
process_mtf <- function(year) {
  obj_name <- paste0("mtf20", year)
  if (!exists(obj_name)) {
    return(NULL)
  }
  # Keep positive answer codes only
  answered <- filter(get(obj_name), V2195 > 0, V2195 != -9)
  # 1 when the answer is code 1 or 2, 0 otherwise
  recoded <- mutate(answered, V2195_recode = as.numeric(V2195 %in% c(1, 2)))
  estimate <- mean_ci(recoded, V2195_recode, wt = ARCHIVE_WT, ci = .84)
  mutate(estimate, year = as.character(year))
}
# Run the current `years` through process_mtf() and row-bind the output
results2 <- years %>% map_dfr(process_mtf)
# Stack both periods; year becomes numeric again for the x axis
gg <- mutate(bind_rows(results1, results2), year = as.numeric(year))
# Line chart of the yearly share with text labels at 1995 and 2021
ggplot(gg, aes(x = year, y = mean, group = 1)) +
  geom_line() +
  geom_point(stroke = 1, shape = 21, fill = "white", color = "firebrick") +
  theme_rb() +
  scale_y_continuous(labels = percent, limits = c(0, .75)) +
  add_text(x = 1995, y = .305, word = "34%", sz = 7) +
  add_text(x = 2021, y = .74, word = "71%", sz = 7) +
  scale_x_continuous(breaks = seq(1995, 2021, by = 5)) + # Break every 5 years
  labs(
    x = "Year",
    y = "",
    title = "The Share of High School Seniors Who Go On a Date Once a Month or Less",
    caption = "@ryanburge\nData: Monitoring the Future, 1995-2021"
  )
save("mtf_no_dates.png")
# Year ranges for the two variable layouts
years_1 <- 1995:2011 # Uses V169 for attendance
years_2 <- 2012:2021 # Uses V2169 for attendance
# Weighted share of respondents whose V195 answer is 1 or 2, split by
# attendance (V169), for one survey year in the V195 / V169 / V5 layout.
#
# @param year Survey year; the data are read from the object "mtf20<year>".
# @return NULL when that object does not exist; otherwise the mean_ci()
#   result per attendance group with a character `year` column added.
process_mtf_1 <- function(year) {
  obj_name <- paste0("mtf20", year)
  if (!exists(obj_name)) {
    return(NULL)
  }
  get(obj_name) %>%
    filter(V195 > 0, V195 != -9) %>% # Keep positive codes only
    mutate(V195_recode = as.numeric(V195 %in% c(1, 2))) %>% # 1 = code 1 or 2
    mutate(att = V169) %>%
    # Attendance codes 1-4 get the labels below
    mutate(
      att = frcode(
        att == 1 ~ "Never",
        att == 2 ~ "Rarely",
        att == 3 ~ "Monthly",
        att == 4 ~ "Weekly"
      )
    ) %>%
    group_by(att) %>%
    mean_ci(V195_recode, wt = V5, ci = .84) %>%
    mutate(year = as.character(year))
}
# Weighted share of respondents whose V2195 answer is 1 or 2, split by
# attendance (V2169), for one survey year in the V2195 / V2169 / ARCHIVE_WT
# layout.
#
# @param year Survey year; the data are read from the object "mtf20<year>".
# @return NULL when that object does not exist; otherwise the mean_ci()
#   result per attendance group with a character `year` column added.
process_mtf_2 <- function(year) {
  obj_name <- paste0("mtf20", year)
  if (!exists(obj_name)) {
    return(NULL)
  }
  # Keep positive answer codes only
  answered <- filter(get(obj_name), V2195 > 0, V2195 != -9)
  labelled <- mutate(
    answered,
    V2195_recode = as.numeric(V2195 %in% c(1, 2)), # 1 = code 1 or 2
    att = V2169,
    att = frcode(
      att == 1 ~ "Never",
      att == 2 ~ "Rarely",
      att == 3 ~ "Monthly",
      att == 4 ~ "Weekly"
    )
  )
  by_att <- mean_ci(group_by(labelled, att), V2195_recode, wt = ARCHIVE_WT, ci = .84)
  mutate(by_att, year = as.character(year))
}
# Apply function to all years and combine results
results_1 <- map_dfr(years_1, process_mtf_1)
results_2 <- map_dfr(years_2, process_mtf_2)
# Combine both periods into a single dataframe and drop rows without an
# attendance category. is.na() states that directly; comparing against the
# string "NA" only worked because filter() discards rows whose condition is NA.
final_results <- bind_rows(results_1, results_2) %>%
  filter(!is.na(att)) %>%
  mutate(year = as.numeric(year))
# One line per attendance category
final_results %>%
  ggplot(., aes(x = year, y = mean, group = att, color = att)) +
  geom_line() +
  geom_point(stroke = 1, shape = 21, fill = "white") +
  theme_rb(legend = TRUE) +
  scale_color_gdocs() +
  scale_y_continuous(labels = percent, limits = c(.25, .80)) +
  theme(plot.title = element_text(size = 15)) +
  scale_x_continuous(breaks = seq(1995, 2021, by = 5)) + # Show every 5 years
  labs(x = "Year", y = "", title = "The Share of High School Seniors Who Go On a Date Once a Month or Less by Religious Attendance",
       caption = "@ryanburge\nData: Monitoring the Future, 1995-2021")
save("mtf_no_dates_by_att.png")
# Define the years for each variable structure
years_1 <- 1995:2011 # Uses V194 for going out
years_2 <- 2012:2021 # Uses V2194 for going out
# Weighted share of respondents whose V194 answer is 1 or 2, for one survey
# year in the V194 / V5 variable layout.
#
# @param year Survey year; the data are read from the object "mtf20<year>".
# @return NULL when that object does not exist; otherwise the mean_ci()
#   result with a character `year` column added.
process_mtf_1 <- function(year) {
  dataset_name <- paste0("mtf20", year) # Construct dataset name
  if (!exists(dataset_name)) return(NULL) # Skip if dataset doesn't exist
  dataset <- get(dataset_name) # Retrieve dataset
  dataset %>%
    # Exclude the -9 sentinel, as the sibling functions do; the previous
    # `!= 9` looked like a typo and would have dropped a positive code 9
    filter(V194 > 0, V194 != -9) %>%
    mutate(V194_recode = ifelse(V194 %in% c(1, 2), 1, 0)) %>% # 1 = code 1 or 2
    mean_ci(V194_recode, wt = V5, ci = .84) %>%
    mutate(year = as.character(year)) # Add year column
}
# Weighted share of respondents whose V2194 answer is 1 or 2, for one survey
# year in the V2194 / ARCHIVE_WT variable layout.
#
# @param year Survey year; the data are read from the object "mtf20<year>".
# @return NULL when that object does not exist; otherwise the mean_ci()
#   result with a character `year` column added.
process_mtf_2 <- function(year) {
  obj_name <- paste0("mtf20", year)
  if (!exists(obj_name)) {
    return(NULL)
  }
  get(obj_name) %>%
    filter(V2194 > 0, V2194 != -9) %>% # Keep positive codes only
    mutate(V2194_recode = as.numeric(V2194 %in% c(1, 2))) %>% # 1 = code 1 or 2
    mean_ci(V2194_recode, wt = ARCHIVE_WT, ci = .84) %>%
    mutate(year = as.character(year))
}
# Apply function to all years and combine results
results_1 <- map_dfr(years_1, process_mtf_1)
results_2 <- map_dfr(years_2, process_mtf_2)
# Combine both periods into a single dataframe
gg <- bind_rows(results_1, results_2) %>% mutate(year = as.numeric(year))
# Line chart of the yearly share with text labels at 1995 and 2021
gg %>%
  ggplot(., aes(x = year, y = mean, group = 1)) +
  geom_line() +
  geom_point(stroke = 1, shape = 21, fill = "white", color = "darkgreen") +
  theme_rb() +
  scale_y_continuous(labels = percent, limits = c(0, .50)) +
  add_text(x = 1995, y = .19, word = "22%", sz = 7) +
  add_text(x = 2021, y = .485, word = "46%", sz = 7) +
  scale_x_continuous(breaks = seq(1995, 2021, by = 5)) + # Show every 5 years
  # Title typo fixed: "Go On Out" -> "Go Out"
  labs(x = "Year", y = "", title = "The Share of High School Seniors Who Go Out For Fun or Recreation Once a Week or Less",
       caption = "@ryanburge\nData: Monitoring the Future, 1995-2021")
save("mtf_no_social.png")
library(dplyr)
library(purrr)
# Define years for each variable structure
years_1 <- 1995:2011 # Uses V194 for going out, V169 for attendance
years_2 <- 2012:2021 # Uses V2194 for going out, V2169 for attendance
# Weighted share of respondents whose V194 answer is 1 or 2, split by
# attendance (V169), for one survey year in the V194 / V169 / V5 layout.
#
# @param year Survey year; the data are read from the object "mtf20<year>".
# @return NULL when that object does not exist; otherwise the mean_ci()
#   result per attendance group and (character) year.
process_mtf_1 <- function(year) {
  dataset_name <- paste0("mtf20", year) # Construct dataset name
  if (!exists(dataset_name)) return(NULL) # Skip if dataset doesn't exist
  dataset <- get(dataset_name) # Retrieve dataset
  dataset %>%
    # Exclude the -9 sentinel, as the sibling functions do; the previous
    # `!= 9` looked like a typo and would have dropped a positive code 9
    filter(V194 > 0, V194 != -9) %>%
    mutate(
      V194_recode = ifelse(V194 %in% c(1, 2), 1, 0), # 1 = code 1 or 2
      att = V169, # Assign attendance variable
      att = frcode(att == 1 ~ "Never",
                   att == 2 ~ "Rarely",
                   att == 3 ~ "Monthly",
                   att == 4 ~ "Weekly") # Label attendance codes 1-4
    ) %>%
    # year is added as a grouping column so it is carried into the result
    group_by(att, year = as.character(year)) %>%
    mean_ci(V194_recode, wt = V5, ci = .84)
}
# Weighted share of respondents whose V2194 answer is 1 or 2, split by
# attendance (V2169), for one survey year in the V2194 / V2169 / ARCHIVE_WT
# layout.
#
# @param year Survey year; the data are read from the object "mtf20<year>".
# @return NULL when that object does not exist; otherwise the mean_ci()
#   result per attendance group and (character) year.
process_mtf_2 <- function(year) {
  obj_name <- paste0("mtf20", year)
  if (!exists(obj_name)) {
    return(NULL)
  }
  # Keep positive answer codes only
  answered <- filter(get(obj_name), V2194 > 0, V2194 != -9)
  labelled <- mutate(
    answered,
    V2194_recode = as.numeric(V2194 %in% c(1, 2)), # 1 = code 1 or 2
    att = V2169,
    att = frcode(
      att == 1 ~ "Never",
      att == 2 ~ "Rarely",
      att == 3 ~ "Monthly",
      att == 4 ~ "Weekly"
    )
  )
  # year is added as a grouping column so it is carried into the result
  grouped <- group_by(labelled, att, year = as.character(year))
  mean_ci(grouped, V2194_recode, wt = ARCHIVE_WT, ci = .84)
}
# Apply function to all years and combine results
results_1 <- map_dfr(years_1, process_mtf_1)
results_2 <- map_dfr(years_2, process_mtf_2)
# Combine both periods into a single dataframe
final_results <- bind_rows(results_1, results_2) %>%
  filter(!is.na(att)) %>% # Remove NA attendance values
  mutate(year = as.numeric(year))
# View the combined dataset
print(final_results)
# One line per attendance category
final_results %>%
  ggplot(., aes(x = year, y = mean, group = att, color = att)) +
  geom_line() +
  geom_point(stroke = 1, shape = 21, fill = "white") +
  theme_rb(legend = TRUE) +
  scale_color_gdocs() +
  scale_y_continuous(labels = percent, limits = c(0, .52)) +
  theme(plot.title = element_text(size = 15)) +
  scale_x_continuous(breaks = seq(1995, 2021, by = 5)) + # Show every 5 years
  # Title typo fixed: "Go On Out" -> "Go Out"
  labs(x = "Year", y = "", title = "The Share of High School Seniors Who Go Out For Fun or Recreation Once a Week or Less",
       subtitle = "By Religious Attendance",
       caption = "@ryanburge\nData: Monitoring the Future, 1995-2021")
save("mtf_no_social_by_att.png")
# Define the years for each variable structure
years_1 <- 1995:2011 # Uses V191 for job status
years_2 <- 2012:2022 # Uses V2191 for job status
# Weighted share of respondents whose V191 answer is code 1, for one survey
# year in the V191 / V5 variable layout.
# NOTE(review): the plot built from this labels code 1 as "zero hours" of
# work per week -- confirm against the codebook.
#
# @param year Survey year; the data are read from the object "mtf20<year>".
# @return NULL when that object does not exist; otherwise the mean_ci()
#   result with a character `year` column added.
process_mtf_1 <- function(year) {
  dataset_name <- paste0("mtf20", year) # Construct dataset name
  if (!exists(dataset_name)) return(NULL) # Skip if dataset doesn't exist
  dataset <- get(dataset_name) # Retrieve dataset
  dataset %>%
    # Drop missing / non-positive codes first. Previously they fell into the
    # `TRUE ~ 0` branch and were counted as valid "not code 1" answers, which
    # deflates the share; the combined analysis later in this script already
    # treats them as missing.
    filter(V191 > 0) %>%
    mutate(job = case_when(V191 == 1 ~ 1, TRUE ~ 0)) %>%
    mean_ci(job, wt = V5, ci = .84) %>%
    mutate(year = as.character(year)) # Add year column
}
# Weighted share of respondents whose V2191 answer is code 1, for one survey
# year in the V2191 / ARCHIVE_WT variable layout.
# NOTE(review): the plot built from this labels code 1 as "zero hours" of
# work per week -- confirm against the codebook.
#
# @param year Survey year; the data are read from the object "mtf20<year>".
# @return NULL when that object does not exist; otherwise the mean_ci()
#   result with a character `year` column added.
process_mtf_2 <- function(year) {
  dataset_name <- paste0("mtf20", year) # Construct dataset name
  if (!exists(dataset_name)) return(NULL) # Skip if dataset doesn't exist
  dataset <- get(dataset_name) # Retrieve dataset
  dataset %>%
    # Drop missing / non-positive codes first. Previously they fell into the
    # `TRUE ~ 0` branch and were counted as valid "not code 1" answers, which
    # deflates the share; the combined analysis later in this script already
    # treats them as missing.
    filter(V2191 > 0) %>%
    mutate(job = case_when(V2191 == 1 ~ 1, TRUE ~ 0)) %>%
    mean_ci(job, wt = ARCHIVE_WT, ci = .84) %>%
    mutate(year = as.character(year)) # Add year column
}
# Run each period through its function and row-bind the yearly output
results_1 <- years_1 %>% map_dfr(process_mtf_1)
results_2 <- years_2 %>% map_dfr(process_mtf_2)
# Stack both periods; year becomes numeric again for the x axis
gg <- mutate(bind_rows(results_1, results_2), year = as.numeric(year))
# Line chart of the yearly share with text labels at 1995 and 2022
ggplot(gg, aes(x = year, y = mean, group = 1)) +
  geom_line() +
  geom_point(stroke = 1, shape = 21, fill = "white", color = "darkorchid") +
  theme_rb() +
  scale_y_continuous(labels = percent, limits = c(0, .42)) +
  add_text(x = 1995, y = .19, word = "22%", sz = 7) +
  add_text(x = 2022, y = .38, word = "35%", sz = 7) +
  scale_x_continuous(breaks = seq(1995, 2021, by = 5)) + # Break every 5 years
  labs(
    x = "Year",
    y = "",
    title = "On the average over the school year, how many hours per week do you work\nin a paid or unpaid job? - Share Saying Zero Hours",
    caption = "@ryanburge\nData: Monitoring the Future, 1995-2022"
  )
save("mtf_no_job.png")
library(dplyr)
library(purrr)
# Year ranges for the two variable layouts
years_1 <- 1995:2011 # Uses V195, V194, V191
years_2 <- 2012:2022 # Uses V2195, V2194, V2191
# Weighted share of respondents for whom all three flags hold at once
# (V195 in 1-2, V194 in 1-2, V191 equal to 1), for one survey year in the
# V195 / V194 / V191 / V5 layout.
#
# @param year Survey year; the data are read from the object "mtf20<year>".
# @return NULL when that object does not exist; otherwise the mean_ci()
#   result with a character `year` column added.
process_mtf_1 <- function(year) {
  obj_name <- paste0("mtf20", year)
  if (!exists(obj_name)) {
    return(NULL)
  }
  # Each flag: 1 for the low codes, 0 for higher codes, NA for anything else
  flagged <- get(obj_name) %>%
    mutate(
      date_less = case_when(V195 %in% c(1, 2) ~ 1, V195 > 2 ~ 0, TRUE ~ NA_real_),
      social_less = case_when(V194 %in% c(1, 2) ~ 1, V194 > 2 ~ 0, TRUE ~ NA_real_),
      no_job = case_when(V191 == 1 ~ 1, V191 > 1 ~ 0, TRUE ~ NA_real_)
    )
  flagged %>%
    mutate(combined = ifelse(date_less == 1 & social_less == 1 & no_job == 1, 1, 0)) %>%
    # Rows where the three-way condition evaluates to NA are dropped
    filter(!is.na(combined)) %>%
    mean_ci(combined, wt = V5, ci = .84) %>%
    mutate(year = as.character(year))
}
# Weighted share of respondents for whom all three flags hold at once
# (V2195 in 1-2, V2194 in 1-2, V2191 equal to 1), for one survey year in the
# V2195 / V2194 / V2191 / ARCHIVE_WT layout.
#
# @param year Survey year; the data are read from the object "mtf20<year>".
# @return NULL when that object does not exist; otherwise the mean_ci()
#   result with a character `year` column added.
process_mtf_2 <- function(year) {
  obj_name <- paste0("mtf20", year)
  if (!exists(obj_name)) {
    return(NULL)
  }
  # Each flag: 1 for the low codes, 0 for higher codes, NA for anything else
  flagged <- mutate(
    get(obj_name),
    date_less = case_when(V2195 %in% c(1, 2) ~ 1, V2195 > 2 ~ 0, TRUE ~ NA_real_),
    social_less = case_when(V2194 %in% c(1, 2) ~ 1, V2194 > 2 ~ 0, TRUE ~ NA_real_),
    no_job = case_when(V2191 == 1 ~ 1, V2191 > 1 ~ 0, TRUE ~ NA_real_),
    combined = ifelse(date_less == 1 & social_less == 1 & no_job == 1, 1, 0)
  )
  # Rows where the three-way condition evaluates to NA are dropped
  usable <- filter(flagged, !is.na(combined))
  estimate <- mean_ci(usable, combined, wt = ARCHIVE_WT, ci = .84)
  mutate(estimate, year = as.character(year))
}
# Run each period through its function and row-bind the yearly output
results_1 <- years_1 %>% map_dfr(process_mtf_1)
results_2 <- years_2 %>% map_dfr(process_mtf_2)
# Stack both periods; year becomes numeric again for the x axis
final_results <- mutate(bind_rows(results_1, results_2), year = as.numeric(year))
# Show the combined table
print(final_results)
# Line chart of the yearly share with text labels at 1995 and 2022
ggplot(final_results, aes(x = year, y = mean, group = 1)) +
  geom_line() +
  geom_point(stroke = 1, shape = 21, fill = "white", color = "azure4") +
  theme_rb() +
  scale_y_continuous(labels = scales::percent_format(), limits = c(0, .20)) +
  add_text(x = 1995, y = .05, word = "3.5%", sz = 7) +
  add_text(x = 2022, y = .15, word = "15.8%", sz = 7) +
  scale_x_continuous(breaks = seq(1995, 2022, by = 5)) + # Break every 5 years
  labs(
    x = "Year",
    y = "",
    title = "The Share of High School Seniors Who Rarely Date, Socialize, or Work",
    subtitle = "Less Than One Date Per Month, Socialize No More than Once a Week, and Have No Job",
    caption = "@ryanburge\nData: Monitoring the Future, 1995-2022"
  )
save("mtf_combined_social_trends.png")
library(dplyr)
# Weighted share, per attendance group (V169), of respondents for whom all
# three flags hold at once (V195 in 1-2, V194 in 1-2, V191 equal to 1).
# Uses the V195 / V194 / V191 / V169 / V5 variable layout.
#
# @param dataset Data frame containing those columns.
# @return The mean_ci() result per attendance group, with `year` set to
#   the string "1995".
process_1995 <- function(dataset) {
  # Each flag: 1 for the low codes, 0 for higher codes, NA for anything else
  flagged <- dataset %>%
    mutate(
      date_less = case_when(V195 %in% c(1, 2) ~ 1, V195 > 2 ~ 0, TRUE ~ NA_real_),
      social_less = case_when(V194 %in% c(1, 2) ~ 1, V194 > 2 ~ 0, TRUE ~ NA_real_),
      no_job = case_when(V191 == 1 ~ 1, V191 > 1 ~ 0, TRUE ~ NA_real_),
      combined = ifelse(date_less == 1 & social_less == 1 & no_job == 1, 1, 0),
      att = V169
    )
  # Attendance codes 1-4 get the labels below
  labelled <- flagged %>%
    mutate(
      att = frcode(
        att == 1 ~ "Never",
        att == 2 ~ "Rarely",
        att == 3 ~ "Monthly",
        att == 4 ~ "Weekly"
      )
    )
  labelled %>%
    group_by(att) %>%
    mean_ci(combined, wt = V5, ci = .84) %>%
    mutate(year = "1995")
}
# Weighted share, per attendance group (V2169), of respondents for whom all
# three flags hold at once (V2195 in 1-2, V2194 in 1-2, V2191 equal to 1).
# Uses the V2195 / V2194 / V2191 / V2169 / ARCHIVE_WT variable layout.
#
# @param dataset Data frame containing those columns.
# @return The mean_ci() result per attendance group, with `year` set to
#   the string "2022".
process_2022 <- function(dataset) {
  # Each flag: 1 for the low codes, 0 for higher codes, NA for anything else
  flagged <- mutate(
    dataset,
    date_less = case_when(V2195 %in% c(1, 2) ~ 1, V2195 > 2 ~ 0, TRUE ~ NA_real_),
    social_less = case_when(V2194 %in% c(1, 2) ~ 1, V2194 > 2 ~ 0, TRUE ~ NA_real_),
    no_job = case_when(V2191 == 1 ~ 1, V2191 > 1 ~ 0, TRUE ~ NA_real_),
    combined = ifelse(date_less == 1 & social_less == 1 & no_job == 1, 1, 0),
    att = V2169
  )
  # Attendance codes 1-4 get the labels below
  labelled <- mutate(
    flagged,
    att = frcode(
      att == 1 ~ "Never",
      att == 2 ~ "Rarely",
      att == 3 ~ "Monthly",
      att == 4 ~ "Weekly"
    )
  )
  by_att <- mean_ci(group_by(labelled, att), combined, wt = ARCHIVE_WT, ci = .84)
  mutate(by_att, year = "2022")
}
# Pick up the two already-imported datasets by name
dataset_1995 <- mtf201995
dataset_2022 <- mtf202022
# Summarise each year with its own function
results_1995 <- process_1995(dataset_1995)
results_2022 <- process_2022(dataset_2022)
# Stack the two summaries
final_results <- bind_rows(results_1995, results_2022)
# Keep only complete rows for plotting
gg <- na.omit(final_results)
# Dodged bars: one pair of bars (1995 vs 2022) per attendance category
ggplot(gg, aes(x = att, y = mean, fill = factor(year))) +
  geom_col(color = "black", position = "dodge") +
  theme_rb(legend = TRUE) +
  scale_fill_calc() +
  lab_bar(above = TRUE, pos = .01, sz = 7, type = mean) +
  y_pct() +
  theme(legend.text = element_text(size = 22)) +
  labs(
    x = "Religious Attendance",
    y = "",
    title = "Share of Seniors with Very Little Social Life",
    subtitle = "Broken Down by Religious Attendance",
    caption = "@ryanburge\nData: Monitoring the Future, 1995-2022"
  )
save("mtf_low_engagement_by_attendance_1995_2022.png", wd = 5)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment