Skip to content

Instantly share code, notes, and snippets.

@ryanburge
Created March 11, 2024 11:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ryanburge/020c4613e2fca5f004cf541fade94c82 to your computer and use it in GitHub Desktop.
Save ryanburge/020c4613e2fca5f004cf541fade94c82 to your computer and use it in GitHub Desktop.
library(haven)
# Directory that holds the Stata (.dta) files to load
path <- "E:/mtf/new/all"
# Full paths of every file in that directory whose name ends in ".dta"
files <- list.files(path = path, pattern = "\\.dta$", full.names = TRUE)
# Build a syntactically valid R variable name from a file path.
#
# filepath: path to a file (character); only its last component is used.
# Returns the file name without directory or extension, passed through
# make.names() so that a name starting with a digit or containing characters
# such as spaces or hyphens still becomes a valid identifier
# (e.g. "2012 data.dta" -> "X2012.data"). Names that are already valid,
# such as "mtf95", come back unchanged.
make_var_name <- function(filepath) {
  no_extension <- tools::file_path_sans_ext(basename(filepath))
  make.names(no_extension)
}
# Import each file and bind it in the global environment under the name
# derived from its file name (e.g. ".../mtf95.dta" becomes `mtf95`).
# Only the side effect of assign() is wanted, so the list that lapply()
# returns is wrapped in invisible(): otherwise every imported dataset is
# auto-printed when this runs interactively or through Rscript.
# NOTE(review): import() is not provided by haven, the only package attached
# in this file; presumably it is rio::import() attached elsewhere -- confirm.
invisible(lapply(files, function(file) {
  data_name <- make_var_name(file)
  assign(data_name, import(file), envir = .GlobalEnv)
}))
# Load the combined file and add readable copies of the V-numbered columns.
# `year` is derived as 1900 + V5; every other new column is a straight copy.
mtf <- import("E://data/mtf_full1.dta") %>%
  mutate(
    year = 1900 + V5,
    attend = V33,
    relimp = V34,
    drink = V74,
    smoke = V72,
    gender = V13,
    race = V14,
    sibs = V17,
    paed = V27,
    maed = V28,
    pid7 = V30,
    id5 = V31,
    grade = V43,
    weight = V8
  )
# Keep only the readable columns for the first part of the series
one <- select(
  mtf,
  year, attend, relimp, drink, smoke, gender, race,
  sibs, paed, maed, pid7, id5, grade, weight
)
# Add the readable column names to the global datasets mtf95 ... mtf99 and
# mtf00 ... mtf11, replacing each dataset with its augmented copy.
for (i in c(95:99, 0:11)) {
  # Two-digit, zero-padded suffix: 95 -> "95", 0 -> "00", 11 -> "11"
  year_suffix <- sprintf("%02d", i)
  dataset_name <- paste0("mtf", year_suffix)
  assign(
    dataset_name,
    mutate(
      get(dataset_name),
      attend = V169,
      relimp = V170,
      drink = V103,
      smoke = V101,
      gender = V150,
      race = V151,
      sibs = V49,
      paed = V163,
      maed = V164,
      pid7 = V166,
      id5 = V167,
      grade = V179,
      weight = V5
    ),
    envir = .GlobalEnv
  )
}
########################################
# Add the readable column names to the global datasets mtf12 ... mtf22,
# replacing each dataset with its augmented copy. One pass per dataset
# replaces the previous one-loop-per-variable layout.
#
# pid7 is taken from V2166 only. V2167 is the source of id5 (mirroring the
# V166 / V167 pair used for mtf95 ... mtf11), so the extra
# `pid7 = V2167` assignment that used to overwrite pid7 has been removed.
for (i in 12:22) {
  dataset_name <- paste0("mtf", i)
  recoded <- get(dataset_name) %>%
    mutate(
      attend = V2169,
      relimp = V2170
    )
  # drink (V2103) is only set for mtf12 ... mtf21; mtf22 is left without it
  if (i <= 21) {
    recoded <- recoded %>%
      mutate(drink = V2103)
  }
  recoded <- recoded %>%
    mutate(
      smoke = V2101,
      gender = V2150,
      race = V2151,
      sibs = V49,
      paed = V2163,
      maed = V2164,
      pid7 = V2166,
      grade = V2179,
      id5 = V2167,
      weight = ARCHIVE_WT
    )
  assign(dataset_name, recoded, envir = .GlobalEnv)
}
# Drop the loop's working copy so it does not linger in the workspace
rm(recoded)
#########################################
# Build m95 ... m22 in the global environment: for every survey year take the
# matching mtfNN dataset, keep only the readable columns and record the
# four-digit year. For 2022 the drink column is not selected.
invisible(lapply(1995:2022, function(survey_year) {
  keep <- c(
    "attend", "relimp", "drink", "smoke", "gender", "race", "sibs",
    "paed", "maed", "pid7", "id5", "grade", "weight"
  )
  if (survey_year == 2022) {
    keep <- setdiff(keep, "drink")
  }
  # Two-digit suffix shared by source and target names: 1995 -> "95", 2000 -> "00"
  suffix <- sprintf("%02d", survey_year %% 100L)
  # Stored as a double, matching a literal such as `year = 1995`
  year_value <- as.numeric(survey_year)
  trimmed <- get(paste0("mtf", suffix), envir = .GlobalEnv) %>%
    select(all_of(keep)) %>%
    mutate(year = !!year_value)
  assign(paste0("m", suffix), trimmed, envir = .GlobalEnv)
}))
# Names of the per-year datasets in chronological order:
# m95 ... m99, m00 ... m09, m10 ... m22
years <- c(paste0("m", 95:99), paste0("m", sprintf("%02d", 0:9)), paste0("m", 10:22))
# Same list of single strings as before; as.list() replaces an lapply() that
# only passed each element through paste0() unchanged.
dataset_names <- as.list(years)
# Fetch each dataset by name and stack them all with bind_rows()
combined_df <- dataset_names %>%
  lapply(function(name) {
    if (exists(name)) {
      get(name)
    } else {
      # Best effort: a missing year is skipped rather than aborting the run,
      # but it is reported so the gap in the combined data is not silent.
      warning(
        "Dataset '", name, "' not found; its rows are omitted from combined_df",
        call. = FALSE
      )
      tibble()
    }
  }) %>%
  bind_rows()
# Write the stacked per-year data to disk, then read it straight back in.
# NOTE(review): the CSV round trip presumably normalises column types before
# the rows are bound to `one` -- confirm before removing it.
write_csv(combined_df, "E://data/all_mtf.csv")
two <- import("E://data/all_mtf.csv")
# Append the re-read rows beneath `one` and save the full series
complete <- one %>%
  bind_rows(two)
write_csv(complete, "E://data/final_mtf.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment