-
-
Save ryanburge/020c4613e2fca5f004cf541fade94c82 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(dplyr)
library(haven)
library(purrr)
library(readr)
library(rio)
library(tibble)
# Directory that holds the per-year Stata (.dta) files
path <- "E:/mtf/new/all"

# Full paths of every entry in that directory whose name ends in ".dta"
files <- list.files(path = path, pattern = "\\.dta$", full.names = TRUE)
# Build a syntactically valid R variable name from a file path.
#
# filepath: character vector of file paths.
# Returns: a character vector of the same length holding each file's name
#   with the directory and the final extension removed, then sanitised by
#   make.names(). Stems that are already valid names (e.g. "mtf95") come
#   back unchanged; invalid characters become "." and a leading digit is
#   prefixed with "X".
make_var_name <- function(filepath) {
  stem <- tools::file_path_sans_ext(basename(filepath))
  make.names(stem)
}
# Import every file in `files` and bind the result in the global environment
# under the name that make_var_name() derives from the file name.
# A plain loop is used because this step is run purely for its side effect:
# lapply() would return (and, at top level, auto-print) a list holding a
# second copy of every imported data set.
for (dta_path in files) {
  assign(make_var_name(dta_path), import(dta_path), envir = .GlobalEnv)
}
# Read the single combined Stata file.
mtf <- import("E://data/mtf_full1.dta")

# Add analysis-friendly copies of the V-numbered columns (the originals are
# kept). `year` is V5 offset by 1900.
# NOTE(review): presumably V5 is a two-digit survey year -- confirm against
# the codebook.
mtf <- mutate(
  mtf,
  year = 1900 + V5,
  attend = V33,
  relimp = V34,
  drink = V74,
  smoke = V72,
  gender = V13,
  race = V14,
  sibs = V17,
  paed = V27,
  maed = V28,
  pid7 = V30,
  id5 = V31,
  grade = V43,
  weight = V8
)

# Keep only the harmonised columns.
one <- select(
  mtf,
  year, attend, relimp, drink, smoke, gender, race, sibs,
  paed, maed, pid7, id5, grade, weight
)
# Add harmonised column names to the global data sets mtf95 ... mtf99 and
# mtf00 ... mtf11, overwriting each data set in place.
for (i in c(95:99, 0:11)) {
  # Zero-padded two-digit suffix ("95", "00", "07", ...). sprintf() handles
  # every value in the loop, so no scalar ifelse() (which returned a mix of
  # character and numeric results) is needed.
  year_suffix <- sprintf("%02d", i)
  dataset_name <- paste0("mtf", year_suffix)
  assign(
    dataset_name,
    get(dataset_name) %>%
      mutate(
        attend = V169,
        relimp = V170,
        drink = V103,
        smoke = V101,
        gender = V150,
        race = V151,
        sibs = V49,
        paed = V163,
        maed = V164,
        pid7 = V166,
        id5 = V167,
        grade = V179,
        weight = V5
      ),
    envir = .GlobalEnv
  )
}
########################################
# Add harmonised column names to the global data sets mtf12 ... mtf22,
# overwriting each data set in place. All recodes happen in one pass per
# data set instead of one loop per variable.
#
# pid7 is taken from V2166 and id5 from V2167, mirroring the V166 / V167
# mapping used for mtf95 ... mtf11. The earlier version contained an extra
# loop that re-assigned pid7 from V2167, which left pid7 identical to id5.
for (i in 12:22) {
  dataset_name <- paste0("mtf", i)
  recoded <- get(dataset_name) %>%
    mutate(
      attend = V2169,
      relimp = V2170,
      smoke = V2101,
      gender = V2150,
      race = V2151,
      sibs = V49,
      paed = V2163,
      maed = V2164,
      pid7 = V2166,
      id5 = V2167,
      grade = V2179,
      weight = ARCHIVE_WT
    )
  # drink is only derived for mtf12 ... mtf21; mtf22 is deliberately skipped,
  # matching the select() for 2022 further down, which omits drink.
  if (i <= 21) {
    recoded <- mutate(recoded, drink = V2103)
  }
  assign(dataset_name, recoded, envir = .GlobalEnv)
}
#########################################
# Build the global per-year subsets m95 ... m99, m00 ... m22. Each one keeps
# the harmonised columns of the matching mtfNN data set and gains a numeric
# `year` column holding the four-digit year. The 2022 subset has no `drink`
# column. Scratch variables stay inside local() so only the mNN objects are
# created in the global environment.
local({
  core_vars <- c(
    "attend", "relimp", "drink", "smoke", "gender", "race", "sibs",
    "paed", "maed", "pid7", "id5", "grade", "weight"
  )
  for (yr in 1995:2022) {
    suffix <- sprintf("%02d", yr %% 100)
    keep <- if (yr == 2022) setdiff(core_vars, "drink") else core_vars
    yearly <- get(paste0("mtf", suffix), envir = .GlobalEnv) %>%
      select(all_of(keep)) %>%
      mutate(year = as.numeric(yr))
    assign(paste0("m", suffix), yearly, envir = .GlobalEnv)
  }
})
# Names of the per-year subsets: m95 ... m99 followed by m00 ... m22
years <- paste0("m", sprintf("%02d", c(95:99, 0:22)))
dataset_names <- as.list(years)

# Look up each subset by name and stack them row-wise; a name with no
# matching object contributes an empty tibble instead of raising an error.
combined_df <- dataset_names %>%
  map(function(name) {
    if (!exists(name)) {
      return(tibble())
    }
    get(name)
  }) %>%
  bind_rows()

# Write the stacked subsets to disk, read them back, append them to `one`,
# and write the final file.
write_csv(combined_df, "E://data/all_mtf.csv")
two <- import("E://data/all_mtf.csv")
complete <- one %>% bind_rows(two)
write_csv(complete, "E://data/final_mtf.csv")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment