# -
# -
# Save ryanburge/020c4613e2fca5f004cf541fade94c82 to your computer and use it in GitHub Desktop.
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Packages this script calls into: import() comes from rio; %>%, mutate(),
# select(), bind_rows() and all_of() from dplyr; map_dfr() from purrr;
# write_csv() from readr; tibble() from tibble.
library(dplyr)
library(haven)
library(purrr)
library(readr)
library(rio)
library(tibble)
# Directory containing the .dta files to import.
path <- "E:/mtf/new/all"

# Full paths of every entry in `path` whose name ends in ".dta".
files <- list.files(path, pattern = "\\.dta$", full.names = TRUE)

# list.files() returns character(0) for a missing or empty directory without
# complaint; say so here rather than failing later with "object not found".
if (length(files) == 0) {
  warning("No .dta files found in ", path, call. = FALSE)
}
# Derive the object name used for an imported file: the file's base name with
# the directory and the extension removed,
# e.g. "E:/mtf/new/all/mtf95.dta" -> "mtf95".
#
# filepath: character vector of file paths.
# Returns a character vector of the same length.
#
# The result is not sanitised (no make.names()), so it is a syntactic R name
# only when the file name itself already is one.
make_var_name <- function(filepath) {
  tools::file_path_sans_ext(basename(filepath))
}
# Import every file in `files` and bind it in the global environment under the
# name produced by make_var_name() (e.g. ".../mtf95.dta" becomes `mtf95`).
# lapply() is used only for its side effect here; invisible() stops the
# returned list of full data frames from being auto-printed at top level.
invisible(lapply(files, function(file) {
  assign(make_var_name(file), import(file), envir = .GlobalEnv)
}))
# Load the single combined file and give its V-numbered columns the
# harmonised names shared with the per-year tables built further down.
mtf <- import("E://data/mtf_full1.dta")

mtf <- mtf %>%
  mutate(
    # NOTE(review): assumes V5 holds a two-digit year in this file -- confirm
    # against the codebook.
    year = 1900 + V5,
    attend = V33,
    relimp = V34,
    drink = V74,
    smoke = V72,
    gender = V13,
    race = V14,
    sibs = V17,
    paed = V27,
    maed = V28,
    pid7 = V30,
    id5 = V31,
    grade = V43,
    weight = V8
  )

# `one` keeps only the harmonised columns; `mtf` retains every original column
# plus the new ones.
one <- mtf %>%
  select(
    year, attend, relimp, drink, smoke, gender, race, sibs, paed, maed,
    pid7, id5, grade, weight
  )
# Add the harmonised columns to the tables mtf95 ... mtf99 and mtf00 ... mtf11,
# overwriting each table in the global environment.
for (i in c(95:99, 0:11)) {
  # Two-digit, zero-padded suffix: 5 -> "05", 95 -> "95". One sprintf() covers
  # every value, so no scalar ifelse() with mixed return types is needed.
  year_suffix <- sprintf("%02d", i)
  dataset_name <- paste0("mtf", year_suffix)
  assign(
    dataset_name,
    get(dataset_name) %>%
      mutate(
        attend = V169,
        relimp = V170,
        drink = V103,
        smoke = V101,
        gender = V150,
        race = V151,
        sibs = V49,
        paed = V163,
        maed = V164,
        pid7 = V166,
        id5 = V167,
        grade = V179,
        weight = V5
      ),
    envir = .GlobalEnv
  )
}
######################################## | |
# Add the harmonised columns to the tables mtf12 ... mtf22, overwriting each
# table in the global environment. These tables use V2xxx column names and
# ARCHIVE_WT as the weight.
#
# Fix: an extra loop used to run `pid7 = V2167` after `pid7 = V2166`, so pid7
# ended up as a copy of id5 (also V2167) for every one of these years. pid7 is
# now left as V2166, matching the V166 / V167 pairing used for the
# 1995-2011 tables.
#
# NOTE(review): sibs reads V49 here, not a V2xxx column like the other
# variables -- confirm against the codebook.
for (i in 12:22) {
  dataset_name <- paste0("mtf", i)
  assign(
    dataset_name,
    get(dataset_name) %>%
      mutate(
        attend = V2169,
        relimp = V2170,
        smoke = V2101,
        gender = V2150,
        race = V2151,
        sibs = V49,
        paed = V2163,
        maed = V2164,
        pid7 = V2166,
        grade = V2179,
        id5 = V2167,
        weight = ARCHIVE_WT
      ),
    envir = .GlobalEnv
  )

  # drink is derived only for mtf12 ... mtf21; mtf22 never gets the column.
  if (i <= 21) {
    assign(
      dataset_name,
      get(dataset_name) %>% mutate(drink = V2103),
      envir = .GlobalEnv
    )
  }
}
######################################### | |
# Build the slim per-year tables m95 ... m99, m00 ... m22 in the global
# environment: each keeps only the harmonised columns of the matching mtfNN
# table and gains a numeric `year` column (1995 ... 2022).
harmonised_cols <- c(
  "attend", "relimp", "drink", "smoke", "gender", "race", "sibs",
  "paed", "maed", "pid7", "id5", "grade", "weight"
)

for (survey_year in 1995:2022) {
  # Two-digit suffix shared by the source (mtfNN) and result (mNN) names.
  suffix <- sprintf("%02d", survey_year %% 100L)

  # The 2022 table is the one year selected without `drink`.
  keep <- if (survey_year == 2022) {
    setdiff(harmonised_cols, "drink")
  } else {
    harmonised_cols
  }

  assign(
    paste0("m", suffix),
    get(paste0("mtf", suffix)) %>%
      # all_of() errors if a listed column is absent, as a bare name would.
      select(all_of(keep)) %>%
      mutate(year = as.numeric(survey_year)),
    envir = .GlobalEnv
  )
}
# Names of the per-year tables, in order: "m95" ... "m99", "m00" ... "m22".
years <- sprintf("m%02d", c(95:99, 0:22))
dataset_names <- as.list(years)

# Split the names into tables that exist and tables that do not. Missing
# tables are still skipped, as before, but no longer silently: a dropped year
# would otherwise vanish from the output without any notice.
is_present <- vapply(years, function(name) exists(name), logical(1))
if (!all(is_present)) {
  warning(
    "Skipping missing datasets: ",
    paste(years[!is_present], collapse = ", "),
    call. = FALSE
  )
}

# Stack the available tables row-wise; columns are matched by name.
combined_df <- bind_rows(
  lapply(years[is_present], function(name) get(name))
)
# Write the stacked per-year data to disk, then read it back as `two`.
# NOTE(review): the CSV round-trip presumably strips Stata label attributes so
# the columns bind cleanly with `one` -- confirm before simplifying it away.
write_csv(combined_df, "E://data/all_mtf.csv")
two <- import("E://data/all_mtf.csv")

# Append the per-year rows to the rows from the combined file and save the
# final data set.
complete <- bind_rows(one, two)
write_csv(complete, "E://data/final_mtf.csv")
# Sign up for free
# to join this conversation on GitHub.
# Already have an account?
# Sign in to comment