Last active
September 2, 2021 00:10
-
-
Save erikerhardt/7aef88c99ace2a2438df4228d6d99234 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# don't print readr::read_csv() column types | |
options(readr.num_columns = 0) | |
options(readr.show_col_types = FALSE) | |
# remove attributes after readr::read_csv() | |
# attr(df, "spec") <- NULL | |
# remove $ and , from dollar amounts; (5) is negative 5, so remove ")" and replace "(" with "-" | |
`Demand Sales` = gsub("[(]", "-", gsub("[\\$,)]", "", `Demand Sales` )) %>% as.numeric() | |
# which to use? | |
tab_list_to_use <- | |
c( | |
# "Tab A" | |
#, "Tab B" | |
"Tab C" | |
, "Tab D" | |
#, "Tab E" | |
#, "Tab F" | |
#, "Tab G" | |
#, "Tab H" | |
, "Tab I" | |
#, "Tab J" | |
#, "Tab K1" | |
#, "Tab K2" | |
#, "Tab L" | |
#, "Tab M" | |
#, "Tab N" | |
) | |
## ## https://stackoverflow.com/questions/46682818/read-a-csv-file-in-a-zipped-folder-with-r-without-unzipping | |
## # just a list of files inside master.zip | |
## master <- as.character(unzip("master.zip", list = TRUE)$Name) | |
## # load the first file "file1.csv" | |
## data <- read.csv(unz("master.zip", "file1.csv"), header = TRUE, sep = ",") | |
## Read files and get site and table names | |
fn_path <- | |
"../data" | |
list_files <- | |
list.files( | |
path = fn_path | |
, pattern = "*.zip" | |
) | |
list_files_dates <- | |
list_files %>% | |
str_split_fixed( | |
pattern = fixed("_") | |
, n = 5 | |
) %>% | |
as_tibble() %>% | |
select(5) %>% | |
unlist() %>% | |
str_split( | |
pattern = fixed(".zip") | |
#, n = 2 | |
, simplify = TRUE | |
) %>% | |
as_tibble() %>% | |
select(1) %>% | |
unlist() | |
list_files_all <- | |
tibble( | |
fn = list_files | |
, date = list_files_dates | |
) | |
# list to hold all data | |
dat_sheet <- list() | |
# initialize tables | |
for (n_tab in tab_list_to_use) { | |
dat_sheet[[ n_tab ]] <- list() | |
} | |
for (i_row in 1:nrow(list_files_all)) { | |
## i_row = 1 | |
message("Reading file: ", list_files_all[["fn"]][i_row]) | |
## ## https://stackoverflow.com/questions/46682818/read-a-csv-file-in-a-zipped-folder-with-r-without-unzipping | |
## # just a list of files inside master.zip | |
## master <- as.character(unzip("master.zip", list = TRUE)$Name) | |
## # load the first file "file1.csv" | |
## data <- read.csv(unz("master.zip", "file1.csv"), header = TRUE, sep = ",") | |
zip_file_names <- | |
file.path( | |
fn_path | |
, list_files_all[["fn"]][i_row] | |
) %>% | |
unzip(list = TRUE) %>% | |
pull(Name) %>% | |
as.character() | |
# "Tab A - Summary.csv" | |
# "Tab B- Summary Detail.csv" | |
# "Tab C- Age and Gender Detail.csv" | |
# "Tab D- Assessment Detail.csv" | |
# "Tab E - Master HH Detail.csv" | |
# "Tab F - Client HH Detail.csv" | |
# "Tab G - Master HH Membership.csv" | |
# "Tab H - Client HH Membership.csv" | |
# "Tab I - Entry Exit Detail.csv" | |
# "Tab J - Income Detail.csv" | |
# "Tab K1 - NC Benefit Detail.csv" | |
# "Tab K2 - Ins Benefit Detail.csv" | |
# "Tab L - Disability Detail.csv" | |
# "Tab M - Duplicate Clients.csv" | |
# "Tab N - Additional Information.csv" | |
ind_files <- | |
which( | |
zip_file_names %>% | |
str_split_fixed( | |
pattern = fixed("-") | |
, n = 2 | |
#, simplify = TRUE | |
) %>% | |
as_tibble() %>% | |
select(1) %>% | |
unlist() %>% | |
str_trim() | |
%in% | |
tab_list_to_use | |
) | |
for (i_csv in seq_along(ind_files)) { | |
# i_csv = 1 | |
dat_sheet[[ tab_list_to_use[i_csv] ]][[ list_files_all[["date"]][i_row] ]] <- | |
# open zip file | |
unz( | |
description = | |
file.path( | |
fn_path | |
, list_files_all[["fn"]][i_row] | |
) | |
, filename = | |
zip_file_names[ind_files][i_csv] | |
) %>% | |
# read csv sheet | |
read.csv( | |
header = TRUE | |
, sep = "," | |
) %>% | |
# track which file these data came from | |
mutate( | |
sheet_date = list_files_all[["date"]][i_row] | |
, sheet_name = tab_list_to_use[i_csv] | |
) | |
} | |
} | |
# bind all dates together | |
for (n_tab in tab_list_to_use) { | |
dat_sheet[[ n_tab ]] <- | |
bind_rows( | |
dat_sheet[[ n_tab ]] | |
) | |
} | |
sapply(X = dat_sheet, FUN = dim) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment