Skip to content

Instantly share code, notes, and snippets.

@erikerhardt
Last active September 2, 2021 00:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save erikerhardt/7aef88c99ace2a2438df4228d6d99234 to your computer and use it in GitHub Desktop.
Save erikerhardt/7aef88c99ace2a2438df4228d6d99234 to your computer and use it in GitHub Desktop.
# don't print readr::read_csv() column types
options(readr.num_columns = 0)
options(readr.show_col_types = FALSE)
# remove attributes after readr::read_csv()
# attr(df, "spec") <- NULL
# remove $ and , from dollar amounts; (5) is negative 5, so remove ")" and replace "(" with "-"
`Demand Sales` = gsub("[(]", "-", gsub("[\\$,)]", "", `Demand Sales` )) %>% as.numeric()
# which to use?
tab_list_to_use <-
c(
# "Tab A"
#, "Tab B"
"Tab C"
, "Tab D"
#, "Tab E"
#, "Tab F"
#, "Tab G"
#, "Tab H"
, "Tab I"
#, "Tab J"
#, "Tab K1"
#, "Tab K2"
#, "Tab L"
#, "Tab M"
#, "Tab N"
)
## ## https://stackoverflow.com/questions/46682818/read-a-csv-file-in-a-zipped-folder-with-r-without-unzipping
## # just a list of files inside master.zip
## master <- as.character(unzip("master.zip", list = TRUE)$Name)
## # load the first file "file1.csv"
## data <- read.csv(unz("master.zip", "file1.csv"), header = TRUE, sep = ",")
## Read files and get site and table names
fn_path <-
"../data"
list_files <-
list.files(
path = fn_path
, pattern = "*.zip"
)
list_files_dates <-
list_files %>%
str_split_fixed(
pattern = fixed("_")
, n = 5
) %>%
as_tibble() %>%
select(5) %>%
unlist() %>%
str_split(
pattern = fixed(".zip")
#, n = 2
, simplify = TRUE
) %>%
as_tibble() %>%
select(1) %>%
unlist()
list_files_all <-
tibble(
fn = list_files
, date = list_files_dates
)
# list to hold all data
dat_sheet <- list()
# initialize tables
for (n_tab in tab_list_to_use) {
dat_sheet[[ n_tab ]] <- list()
}
for (i_row in 1:nrow(list_files_all)) {
## i_row = 1
message("Reading file: ", list_files_all[["fn"]][i_row])
## ## https://stackoverflow.com/questions/46682818/read-a-csv-file-in-a-zipped-folder-with-r-without-unzipping
## # just a list of files inside master.zip
## master <- as.character(unzip("master.zip", list = TRUE)$Name)
## # load the first file "file1.csv"
## data <- read.csv(unz("master.zip", "file1.csv"), header = TRUE, sep = ",")
zip_file_names <-
file.path(
fn_path
, list_files_all[["fn"]][i_row]
) %>%
unzip(list = TRUE) %>%
pull(Name) %>%
as.character()
# "Tab A - Summary.csv"
# "Tab B- Summary Detail.csv"
# "Tab C- Age and Gender Detail.csv"
# "Tab D- Assessment Detail.csv"
# "Tab E - Master HH Detail.csv"
# "Tab F - Client HH Detail.csv"
# "Tab G - Master HH Membership.csv"
# "Tab H - Client HH Membership.csv"
# "Tab I - Entry Exit Detail.csv"
# "Tab J - Income Detail.csv"
# "Tab K1 - NC Benefit Detail.csv"
# "Tab K2 - Ins Benefit Detail.csv"
# "Tab L - Disability Detail.csv"
# "Tab M - Duplicate Clients.csv"
# "Tab N - Additional Information.csv"
ind_files <-
which(
zip_file_names %>%
str_split_fixed(
pattern = fixed("-")
, n = 2
#, simplify = TRUE
) %>%
as_tibble() %>%
select(1) %>%
unlist() %>%
str_trim()
%in%
tab_list_to_use
)
for (i_csv in seq_along(ind_files)) {
# i_csv = 1
dat_sheet[[ tab_list_to_use[i_csv] ]][[ list_files_all[["date"]][i_row] ]] <-
# open zip file
unz(
description =
file.path(
fn_path
, list_files_all[["fn"]][i_row]
)
, filename =
zip_file_names[ind_files][i_csv]
) %>%
# read csv sheet
read.csv(
header = TRUE
, sep = ","
) %>%
# track which file these data came from
mutate(
sheet_date = list_files_all[["date"]][i_row]
, sheet_name = tab_list_to_use[i_csv]
)
}
}
# bind all dates together
for (n_tab in tab_list_to_use) {
dat_sheet[[ n_tab ]] <-
bind_rows(
dat_sheet[[ n_tab ]]
)
}
sapply(X = dat_sheet, FUN = dim)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment