Skip to content

Instantly share code, notes, and snippets.

@Gotfrid
Created February 17, 2022 15:15
Show Gist options
  • Save Gotfrid/39ea8abdcc6142ac7f881b946dc62af5 to your computer and use it in GitHub Desktop.
Save Gotfrid/39ea8abdcc6142ac7f881b946dc62af5 to your computer and use it in GitHub Desktop.
Compare various ways of loading multiple files
# Create test data: nine CSV copies of the nycflights13 flights table,
# written as data1.csv ... data9.csv in the current working directory.
# A plain `for` loop is used because the writes are pure side effects;
# a top-level lapply() would also auto-print its list of NULL results.
for (file_index in seq_len(9)) {
  write.csv(
    nycflights13::flights,
    paste0("data", file_index, ".csv")
  )
}
# List all files to iterate through: every file in the working directory
# whose name ends in ".csv" (not only the ones written above).
all_data_files <- list.files(pattern = "\\.csv$")
# Define looping functions
# Read every file with a preallocated `for` loop and stack the results.
#
# data_files: character vector of CSV file paths.
# Returns the per-file results of read.csv(), combined row-wise by
# dplyr::bind_rows().
for_loop <- function(data_files) {
  data_list <- vector(mode = "list", length = length(data_files))
  # Fill the preallocated slots by position. Assigning by file name
  # (`data_list[[file]]`) would append new named elements after the NULL
  # slots, making the preallocation useless, and a path that appears twice
  # would overwrite its earlier entry instead of being read twice.
  for (i in seq_along(data_files)) {
    data_list[[i]] <- read.csv(data_files[[i]])
  }
  dplyr::bind_rows(data_list)
}
# Read each file with base lapply(), then row-bind the resulting list of
# data frames into one via dplyr::bind_rows().
#
# data_files: character vector of CSV file paths.
lapply_loop <- function(data_files) {
  loaded_frames <- lapply(X = data_files, FUN = read.csv)
  dplyr::bind_rows(loaded_frames)
}
# Read each file with purrr::map(), which yields a list with one element
# per path, then row-bind that list via dplyr::bind_rows().
#
# data_files: character vector of CSV file paths.
map_loop <- function(data_files) {
  per_file <- purrr::map(.x = data_files, .f = read.csv)
  dplyr::bind_rows(per_file)
}
# Read and combine in a single call: purrr::map_df() applies read.csv() to
# each path and returns the row-bound result.
# NOTE(review): recent purrr releases mark map_df() as superseded; it is kept
# here because it is one of the variants under comparison.
#
# data_files: character vector of CSV file paths.
map_df_loop <- function(data_files) {
  combined <- purrr::map_df(.x = data_files, .f = read.csv)
  combined
}
# Do the benchmark
# Each of the four loaders is run on the same vector of file paths and the
# timings are stored in `result`. The calls are passed unnamed, so the
# benchmark rows are labelled by the expression text itself.
# NOTE(review): bench::mark() is documented to compare the results of the
# expressions by default (check = TRUE) — confirm all four loaders return
# equal data frames, otherwise this call errors.
result <- bench::mark(
for_loop(all_data_files),
lapply_loop(all_data_files),
map_loop(all_data_files),
map_df_loop(all_data_files)
)
# My result
# Timings the author recorded for the benchmark above, kept as strings.
# One entry per benchmarked expression, in the same order across columns.
tibble::tibble(
  expression = c(
    "for_loop(all_data_files)",
    "lapply_loop(all_data_files)",
    "map_loop(all_data_files)",
    "map_df_loop(all_data_files)"
  ),
  min = c("8.65s", "8.91s", "8.84s", "8.96s"),
  median = c("8.65s", "8.91s", "8.84s", "8.96s")
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment