Created
February 3, 2023 19:30
-
-
Save Aariq/3b42f0e8b28016bfda50f5c41ca29288 to your computer and use it in GitHub Desktop.
Combining multiple .csv files into a single data.frame in R in one line of code
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(readr) | |
library(dplyr) | |
library(purrr) | |
# Create data for testing ----
# Split `mtcars` into one tibble per distinct `cyl` value, then write each
# piece to its own CSV in the session temp directory, named mtcars<i>.csv
# where <i> is the position of the group in the split list.
split_cars <- mtcars |>
  group_split(cyl)
tmp <- tempdir()
# iwalk() passes each element together with its index, so the number of files
# written always equals the number of groups (no hard-coded 1, 2, 3).
iwalk(
  split_cars,
  \(chunk, i) write_csv(chunk, file.path(tmp, paste0("mtcars", i, ".csv")))
)
# Make a vector of file paths ----
# `pattern` is a regular expression, not a shell glob: "mtcars*" would mean
# "mtcar" followed by zero or more "s" and would match unrelated entries in
# the temp directory. Anchor the name and require the .csv extension instead.
files <- list.files(tmp, pattern = "^mtcars[0-9]+\\.csv$", full.names = TRUE)
files
# Three options for reading in data from multiple files in one line
# Option 1: iterate with purrr ----
# Naming the vector with its own values lets list_rbind() record the source
# path of every row in a "filename" column. map() + list_rbind() is the
# replacement for the superseded map_df(..., .id = ).
df <- files |>
  set_names() |>
  map(read.csv) |>
  list_rbind(names_to = "filename")
nrow(df)
# Option 2: hand the whole vector of paths to readr::read_csv() ----
# `id = "filename"` adds a column recording which file each row came from.
df <- files |>
  read_csv(id = "filename")
nrow(df)
# Option 3: the arrow package ----
# open_dataset() opens the files as something like a database connection
# rather than reading them eagerly, which is useful when there are so many
# files that they do not all fit into memory.
library(arrow)
df <- files |>
  open_dataset(format = "csv")
nrow(df)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment