Skip to content

Instantly share code, notes, and snippets.

@leeper
Last active March 4, 2017 01:19
Show Gist options
  • Save leeper/fee02de15a6240f952d9 to your computer and use it in GitHub Desktop.
Save leeper/fee02de15a6240f952d9 to your computer and use it in GitHub Desktop.
Merge a list of data.frames: comparing base merge(), reshape::merge_all(), and dplyr::full_join()
# packages
library("dplyr")
library("reshape")
library("microbenchmark")
set.seed(1)
# prep data
m <- mtcars
n <- 50L # number of data.frames

# Build a list of `n` data.frames to merge. Each element is:
#   * 25 rows of `m`, sampled without replacement;
#   * with every column name given the same random suffix (1-5), so column
#     names collide between some frames but not others;
#   * plus a `car` key column holding 25 distinct integers from 1:1000.
# The row and suffix draws are kept inline as setNames() arguments: the order
# in which they consume the random number stream (and therefore the data
# produced under the seed above) is decided by setNames() itself.
x <- lapply(seq_len(n), function(i) {
  within(
    setNames(
      m[sample(seq_len(nrow(m)), size = 25, replace = FALSE), ],
      paste0(names(m), sample(1:5, size = 1))
    ),
    car <- sample(1:1000, size = 25)
  )
})
# check structures
# Each approach combines the list `x` into one wide data.frame keyed on `car`,
# asking for rows from every input to be kept (outer / full join).

# base R: fold the list pairwise with merge()
reduce_base <- Reduce(
  function(left, right) merge(left, right, by = "car", all = TRUE),
  x
)
str(reduce_base)

# reshape: merge_all() receives the whole list in one call
reshape <- reshape::merge_all(x, by = "car", all.x = TRUE, all.y = TRUE)
str(reshape) # structure doesn't match (presumably vs. reduce_base -- confirm)

# dplyr: fold the list pairwise with full_join()
reduce_dplyr <- Reduce(
  function(left, right) dplyr::full_join(left, right, by = "car"),
  x
)
str(reduce_dplyr)
# Benchmark the two Reduce()-based approaches on the same list `x`.
# The expressions are passed unnamed, so each row of the printed result is
# labelled with the expression text itself (see the recorded output below).
microbenchmark(
# base R: pairwise outer merge() on `car`
Reduce(function(...) merge(..., by = "car", all=TRUE), x),
# reshape::merge_all() is commented out, so it is not part of the timing run
#reshape::merge_all(x, all.x = TRUE, all.y = TRUE),
# dplyr: pairwise full_join() on `car`
Reduce(function(...) full_join(..., by = "car"), x)
)
# Recorded output from one run (100 evaluations per expression; timings are
# machine-dependent). By median, the dplyr version was roughly 8x faster.
## Unit: milliseconds
## expr min lq mean median uq max neval cld
## Reduce(function(...) merge(..., by = "car", all = TRUE), x) 1148.0797 1238.2552 1397.2183 1291.3522 1390.4031 2950.8649 100 b
## Reduce(function(...) full_join(..., by = "car"), x) 114.2629 130.7524 169.3708 167.5244 184.5012 367.1367 100 a
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment