Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
library(microbenchmark)
library(data.table)
library(plyr)
# Build the test data: 200 numeric columns of 1000 draws each (200k values).
# Column i is drawn with rnorm(n = 1000, mean = i): the index handed over by
# lapply() ends up as the `mean` of the normal distribution.
test.data <- as.data.frame(
  lapply(seq_len(200), function(mu) rnorm(n = 1000, mean = mu)),
  stringsAsFactors = FALSE
)
# Two disjoint sets of column names ("1".."200" and "201".."400"), so the
# two frames we row-bind later share no column.
names1 <- as.character(seq_len(200))
names2 <- as.character(201:400)
# Two copies of the same values that differ only in their column names.
test1 <- setNames(test.data, names1)
test2 <- setNames(test.data, names2)
# Convert both data.frames to data.table once, up front, so the benchmark can
# contrast pre-converted inputs with inputs that are converted inside the
# timed expression.
test1.dt <- as.data.table(test1)
test2.dt <- as.data.table(test2)

# Time each row-bind-with-fill variant 100 times.
microbenchmark(
  # rbind() on two data.tables that were converted ahead of time
  "data.table" = rbind(test1.dt, test2.dt, fill = TRUE),

  # rbind() where the as.data.table() conversion of both data.frames is
  # part of the timed expression
  "as.data.table" = rbind(
    as.data.table(test1),
    as.data.table(test2),
    fill = TRUE
  ),

  # rbindlist() on a list of the pre-converted data.tables
  "data.table::rbindlist" = rbindlist(
    list(test1.dt, test2.dt),
    fill = TRUE
  ),

  # rbindlist() handed the plain data.frames directly
  "data.table::rbindlist + coercion" = rbindlist(
    list(test1, test2),
    fill = TRUE
  ),

  # plyr::rbind.fill() on the plain data.frames
  "plyr" = rbind.fill(test1, test2),

  times = 100
)
# Unit: milliseconds
# expr min lq mean median uq max neval
# data.table 5.401874 5.752023 7.052155 5.865167 6.045043 22.52242 100
# as.data.table 9.842593 10.114694 11.703388 10.313812 11.054234 29.69442 100
# data.table::rbindlist 5.419195 5.691052 7.322580 5.866808 6.099170 24.41400 100
# data.table::rbindlist + coercion 5.447201 5.662593 7.680818 5.867926 6.308414 23.63164 100
# plyr 49.178042 49.744839 56.202575 50.105359 50.856851 418.81422 100
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.