Created
March 4, 2017 22:08
-
-
Save nathan-russell/51457eed50205d9def776f0dbd1f5e51 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Benchmark setup: attach the packages being compared and generate a large
# set of unique random string keys, each paired with one numeric value.
library(tibble)
library(data.table)
library(dplyr)
library(hashmap)
library(microbenchmark)

# Fixed seed so that keys, values, and rand_key are reproducible.
set.seed(123)

nkeys <- 10^7  # number of random strings requested (before de-duplication)
keylen <- 8    # characters per key

# unique() drops duplicate strings, so length(keys) can be less than nkeys;
# values is sized from the de-duplicated vector to stay aligned with it.
keys <- unique(stringi::stri_rand_strings(nkeys, keylen))
values <- rnorm(length(keys))

# The single key that is looked up in each container.
rand_key <- sample(keys, 1)
# Construction cost of each container. The commented figures are the
# system.time() output recorded from the original run.
system.time({ data_tbl <- tibble(keys, values) })
#    user  system elapsed
#   0.024   0.000   0.023

system.time({ data_table <- data.table(keys, values) })
#    user  system elapsed
#   0.048   0.036   0.083

# Key a separate copy so that data_table itself stays unkeyed for comparison.
data_table2 <- copy(data_table)
system.time({ setkey(data_table2, keys) })
#    user  system elapsed
#   7.864   0.000   7.368

# slow to construct (SEXPs copied to C++ object)
system.time({ hash <- hashmap(keys, values) })
#    user  system elapsed
#   3.840   0.308   4.149
# sanity check: every lookup method should return the same value for rand_key.
# The commented output below each call is what the original run printed.
data_tbl %>% filter(keys == rand_key)
# # A tibble: 1 × 2
#       keys    values
#      <chr>     <dbl>
# 1 dimWYkVf 0.3093082

data_table[keys == rand_key]
#        keys    values
# 1: dimWYkVf 0.3093082

# Keyed lookup; nomatch = 0L returns zero rows instead of an NA row on a miss.
data_table2[.(rand_key), nomatch = 0L]
#        keys    values
# 1: dimWYkVf 0.3093082

hash[[rand_key]]
# [1] 0.3093082
# Time a single-key lookup with each method, 200 evaluations per expression.
#
# NOTE(review): data.table can build an automatic secondary index the first
# time `keys == <value>` is used in `i` (option `datatable.auto.index`), so the
# "data.table" case is probably not a plain vector scan. That would explain
# why its timings are so close to the keyed "indexed DT" case -- confirm
# before drawing conclusions from that pair.
microbenchmark::microbenchmark(
  "dplyr" = data_tbl %>% filter(keys == rand_key),
  "data.table" = data_table[keys == rand_key],
  "indexed DT" = data_table2[.(rand_key), nomatch = 0L],
  "hashmap" = hash[[rand_key]],
  times = 200L
)
# Unit: microseconds
#        expr        min          lq         mean      median          uq         max neval
#       dplyr 129863.170 135231.3850 271200.45996 147699.5085 221820.6025 1905643.772   200
#  data.table    479.377    555.9585    850.10595    725.1985    903.3740    3861.167   200
#  indexed DT    493.498    589.2130    868.77630    743.1165    914.4945    3624.291   200
#     hashmap     16.820     37.5895     70.70894     75.1950     86.8735     331.077   200
# Record the R version, platform, and package versions used for the timings
# above; the commented output is from the original run.
sessionInfo()
# R Under development (unstable) (2017-02-21 r72242)
# Platform: x86_64-pc-linux-gnu (64-bit)
# Running under: Debian GNU/Linux 8 (jessie)
#
# Matrix products: default
# BLAS: /usr/local/lib64/R/lib/libRblas.so
# LAPACK: /usr/local/lib64/R/lib/libRlapack.so
#
# locale:
#  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=en_US.UTF-8
#  [4] LC_COLLATE=en_US.UTF-8     LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8
#  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                  LC_ADDRESS=C
# [10] LC_TELEPHONE=C             LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#
# attached base packages:
# [1] stats     graphics  grDevices utils     datasets  methods   base
#
# other attached packages:
# [1] microbenchmark_1.4-2.1 hashmap_0.2.0          dplyr_0.5.0
# [4] data.table_1.10.0      tibble_1.2
#
# loaded via a namespace (and not attached):
#  [1] Rcpp_0.12.9.4    codetools_0.2-15 assertthat_0.1   plyr_1.8.4       grid_3.4.0
#  [6] R6_2.2.0         gtable_0.2.0     DBI_0.5-1        magrittr_1.5     scales_0.4.1
# [11] ggplot2_2.2.1    stringi_1.1.2    lazyeval_0.2.0   tools_3.4.0      munsell_0.4.3
# [16] compiler_3.4.0   colorspace_1.2-7
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment