Last active
November 15, 2019 00:51
-
-
Save jimhester/129284324819bd845b67e79e516bc6ca to your computer and use it in GitHub Desktop.
Gun Violence benchmark
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(dtplyr)
library(readr)
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(data.table))

# Single definition of the dataset location, so that all three readers below
# are guaranteed to parse the same file.
csv_path <- "~/Downloads/gun-violence-data_01-2013_03-2018.csv"

# Benchmark 1: end-to-end cost of reading the CSV and counting rows per
# `state`, using readr, data.table::fread (through dtplyr) and vroom.
#
# It is generally better not to benchmark the print methods, to avoid
# misleading results. Also, vroom is faster on this particular dataset than
# either readr or data.table.
bench::mark(
  readr = readr::read_csv(csv_path, progress = FALSE, col_types = list()) %>%
    group_by(state) %>%
    count(sort = TRUE),
  # The dtplyr pipeline is lazy; as.data.table() at the end forces it to run
  # so the timing covers the actual computation.
  fread = fread(csv_path) %>%
    lazy_dt() %>%
    group_by(state) %>%
    count(sort = TRUE) %>%
    as.data.table(),
  vroom = vroom::vroom(csv_path, progress = FALSE, col_types = list()) %>%
    group_by(state) %>%
    count(sort = TRUE),
  # Every iteration re-reads the whole file, so the number of runs is capped.
  iterations = 5
)
#> Warning: Some expressions had a GC in every iteration; so filtering is
#> disabled.
#> # A tibble: 3 x 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 readr 2.24s 2.57s 0.398 155.8MB 1.11
#> 2 fread 716.57ms 737.69ms 1.03 121.6MB 1.03
#> 3 vroom 229.19ms 238.72ms 3.10 24.4MB 1.24
# Benchmark 2: cost of the group-wise count alone, with file reading excluded.
#
# lazy_dt() called on a tibble or data.frame converts it to a data.table,
# which takes considerable time. To keep that conversion out of the timings of
# the group-wise counts, lazy_dt() has to be called before the benchmark.
csv_path <- "~/Downloads/gun-violence-data_01-2013_03-2018.csv"
dat_readr <- readr::read_csv(csv_path, progress = FALSE, col_types = list())
dat_readr_dt <- lazy_dt(dat_readr)
dat_dt <- lazy_dt(fread(csv_path))

bench::mark(
  readr = dat_readr %>%
    group_by(state) %>%
    count(sort = TRUE),
  # as_tibble() forces the lazy dtplyr pipeline to run, so the timing covers
  # the computation itself rather than only building the query.
  "readr-dtplyr" = dat_readr_dt %>%
    group_by(state) %>%
    count(sort = TRUE) %>%
    as_tibble(),
  "fread-dtplyr" = dat_dt %>%
    group_by(state) %>%
    count(sort = TRUE) %>%
    as_tibble()
)
#> # A tibble: 3 x 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 readr 13.61ms 14.14ms 69.9 1.84MB 0
#> 2 readr-dtplyr 4.83ms 5.56ms 177. 1.08MB 2.73
#> 3 fread-dtplyr 4.9ms 5.45ms 180. 1.03MB 2.91
#> Created on 2019-11-14 by the [reprex package](https://reprex.tidyverse.org) (v0.3.0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment