Skip to content

Instantly share code, notes, and snippets.

@lvalnegri
Last active August 22, 2022 10:44
Show Gist options
  • Save lvalnegri/cf34bb9714756d6d618c7ec0d42a4724 to your computer and use it in GitHub Desktop.
Save lvalnegri/cf34bb9714756d6d618c7ec0d42a4724 to your computer and use it in GitHub Desktop.
r_benchmark
# Build the benchmark dataset: draw 1e7 row indices (with replacement) from
# the palmerpenguins data and convert every factor column to character.
dta <- palmerpenguins::penguins
ids <- sample(nrow(dta), 1e7, replace = TRUE)
dta <- dta[ids, ] |>
  dplyr::mutate(dplyr::across(where(is.factor), as.character))

# Write the same data once per on-disk format.
# `fileext` is used for the extension: `pattern` only sets the file-name
# prefix, so `pattern = ".csv"` would yield names like ".csv1a2b3c".
tmpf_csv <- tempfile(fileext = ".csv")
data.table::fwrite(dta, tmpf_csv, nThread = 10)

tmpf_parquet <- tempfile(fileext = ".parquet")
arrow::write_parquet(dta, tmpf_parquet)

tmpf_qs <- tempfile(fileext = ".qs")
qs::qsave(dta, tmpf_qs, nthreads = 10)

tmpf_fst <- tempfile(fileext = ".fst")
fst::write_fst(dta, tmpf_fst)
# Number of timed runs per reader.
n_iterations <- 20

# Time each reader on the file written in its own format.
# `check = FALSE` turns off bench's comparison of the results returned by the
# different expressions.
res <- bench::mark(
  readr = readr::read_csv(
    tmpf_csv,
    show_col_types = FALSE,
    progress = FALSE
  ),
  data.table = data.table::fread(tmpf_csv, nThread = 10),
  arrow = arrow::read_csv_arrow(tmpf_csv),
  parquet = arrow::read_parquet(tmpf_parquet),
  qs = qs::qread(tmpf_qs, nthreads = 10),
  fst = fst::read_fst(tmpf_fst),
  iterations = n_iterations,
  check = FALSE
)

# Print only the label, timing and memory columns.
subset(res, select = c("expression", "min", "median", "mem_alloc"))
#### AMD RYZEN 5 3600 (6c, 12t)
# expression min median mem_alloc
# <bch:expr> <bch:tm> <bch:tm> <bch:byt>
# 1 readr 3.66s 3.69s 611.88MB
# 2 data.table 959.47ms 987.13ms 558.6MB
# 3 arrow 310.33ms 319.15ms 118.64MB
# 4 parquet 283.04ms 303.39ms 1.08MB
# 5 qs 1.52s 1.53s 495.91MB
# 6 fst 758.77ms 764.96ms 495.92MB
#### AMD RYZEN 9 5900X (12c, 24t)
# expression min median mem_alloc
# <bch:expr> <bch:tm> <bch:tm> <bch:byt>
# 1 readr 2.22s 2.25s 612.5MB
# 2 data.table 580.9ms 584ms 556.05MB
# 3 arrow 1.41s 1.54s 118.64MB
# 4 parquet 627.74ms 640.3ms 1.08MB
# 5 qs 917.3ms 938.59ms 495.91MB
# 6 fst 702.07ms 712.91ms 495.96MB
# Delete the temporary benchmark files. The paths stored in the variables are
# passed to unlink(); passing the strings "tmpf_csv", ... would instead target
# files of those names in the working directory and leave the temp files behind.
unlink(c(tmpf_csv, tmpf_parquet, tmpf_qs, tmpf_fst))

# Remove only the objects this script created, rather than wiping the whole
# workspace with rm(list = ls()).
rm(list = intersect(
  ls(),
  c(
    "dta", "ids", "n_iterations", "res",
    "tmpf_csv", "tmpf_parquet", "tmpf_qs", "tmpf_fst"
  )
))
gc()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment