Skip to content

Instantly share code, notes, and snippets.

@rmflight
Created March 30, 2016 01:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save rmflight/53a93424f00b83a907d0d79ad5557d38 to your computer and use it in GitHub Desktop.
Save rmflight/53a93424f00b83a907d0d79ad5557d38 to your computer and use it in GitHub Desktop.
feather benchmarking
library(devtools)
# install_github("wesm/feather/R")
library(feather)
library(microbenchmark)
# Benchmark writing and reading one data frame as CSV, .rda, .rds and feather.
# Each operation is timed 10 times with microbenchmark(); the files are written
# to the current working directory and left in place afterwards.

# Setup: test data ----
# The seed was originally written as `3-29-16`, which R evaluates as the
# arithmetic expression 3 - 29 - 16 = -42. The value is spelled out here so the
# random stream is unchanged but the intent is no longer misleading.
set.seed(-42)
rows <- 100000
x <- data.frame(ints = round(runif(rows, -100, 100)), stringsAsFactors = FALSE)
x$floats <- runif(rows, -100, 100)
x$bools <- sample(c(TRUE, FALSE), rows, replace = TRUE)
x$dates <- as.POSIXct(runif(rows, 100000000, 1459293171), origin = "1970-01-01")
x$categories <- as.factor(sample(c(LETTERS, 0:9), rows, replace = TRUE))
# One random lowercase string of 1 to 10 characters per row.
x$strings <- replicate(
  rows,
  paste0(sample(letters, sample(1:10, 1), replace = TRUE), collapse = "")
)

# Write timings ----
# row.names = FALSE: by default write.csv() emits the row names as an extra
# unnamed first column, which read.csv() then returns as a seventh column
# (`X`). That inflates the CSV size and timings relative to the six-column
# data frame actually being benchmarked.
microbenchmark(
  write.csv(x, file = "x.csv", row.names = FALSE), times = 10
)
microbenchmark(
  save(x, file = "x.rda"), times = 10
)
microbenchmark(
  saveRDS(x, file = "x.rds"), times = 10
)
microbenchmark(
  write_feather(x, "x.feather"), times = 10
)

# Drop the in-memory data so the read timings start from the files alone.
rm(x, rows)

# On-disk sizes (bytes) ----
file.size("x.csv")
file.size("x.rda")
file.size("x.rds")
file.size("x.feather")

# Read timings ----
microbenchmark(
  y <- read.csv("x.csv"), times = 10
)
rm(y)
microbenchmark(
  load("x.rda"), times = 10
)
# load() recreates `x` (removed above), so drop it again before the next run.
rm(x)
microbenchmark(
  y <- readRDS("x.rds"), times = 10
)
rm(y)
microbenchmark(
  y <- read_feather("x.feather"), times = 10
)

# Record the R and package versions used for this run.
session_info()
> source('~/feather_benchmark.R', echo=TRUE)
> library(devtools)
> # install_github("wesm/feather/R")
> library(feather)
> library(microbenchmark)
> set.seed(3-29-16)
> rows <- 100000
> x <- data.frame(ints = round(runif(rows, -100, 100)), stringsAsFactors = FALSE)
> x$floats <- runif(rows, -100, 100)
> x$bools <- sample(c(TRUE, FALSE), rows, replace = TRUE)
> x$dates <- as.POSIXct(runif(rows, 100000000, 1459293171), origin = "1970-01-01")
> x$categories <- as.factor(sample(c(LETTERS, 0:9), rows, replace = TRUE))
> x$strings <- replicate(rows, paste0(sample(letters, sample(1:10, 1), replace = TRUE), collapse = ""))
> microbenchmark(
+ write.csv(x, file = "x.csv"), times = 10
+ )
Unit: milliseconds
expr min lq mean median uq max neval
write.csv(x, file = "x.csv") 920.1746 950.7067 1083.285 1052.906 1109.616 1569.202 10
> microbenchmark(
+ save(x, file = "x.rda"), times = 10
+ )
Unit: milliseconds
expr min lq mean median uq max neval
save(x, file = "x.rda") 485.4359 491.7663 493.8786 495.3945 496.6437 500.302 10
> microbenchmark(
+ saveRDS(x, file = "x.rds"), times = 10
+ )
Unit: milliseconds
expr min lq mean median uq max neval
saveRDS(x, file = "x.rds") 485.4695 493.785 509.7228 497.8886 501.1894 591.053 10
> microbenchmark(
+ write_feather(x, "x.feather"), times = 10
+ )
Unit: milliseconds
expr min lq mean median uq max neval
write_feather(x, "x.feather") 12.36809 27.26378 47.24269 59.39752 65.18266 67.80251 10
> rm(x, rows)
> file.size("x.csv")
[1] 6667705
> file.size("x.rda")
[1] 2172036
> file.size("x.rds")
[1] 2172037
> file.size("x.feather")
[1] 3761724
> microbenchmark(
+ y <- read.csv("x.csv"), times = 10
+ )
Unit: seconds
expr min lq mean median uq max neval
y <- read.csv("x.csv") 2.085135 2.108388 2.182528 2.209813 2.240324 2.287626 10
> rm(y)
> microbenchmark(
+ load("x.rda"), times = 10
+ )
Unit: milliseconds
expr min lq mean median uq max neval
load("x.rda") 76.07915 76.30072 78.13084 76.58067 77.5034 89.92839 10
> rm(x)
> microbenchmark(
+ y <- readRDS("x.rds"), times = 10
+ )
Unit: milliseconds
expr min lq mean median uq max neval
y <- readRDS("x.rds") 76.8466 76.98257 78.76689 77.24436 78.49101 90.58261 10
> rm(y)
> microbenchmark(
+ y <- read_feather("x.feather"), times = 10
+ )
Unit: milliseconds
expr min lq mean median uq max neval
y <- read_feather("x.feather") 11.5892 11.65439 13.26675 11.68718 11.73939 24.76797 10
> session_info()
Session info ---------------------------------------------------------------------------------------------------------------------
setting value
version R version 3.2.2 (2015-08-14)
system x86_64, linux-gnu
ui RStudio (0.99.875)
language (EN)
collate en_US.UTF-8
tz America/New_York
date 2016-03-29
Packages -------------------------------------------------------------------------------------------------------------------------
package * version date source
colorspace 1.2-6 2015-03-11 CRAN (R 3.2.2)
devtools * 1.9.1.9000 2016-01-21 local
digest 0.6.9 2016-01-08 CRAN (R 3.2.3)
feather * 0.0.0.9000 2016-03-30 Github (wesm/feather@a58e3be)
ggplot2 2.1.0 2016-03-01 CRAN (R 3.2.2)
gtable 0.2.0 2016-02-26 CRAN (R 3.2.2)
memoise 1.0.0 2016-01-29 CRAN (R 3.2.2)
microbenchmark * 1.4-2.1 2015-11-25 CRAN (R 3.2.2)
munsell 0.4.3 2016-02-13 CRAN (R 3.2.2)
plyr 1.8.3 2015-06-12 CRAN (R 3.2.2)
Rcpp 0.12.4 2016-03-26 CRAN (R 3.2.2)
scales 0.4.0 2016-02-26 CRAN (R 3.2.2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment