Skip to content

Instantly share code, notes, and snippets.

@grimbough
Created January 22, 2019 16:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save grimbough/e7477d1b40a61a6f012e0fda2299fc42 to your computer and use it in GitHub Desktop.
Save grimbough/e7477d1b40a61a6f012e0fda2299fc42 to your computer and use it in GitHub Desktop.
Benchmarking improvements in rhdf5 index to hyperslab selection
# Install the first rhdf5 build to be benchmarked (commit f06ab6f of
# grimbough/rhdf5). Other installed packages are left alone (update = FALSE).
BiocManager::install(
  "grimbough/rhdf5",
  ref = "f06ab6f",
  update = FALSE,
  ask = TRUE,
  INSTALL_opts = "--no-lock"
)
suppressPackageStartupMessages(library(rhdf5))
suppressPackageStartupMessages(library(microbenchmark))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(stringr))
# Path of the HDF5 file that every read in this script targets.
h5file <- "/tmpdata/msmith/ExperimentHub/2230"
# Read the whole "assay001" dataset once; f1() uses nrow(dm) as the upper
# bound for the row indices it generates.
dm <- h5read(file = h5file, name = "assay001")
#' Read a subset of rows from the "assay001" dataset.
#'
#' Builds a vector of `i` row indices according to `type` and passes it to
#' `h5read()` as the first element of `index`; the second element is `NULL`.
#' Relies on the script-level objects `h5file` (file to read) and `dm`
#' (`nrow(dm)` is the upper bound of the generated indices).
#'
#' @param i Number of row indices to generate.
#' @param type Index pattern: `"seq"` for `i` values spread evenly between 1
#'   and `nrow(dm)`, `"rand"` for `i` rows sampled without replacement,
#'   `"lin"` for the first `i` rows.
#' @return Whatever `h5read()` returns for the selected rows.
f1 <- function(i, type) {
  n_rows <- nrow(dm)
  row_index <- switch(
    type,
    # seq() with length.out yields fractional doubles; truncate them so the
    # selection consists of whole row numbers.
    seq = as.integer(seq(1, n_rows, length.out = i)),
    # Produces the same draws as sample(1:n_rows, i) for a given seed.
    rand = sample.int(n_rows, i),
    # seq_len() is empty for i = 0, whereas 1:i would give c(1, 0).
    lin = seq_len(i),
    # Without a default, switch() returns NULL for an unmatched type and the
    # NULL index would be passed straight to h5read() instead of failing.
    stop("unknown index type: ", type, call. = FALSE)
  )
  h5read(h5file, name = "assay001", index = list(row_index, NULL))
}
# Time f1() for every combination of index pattern and index length, 8
# evaluations each, against the rhdf5 build installed at this point.
# The calls are generated instead of typed out; microbenchmark labels each
# unnamed entry of `list` with its deparsed call, e.g. f1(100, "seq").
bm_old <- microbenchmark(
  list = unlist(
    lapply(c("seq", "rand", "lin"), function(index_type) {
      lapply(
        c(
          100, 500, 1000, 2000, 3000, 4500,
          6000, 7000, 8000, 9000, 10000, 12000
        ),
        function(n_index) bquote(f1(.(n_index), .(index_type)))
      )
    }),
    recursive = FALSE
  ),
  times = 8
)
# Annotate the first set of timings: pull the index length (first run of 3+
# digits) and the index pattern (first run of 3+ lowercase letters) out of
# the benchmarked call's label, tag every row with the installed rhdf5
# version, then drop the label column.
res1 <- bm_old %>%
  mutate(
    index_length = as.integer(str_extract(expr, "[0-9]{3,}")),
    type = str_extract(expr, "[a-z]{3,}"),
    rhdf5_version = Biobase::package.version("rhdf5")
  ) %>%
  select(-expr) %>%
  as_tibble()
# Report which rhdf5 version produced the timings above.
message(Biobase::package.version("rhdf5"))
# Seed the RNG so the same random row selection can be drawn again later,
# and keep the result of this read for comparison.
set.seed(1234)
rand_old <- f1(10000, "rand")
############################
# Switch to the second rhdf5 build (commit 02fcc60): unload the namespace
# that is currently loaded, install the other commit, attach it again and
# report the version now in use.
unloadNamespace("rhdf5")
BiocManager::install(
  "grimbough/rhdf5",
  ref = "02fcc60",
  update = FALSE,
  ask = TRUE,
  INSTALL_opts = "--no-lock"
)
library(rhdf5)
message(Biobase::package.version("rhdf5"))
# Run the same grid of f1() calls (3 index patterns x 12 index lengths, 8
# evaluations each) against the rhdf5 build installed at this point.
# The calls are generated instead of typed out; microbenchmark labels each
# unnamed entry of `list` with its deparsed call, e.g. f1(100, "seq").
bm_new <- microbenchmark(
  list = unlist(
    lapply(c("seq", "rand", "lin"), function(index_type) {
      lapply(
        c(
          100, 500, 1000, 2000, 3000, 4500,
          6000, 7000, 8000, 9000, 10000, 12000
        ),
        function(n_index) bquote(f1(.(n_index), .(index_type)))
      )
    }),
    recursive = FALSE
  ),
  times = 8
)
# Annotate the second set of timings: pull the index length (first run of 3+
# digits) and the index pattern (first run of 3+ lowercase letters) out of
# the benchmarked call's label, tag every row with the installed rhdf5
# version, then drop the label column.
res2 <- bm_new %>%
  mutate(
    index_length = as.integer(str_extract(expr, "[0-9]{3,}")),
    type = str_extract(expr, "[a-z]{3,}"),
    rhdf5_version = Biobase::package.version("rhdf5")
  ) %>%
  select(-expr) %>%
  as_tibble()
# Reset the RNG to the seed used for rand_old so the same random rows are
# requested, this time through the rhdf5 build that is now loaded.
set.seed(1234)
rand_new <- f1(10000, "rand")
## check we have the same objects from both versions
message(identical(rand_old, rand_new))
## plot time taken
# Both result sets are stacked and drawn together: colour separates the index
# pattern, point shape and line type separate the rhdf5 versions.
# microbenchmark reports `time` in nanoseconds, so time / 1e9 is in seconds.
bind_rows(res1, res2) %>%
  ggplot(aes(x = index_length, y = time / 1e9)) +
  geom_point(aes(colour = type, shape = rhdf5_version)) +
  geom_smooth(aes(colour = type, linetype = rhdf5_version), se = FALSE) +
  theme_bw()
@lwaldron
Copy link

Nice power-ups to the benchmarking code!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment