Skip to content

Instantly share code, notes, and snippets.

@mrdwab
Created February 11, 2018 05:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mrdwab/6b7b37db90037c114577f93ed391a022 to your computer and use it in GitHub Desktop.
Save mrdwab/6b7b37db90037c114577f93ed391a022 to your computer and use it in GitHub Desktop.
library(tidyverse)
library(data.table)
library(microbenchmark)
set.seed(1)
s <- sample(5, 10000, TRUE)
Sample <- rep(seq_along(s), s)
df <- data.frame(Sample,
motif = sample(LETTERS[1:10], length(Sample), TRUE),
chromosome = sample(2, length(Sample), TRUE))
myfun <- function() {
df %>%
mutate_at(c("motif", "chromosome"), factor) %>%
mutate(value = 1) %>%
distinct() %>%
mutate(key = interaction(motif, chromosome)) %>%
select(-motif, -chromosome) %>%
spread(key, value, fill = 0, drop = FALSE)
}
florianfun <- function() {
df %>%
mutate(value=1) %>%
complete(motif,chromosome,Sample,fill=list(value=0)) %>%
mutate(key=paste0(motif,',chr',chromosome)) %>%
group_by(Sample,key) %>%
summarize(value = sum(value)) %>%
spread(key,value)
}
cols <- c("motif", "chromosome")
dtfun <- function() {
as.data.table(df)[, (cols) := lapply(.SD, factor), .SDcols = cols][
, dcast(unique(.SD)[, value := 1L], Sample ~ motif + chromosome, value.var = "value", fill = 0L, drop = FALSE)]
}
res <- microbenchmark(myfun(), dtfun(), florianfun(), times = 20)
res
# Unit: milliseconds
# expr min lq mean median uq max neval
# myfun() 67.31364 68.33951 71.37471 69.33770 72.01097 84.06912 20
# dtfun() 19.86292 20.16175 21.23593 20.34515 20.79610 28.40097 20
# florianfun() 510.52688 517.95171 532.06350 524.14986 534.53571 609.94733 20
autoplot(res)
@mrdwab
Copy link
Author

mrdwab commented Feb 11, 2018

Autoplot Results

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment