Skip to content

Instantly share code, notes, and snippets.

@ptoche
Created November 12, 2018 15:12
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save ptoche/7917cfcec9391656d8bdf727f3e1d808 to your computer and use it in GitHub Desktop.
# benchmark computing time differences
n = 1e6
set.seed(1)
Date = sort(replicate(n, paste0(Sys.Date(), " ",
sample(0:23, 1), ":", sample(0:59, 1), ":", sample(0:59, 1))))
Date <- as.POSIXct(Date, format="%Y-%m-%d %H:%M:%S", tz = "Europe/Paris")
diff_seq_along <- function(x) {
x[seq_along(x)+1]-x[seq_along(x)]
}
diff_head_tail <- function(x) {
head(tail(x, -1) - head(x, -1), -1)
}
diff_as_matrix <- function(x) {
data.frame(diff(as.matrix(as.numeric(x))))
}
diff_length <- function(x) {
x[-1] - x[-length(x)]
}
diff_data_table <- function(x) {
library(data.table)
dt <- as.data.table(x)
setDT(dt)[, lapply(.SD, function(x) (x- shift(x))[-1])]
}
diff_dplyr <- function(x) {
x = as.numeric(x)
library(dplyr)
x - lag(x) %>%
na.omit()
}
library(microbenchmark)
bench <- microbenchmark(diff_seq_along(Date), diff_head_tail(Date), diff_as_matrix(Date), diff_length(Date), diff_data_table(Date), times = 1000)
remove_outliers <- function(x, probs = c(.25, .75), whiskers = 1.5, na.rm = FALSE, ...) {
qnt <- quantile(x, probs = probs, na.rm = na.rm, ...)
H <- whiskers * IQR(x, na.rm = na.rm)
y <- x
y[x < (qnt[1] - H)] <- NA
y[x > (qnt[2] + H)] <- NA
y
}
bench$timing <- sapply(bench$time, remove_outliers)
## library dplyr to compute cumulated sums
library(dplyr)
df <- bench %>%
select(expr, timing) %>%
group_by(expr) %>%
mutate(timing = cumsum(timing)) %>%
slice(which.max(timing)) %>%
arrange(desc(timing)) # doesn't help in ggplot
# Clean names
# Vectorized version of gsub
gsub2 <- function(pattern, replacement, x, ...) {
for(i in 1:length(pattern))
x <- gsub(pattern[i], replacement[i], x, ...)
x
}
key <- list("diff_seq_along(Date)" = "seq_along", "diff_head_tail(Date)" = "head_tail","diff_as_matrix(Date)" = "as_matrix", "diff_length(Date)" = "diff_length", "diff_data_table(Date)" = "data_table") # gsub needs fixed=TRUE to handle parenthesis
pattern <- names(key)
replacement <- unname(unlist(key))
df$expr <- gsub2(pattern, replacement, df$expr, fixed = TRUE)
library(ggplot2)
library(scales)
ggplot(data = df, aes(x = reorder(expr, -timing), y = timing, fill = expr)) +
geom_col() +
scale_y_continuous(breaks = NULL) +
labs(x = "\nmethod", y = "times") -> p
# Fix x-labels
is.odd <- function(x) x%%2 != 0
labelz <- function(x) paste0(ifelse(is.odd(seq_along(x)), "", "\n\n"), x)
labels <- labelz(df$expr)
p + scale_x_discrete(labels = labels)
ggsave("benchmark-diff.pdf")
@ptoche
Copy link
Author

ptoche commented Nov 12, 2018

benchmark-diff

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment