Skip to content

Instantly share code, notes, and snippets.

@simonpcouch
Created November 18, 2022 15:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save simonpcouch/651d0ea4d968b455ded8194578dabf52 to your computer and use it in GitHub Desktop.
Save simonpcouch/651d0ea4d968b455ded8194578dabf52 to your computer and use it in GitHub Desktop.
# benchmarking the new parsnip release

# with v1.0.2 ------------------------------------------------------------
# Install the release under test *before* attaching tidymodels. R does not
# reload a namespace that is already loaded, so attaching first would time
# whichever parsnip version happened to be installed previously.
pak::pkg_install("tidymodels/parsnip@v1.0.2")
library(tidymodels)

# Benchmark grid: training-set sizes crossed with numbers of CV folds.
num_samples <- 10^(3:7)
num_resamples <- c(5, 10, 20)
nrow <- length(num_samples) * length(num_resamples)

# Preallocate one result row per (num_samples, num_resamples) combination.
res_v102 <-
  tibble(
    version = character(nrow),
    num_samples = numeric(nrow),
    num_resamples = numeric(nrow),
    time_to_fit = numeric(nrow)
  )

set.seed(1)

for (i in seq_along(num_samples)) {
  # Simulated data: y is x plus Gaussian noise (sd = 0.2).
  dat <- tibble(
    x = rnorm(num_samples[i]),
    y = x + rnorm(num_samples[i], sd = .2)
  )

  for (j in seq_along(num_resamples)) {
    folds <- vfold_cv(dat, v = num_resamples[j])

    # Only the resampled fit is timed; data and fold creation are excluded.
    timing <-
      system.time({
        fit_resamples(linear_reg(), y ~ x, folds)
      })

    # Map the (i, j) grid position onto a unique row of the results tibble.
    idx <- (length(num_samples) * (j - 1)) + i
    res_v102[idx, ] <- list(
      "v1.0.2",
      num_samples[i],
      num_resamples[j],
      timing[["elapsed"]]
    )
  }
}
# with v1.0.3 ------------------------------------------------------------
pak::pkg_install("tidymodels/parsnip@v1.0.3")

# Installing a package does not replace a namespace that is already loaded
# in this session. If the loaded parsnip differs from the one just written
# to the library, the timings below would silently re-measure the old
# release, so fail loudly instead.
parsnip_loaded <- package_version(unname(getNamespaceVersion("parsnip")))
parsnip_installed <- utils::packageVersion("parsnip", lib.loc = .libPaths()[1])
if (parsnip_loaded != parsnip_installed) {
  stop(
    "parsnip ", format(parsnip_loaded), " is still loaded, but ",
    format(parsnip_installed), " is now installed. Restart R (keeping the ",
    "workspace) and re-attach tidymodels before running this section.",
    call. = FALSE
  )
}

# Preallocate one result row per (num_samples, num_resamples) combination.
res_v103 <-
  tibble(
    version = character(nrow),
    num_samples = numeric(nrow),
    num_resamples = numeric(nrow),
    time_to_fit = numeric(nrow)
  )

# Same seed as the v1.0.2 run.
set.seed(1)

for (i in seq_along(num_samples)) {
  # Simulated data: y is x plus Gaussian noise (sd = 0.2).
  dat <- tibble(
    x = rnorm(num_samples[i]),
    y = x + rnorm(num_samples[i], sd = .2)
  )

  for (j in seq_along(num_resamples)) {
    folds <- vfold_cv(dat, v = num_resamples[j])

    # Only the resampled fit is timed; data and fold creation are excluded.
    timing <-
      system.time({
        fit_resamples(linear_reg(), y ~ x, folds)
      })

    # Map the (i, j) grid position onto a unique row of the results tibble.
    idx <- (length(num_samples) * (j - 1)) + i
    res_v103[idx, ] <- list(
      "v1.0.3",
      num_samples[i],
      num_resamples[j],
      timing[["elapsed"]]
    )
  }
}
# plotting ---------------------------------------------------------------
# Stack both runs, then spread the versions into columns so the speedup is
# a simple ratio per (num_samples, num_resamples) combination.
res <- bind_rows(res_v102, res_v103)

res_plot <-
  res %>%
  pivot_wider(
    id_cols = c(num_samples, num_resamples),
    names_from = version,
    values_from = time_to_fit
  ) %>%
  mutate(speedup = `v1.0.2` / `v1.0.3`) %>%
  select(-starts_with("v")) %>%
  mutate(
    # Derive the levels from the data (largest fold count first) rather than
    # hard-coding them, so changing the benchmark grid cannot introduce NAs.
    num_resamples = factor(
      num_resamples,
      levels = sort(unique(num_resamples), decreasing = TRUE),
      ordered = TRUE
    )
  ) %>%
  ggplot() +
  aes(x = num_samples, y = speedup, col = num_resamples) +
  geom_line() +
  scale_x_log10() +
  labs(
    x = "Number of Rows in Training Data",
    y = "Speedup (v1.0.2 / v1.0.3)",
    col = "Number \nof Folds",
    title = "tidymodels got a lot faster!",
    subtitle = "The new release of parsnip contributed a significant speedup for model fitting."
  ) +
  scale_color_viridis_d(end = .8) +
  theme(plot.subtitle = element_text(face = "italic"))

# Write the figure to the working directory.
ggsave("speedup.png", res_plot, width = 6, height = 4, dpi = 400)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment