Skip to content

Instantly share code, notes, and snippets.

@lindeloev
Last active September 15, 2022 07:23
Show Gist options
  • Save lindeloev/78671607c4257567999dfb6c50d4d484 to your computer and use it in GitHub Desktop.
Save lindeloev/78671607c4257567999dfb6c50d4d484 to your computer and use it in GitHub Desktop.
mutate_progress()
#' dplyr::mutate, but with a progress bar for grouped data
#'
#' @aliases mutate_progress
#' @export
#' @inheritParams dplyr::mutate
#' @param .prefix Text to show before the progress bar.
#' @param .format Format for the progress bar. See documentation in \code{\link[progress]{progress_bar}}.
#' - ":what" identifies the current group.
#' - ":total" is the total number of groups.
#' - ":current" is the current group number.
#' @param .progress_args Further arguments to
#' \code{\link[progress]{progress_bar}}.
#' @details The order of computation differs between \code{\link[dplyr]{mutate}}
#' and `mutate_progress`. \code{\link[dplyr]{mutate}} computes for all groups at
#' each variable before proceeding to the next variable. `mutate_progress`
#' computes for all variables in each group, before proceeding to the next
#' group.
#' @encoding UTF-8
#' @author Jonas Kristoffer Lindeløv \email{jonas@@lindeloev.dk}
#' @examples
#' library(dplyr)
#' slow_mean = function(x) {
#' Sys.sleep(runif(1, 0.3, 0.8))
#' mean(x)
#' }
#'
#' mtcars %>%
#' # Default usage
#' group_by(cyl, gear) %>%
#' mutate_progress(
#' first_mean = slow_mean(gear * mpg),
#' .prefix = "First mutate"
#' ) %>%
#'
#' # Control appearance
#' group_by(vs, am, carb) %>%
#' mutate_progress(
#' second_mean = slow_mean(wt),
#' .format = "[:bar] :percent (remaining: :eta)"
#' )
mutate_progress = function(.data, ..., .keep = c("all", "used", "unused", "none"), .before = NULL, .after = NULL, .prefix = "", .format = "[:bar] :what (:current/:total)", .progress_args = list(incomplete = " ")) {
# Don't initiate progress bars for ungrouped data
if (length(groups(.data)) == 0)
return(dplyr::mutate(.data, ..., .keep = .keep, .before = .before, .after = .after))
# Set prefix
stopifnot(is.character(.prefix))
stopifnot(length(.prefix) == 1)
stopifnot(is.character(.format))
stopifnot(length(.format) == 1)
stopifnot(is.list(.progress_args))
if (.prefix != "")
.format = paste0(.prefix, ": ", .format)
# Initiate progress bar
group_names = group_keys(.data) %>%
Map(paste, names(.), ., sep = ':') %>%
as.data.frame() %>%
apply(1, paste0, collapse = ", ")
progress_args = c(
list(
format = .format,
total = dplyr::n_groups(.data)
),
.progress_args
)
pb = do.call(progress::progress_bar$new, progress_args)
# Call dplyr::mutate by group x variable (rather than variable x group)
mutate_single_group = function(.data, .groups, ...) {
pb$tick(token = list(what = group_names[pb$.__enclos_env__$private$current + 1]))
.data %>%
dplyr::mutate(.groups) %>% # Add them back in
dplyr::mutate(...) %>%
dplyr::select(-!!names(.groups)) # Remove again
}
group_modify_args = c(
list(
.data = .data,
.f = mutate_single_group
),
match.call(expand.dots = FALSE)$`...`
)
do.call(dplyr::group_modify, group_modify_args)
}
@lindeloev
Copy link
Author

@GGLuca It's from the progress package. Just updated the code

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment