Skip to content

Instantly share code, notes, and snippets.

@jmbarbone
Last active November 5, 2022 02:47
Show Gist options
  • Save jmbarbone/144eaf1fb16d5bb32d5a57f41441c293 to your computer and use it in GitHub Desktop.
Save jmbarbone/144eaf1fb16d5bb32d5a57f41441c293 to your computer and use it in GitHub Desktop.
rle() with some enhancements. Repurposed from a separate script
# function has been repurposed
# a few edits from the original have been made without testing
#' Run length encode
#'
#' Encodes a run length and returns the start and stop
#'
#' @param x A vector of values for compute the length of the run
#' @param times If `NULL` will use the position of the start and stop runs,
#' otherwise will return the values returned; if not `NULL`, must be equal
#' length as `x`
#' @param overlap Logical, if `TRUE` will move the `end` position (or value) of
#' single length `x` sets to the start of the next set. If `FALSE`, the _middle_
#' distance between the start and end is used (good for preventing strict
#' cutoffs and _filling_).
#'
#' @returns A `data.frame` with the run value, the length, and the start and
#' stop positions/`times` as columns
#'
#' @seealso [base::rle]
#' @author [Jordan Mark Barbone](https://github.com/jmbarbone)
#'
#' @examples
#' # Demonstrations of rle:
#' x <- rev(rep(6:10, 1:5))
#'
#' times <- c(0.96, 1.85, 2.49, 3.46, 4.08, 4.41, 4.76,
#' 5.15, 5.94, 5.98, 6.73, 7.40, 7.57, 7.84, 8.35)
#'
#' rle(x)
#' run_length_encode(x)
#' run_length_encode(x, times)
#' run_length_encode(x, times, overlap = TRUE)
#'
#' if (requireNamespace("dplyr", quietly = TRUE)) {
#' library(dplyr)
#' # Can be used with dplyr to summarise runs
#' df <- data.frame(x, times, group = rep(c("a", "a", "b", "c", "c"), 1:5))
#' df %>%
#' dplyr::group_by(group) %>%
#' dplyr::summarise(run_length_encode(x, times))
#' }
#'
#' @export
run_length_encode <- function(x, times = NULL, overlap = FALSE) {
ls <- unclass(rle(x))
end <- cumsum(ls$lengths)
start <- end - ls$lengths + 1L
n <- length(end)
if (!is.null(times)) {
if (length(x) != length(times)) {
stop("Length of x and times must be the same", call. = FALSE)
}
end <- times[end]
start <- times[start]
}
if (isTRUE(overlap) & n >= 3) {
new <- (start[2:n] + end[1:(n - 1)]) / 2
start[2:n] <- new
end[1:(n - 1)] <- new
}
names(ls) <- c("run_length", deparse1(substitute(x)))
ls <- c(ls[2:1], list(start = start, end = end))
as.data.frame(ls)
}
# Demonstrations of rle:
x <- rev(rep(6:10, 1:5))
times <- c(0.96, 1.85, 2.49, 3.46, 4.08, 4.41, 4.76,
5.15, 5.94, 5.98, 6.73, 7.40, 7.57, 7.84, 8.35)
rle(x)
#> Run Length Encoding
#> lengths: int [1:5] 5 4 3 2 1
#> values : int [1:5] 10 9 8 7 6
run_length_encode(x)
#> x run_length start end
#> 1 10 5 1 5
#> 2 9 4 6 9
#> 3 8 3 10 12
#> 4 7 2 13 14
#> 5 6 1 15 15
run_length_encode(x, times)
#> x run_length start end
#> 1 10 5 0.96 4.08
#> 2 9 4 4.41 5.94
#> 3 8 3 5.98 7.40
#> 4 7 2 7.57 7.84
#> 5 6 1 8.35 8.35
run_length_encode(x, times, overlap = TRUE)
#> x run_length start end
#> 1 10 5 0.960 4.245
#> 2 9 4 4.245 5.960
#> 3 8 3 5.960 7.485
#> 4 7 2 7.485 8.095
#> 5 6 1 8.095 8.350
if (requireNamespace("dplyr", quietly = TRUE)) {
library(dplyr)
# Can be used with dplyr to summarise runs
df <- data.frame(x, times, group = rep(c("a", "a", "b", "c", "c"), 1:5))
df %>%
dplyr::group_by(group) %>%
dplyr::summarise(run_length_encode(x, times))
}
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
#> `summarise()` has grouped output by 'group'. You can override using the
#> `.groups` argument.
#> # A tibble: 7 × 5
#> # Groups: group [3]
#> group x run_length start end
#> <chr> <int> <int> <dbl> <dbl>
#> 1 a 10 3 0.96 2.49
#> 2 b 10 2 3.46 4.08
#> 3 b 9 1 4.41 4.41
#> 4 c 9 3 4.76 5.94
#> 5 c 8 3 5.98 7.4
#> 6 c 7 2 7.57 7.84
#> 7 c 6 1 8.35 8.35
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment