Skip to content

Instantly share code, notes, and snippets.

@r2evans

r2evans/MapReduce.R

Last active Jun 15, 2019
Embed
What would you like to do?
Combination of R's Reduce function with Map's k-ary arguments
#' 'Reduce' with arbitrary number of arguments
#'
#' Applies a function to the corresponding elements of given vectors,
#' in a reductionist way. (This is *not* related to the [Apache Hadoop
#' MapReduce](https://hadoop.apache.org/) project ... while this may
#' suggest the name 'MapReduce' is a poor choice for this function, it
#' is a logical combination of R's [Map()] and [Reduce()] functions.)
#'
#' @details
#'
#' The function is called with 'k' vectors as arguments, and the
#' function should return either a list of k single values, or a
#' single return value. If a singleton, then the value is assumed to
#' replace the 'i'th value within the first argument (list or vector),
#' and the next iteration's "previous" values will be take from this
#' and this i'th value from all vectors except the first; if a
#' k-length list, then this list is used as the next iteration's
#' "previous" (the other value in each of the k argument vectors).
#'
#' If 'init' is given, this logically adds it to the start (when
#' proceeding left-to-right) or the end of the input vectors,
#' respectively. If these possibly-augmented vectors have n > 1
#' vectors, 'MapReduce' successively applies f to the first (last) 2
#' values of each vector.
#'
#' @section Translation from 'Reduce':
#'
#' Not all functions will translate directly from [Reduce()]. Many
#' functions that work well with 'Reduce' expect exactly two
#' arguments, with an effective length of 1 each; one example is '+'
#' and other binary operators. In contrast, 'MapReduce' expects k
#' vectors and will operate on all vectors (of length 2) in a call.
#'
#' While the use of binary operators as the sole function passed to
#' 'MapReduce' is ignoring the 'Map' component, to translate binary
#' operators, one must wrap the function so that it expects a single
#' argument of length two. For example
#'
#' ```
#' binary_func <- function(a, b) `+`(a, b) # Reduce
#' binary_func2 <- function(u) `+`(u[1], u[2]) # MapReduce
#' ```
#'
#' Author: Bill Evans (bill@8pawexpress.com)
#'
#' License: MIT: use as you will, no warranty, keep this license and
#' citation if re-distributed
#'
#' @param f a function of the 'k' arity if this is called with k
#' arguments.
#' @param ... vectors or lists
#' @param init an R object of the same kind as the elements of 'x'.
#' @param right a logical indicating whether to proceed from left to
#' right (default) or from right to left.
#' @param accumulate a logical indicating whether the successive
#' reduce combinations should be accumulated. By default, only the
#' final combination is used.
#' @return 'list', possibly with nested lists, and further if
#' 'accumulate' is true
#' @export
#' @md
#' @examples
#' ### Adaptations from [Reduce()] documentation:
#' x <- list(1, 2, 3)
#' Reduce("+", x)
#' # "+" is not a good function for MapReduce, as it expects two
#' # separate inputs instead of one or more vectors, length 2; so
#' # instead, we must adapt
#' MapReduce(function(a) a[1] + a[2], x)
#' # perhaps [sum()] is better
#' MapReduce(sum, x)
#'
#' Reduce("+", x, accumulate = TRUE)
#' MapReduce(sum, x, accumulate = TRUE)
#'
#' cfrac_binary <- function(u, v) u + 1 / v
#' cfrac_unary <- function(u) u[1] + 1 / u[2]
#' ## Continued fraction approximation for pi:
#' x <- c(3, 7, 15, 1, 292)
#' Reduce(cfrac_binary, x, right = TRUE)
#' MapReduce(cfrac_unary, x, right = TRUE, simplify = TRUE)
#'
#' ## Continued fraction approximation for Euler's number (e):
#' x <- c(2, 1, 2, 1, 1, 4, 1, 1, 6, 1, 1, 8)
#' Reduce(cfrac_binary, x, right = TRUE)
#' MapReduce(cfrac_unary, x, right = TRUE, simplify = TRUE)
#'
#' ### Extensions, allowing for more arguments
#'
#' # suggested by https://stackoverflow.com/q/56612304
#' if (require("data.table")) {
#'
#' nT <- 5
#' int <- rep(1.1, 5)
#' loan <- data.table(loan.age = seq(0:(nT-1)), payment = c(5000, -rep(1000,(nT-1))))
#' f <- function(payments, interests) {
#' list(payments[2] + interests[2] * payments[1], interests[2])
#' }
#'
#' loan[, c("interest", "balance") := 0
#' ][,balance := MapReduce(f, payment, int, accumulate = TRUE, simplify = TRUE)
#' ][,interest := c(0, diff(balance) - payment[-1])
#' ]
#'
#' ### that was boring, 'int' was unchanging; here's the impetus for MapReduce
#'
#' set.seed(2)
#' (int <- rnorm(nT, mean = 0.1, sd = 0.02) + 1)
#' # [1] 1.082062 1.103697 1.131757 1.077392 1.098395
#'
#' # refresh 'loan'
#' loan <- data.table(loan.age = seq(0:(nT-1)), payment = c(5000, -rep(1000,(nT-1))))
#'
#' loan[, c("interest", "balance") := 0
#' ][,balance := MapReduce(f, payment, int, accumulate = TRUE, simplify = TRUE)
#' ][,interest := c(0, diff(balance) - payment[-1])
#' ]
#'
#' }
MapReduce <- function (f, ..., init, right = FALSE, accumulate = FALSE, simplify = FALSE) {
mis <- missing(init)
xs <- list(...)
lens <- lengths(xs)
len1 <- max(lens)
if (length(lens) == 0L || len1 == 0L)
return(if (mis) NULL else init)
if (!all(lens %in% c(len1, 1L))) {
stop("all arguments must be same length or length 1")
}
xs[lens == 1L] <- Map(replicate, len1[any(lens == 1L)], xs[lens == 1L])
f <- match.fun(f)
xs <- lapply(xs, function(x) if (!is.vector(x) || is.object(x)) as.list(x) else x)
ind <- seq_len(len1)
if (mis) {
if (right) {
init <- lapply(xs, `[[`, len1)
ind <- ind[-len1]
} else {
init <- lapply(xs, `[[`, 1L)
ind <- ind[-1L]
}
}
if (!accumulate) {
if (right) {
for (i in rev(ind)) {
out <- do.call(forceAndCall, c(list(2, f), Map(c, lapply(xs, `[[`, i), init)))
if (is.list(out)) {
if (length(out) == length(init)) {
init <- out
} else {
stop("function output is not length 1 or same length as 'init'")
}
} else {
xs[[1]][[i]] <- out
init <- lapply(xs, `[[`, i)
}
}
} else {
for (i in ind) {
out <- do.call(forceAndCall, c(list(2, f), Map(c, init, lapply(xs, `[[`, i))))
if (is.list(out)) {
if (length(out) == length(init)) {
init <- out
} else {
stop("function output is not length 1 or same length as 'init'")
}
} else {
xs[[1]][[i]] <- out
init <- lapply(xs, `[[`, i)
}
}
}
if (!isFALSE(simplify) && length(out))
init <- simplify2array(init[[1]], higher = (simplify == "array"))
init
} else {
len <- length(ind) + 1L
out <- vector("list", len)
if (mis) {
if (right) {
out[[len]] <- init
for (i in rev(ind)) {
out1 <- do.call(forceAndCall, c(list(2, f), Map(c, lapply(xs, `[[`, i), init)))
if (is.list(out1)) {
if (length(out1) == length(init)) {
out[[i]] <- init <- out1
} else {
stop("function output is not a list length 1 or same length as 'init'")
}
} else {
xs[[1]][[i]] <- out1
out[[i]] <- init <- lapply(xs, `[[`, i)
}
}
} else {
out[[1L]] <- init
for (i in ind) {
out1 <- do.call(forceAndCall, c(list(2, f), Map(c, init, lapply(xs, `[[`, i))))
if (is.list(out1)) {
if (length(out1) == length(init)) {
out[[i]] <- init <- out1
} else {
stop("function output is not length 1 or same length as 'init'")
}
} else {
xs[[1]][[i]] <- out1
out[[i]] <- init <- lapply(xs, `[[`, i)
}
}
}
} else {
if (right) {
out[[len]] <- init
for (i in rev(ind)) {
out1 <- do.call(forceAndCall, c(list(2, f), Map(c, lapply(xs, `[[`, i), init)))
if (is.list(out1)) {
if (length(out1) == length(init)) {
out[[i]] <- init <- out1
} else {
stop("function output is not a list length 1 or same length as 'init'")
}
} else {
xs[[1]][[i]] <- out1
out[[i]] <- init <- lapply(xs, `[[`, i)
}
}
} else {
for (i in ind) {
out[[i]] <- init
out1 <- do.call(forceAndCall, c(list(2, f), Map(c, init, lapply(xs, `[[`, i))))
if (is.list(out1)) {
if (length(out1) == length(init)) {
init <- out1
} else {
stop("function output is not length 1 or same length as 'init'")
}
} else {
xs[[1]][[i]] <- out1
init <- lapply(xs, `[[`, i)
}
}
out[[len]] <- init
}
}
if (all(lengths(out) == 1L))
out <- unlist(out, recursive = FALSE)
if (!isFALSE(simplify) && length(out))
out <- simplify2array(lapply(out, `[[`, 1), higher = (simplify == "array"))
out
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment