Skip to content
{{ message }}

Instantly share code, notes, and snippets.

# r2evans/MapReduce.R

Last active Jun 15, 2019
Combination of R's Reduce function with Map's k-ary arguments
 #' 'Reduce' with arbitrary number of arguments #' #' Applies a function to the corresponding elements of given vectors, #' in a reductionist way. (This is *not* related to the [Apache Hadoop #' MapReduce](https://hadoop.apache.org/) project ... while this may #' suggest the name 'MapReduce' is a poor choice for this function, it #' is a logical combination of R's [Map()] and [Reduce()] functions.) #' #' @details #' #' The function is called with 'k' vectors as arguments, and the #' function should return either a list of k single values, or a #' single return value. If a singleton, then the value is assumed to #' replace the 'i'th value within the first argument (list or vector), #' and the next iteration's "previous" values will be take from this #' and this i'th value from all vectors except the first; if a #' k-length list, then this list is used as the next iteration's #' "previous" (the other value in each of the k argument vectors). #' #' If 'init' is given, this logically adds it to the start (when #' proceeding left-to-right) or the end of the input vectors, #' respectively. If these possibly-augmented vectors have n > 1 #' vectors, 'MapReduce' successively applies f to the first (last) 2 #' values of each vector. #' #' @section Translation from 'Reduce': #' #' Not all functions will translate directly from [Reduce()]. Many #' functions that work well with 'Reduce' expect exactly two #' arguments, with an effective length of 1 each; one example is '+' #' and other binary operators. In contrast, 'MapReduce' expects k #' vectors and will operate on all vectors (of length 2) in a call. #' #' While the use of binary operators as the sole function passed to #' 'MapReduce' is ignoring the 'Map' component, to translate binary #' operators, one must wrap the function so that it expects a single #' argument of length two. For example #' #' ``` #' binary_func <- function(a, b) `+`(a, b) # Reduce #' binary_func2 <- function(u) `+`(u, u) # MapReduce #' ``` #' #' Author: Bill Evans (bill@8pawexpress.com) #' #' License: MIT: use as you will, no warranty, keep this license and #' citation if re-distributed #' #' @param f a function of the 'k' arity if this is called with k #' arguments. #' @param ... vectors or lists #' @param init an R object of the same kind as the elements of 'x'. #' @param right a logical indicating whether to proceed from left to #' right (default) or from right to left. #' @param accumulate a logical indicating whether the successive #' reduce combinations should be accumulated. By default, only the #' final combination is used. #' @return 'list', possibly with nested lists, and further if #' 'accumulate' is true #' @export #' @md #' @examples #' ### Adaptations from [Reduce()] documentation: #' x <- list(1, 2, 3) #' Reduce("+", x) #' # "+" is not a good function for MapReduce, as it expects two #' # separate inputs instead of one or more vectors, length 2; so #' # instead, we must adapt #' MapReduce(function(a) a + a, x) #' # perhaps [sum()] is better #' MapReduce(sum, x) #' #' Reduce("+", x, accumulate = TRUE) #' MapReduce(sum, x, accumulate = TRUE) #' #' cfrac_binary <- function(u, v) u + 1 / v #' cfrac_unary <- function(u) u + 1 / u #' ## Continued fraction approximation for pi: #' x <- c(3, 7, 15, 1, 292) #' Reduce(cfrac_binary, x, right = TRUE) #' MapReduce(cfrac_unary, x, right = TRUE, simplify = TRUE) #' #' ## Continued fraction approximation for Euler's number (e): #' x <- c(2, 1, 2, 1, 1, 4, 1, 1, 6, 1, 1, 8) #' Reduce(cfrac_binary, x, right = TRUE) #' MapReduce(cfrac_unary, x, right = TRUE, simplify = TRUE) #' #' ### Extensions, allowing for more arguments #' #' # suggested by https://stackoverflow.com/q/56612304 #' if (require("data.table")) { #' #' nT <- 5 #' int <- rep(1.1, 5) #' loan <- data.table(loan.age = seq(0:(nT-1)), payment = c(5000, -rep(1000,(nT-1)))) #' f <- function(payments, interests) { #' list(payments + interests * payments, interests) #' } #' #' loan[, c("interest", "balance") := 0 #' ][,balance := MapReduce(f, payment, int, accumulate = TRUE, simplify = TRUE) #' ][,interest := c(0, diff(balance) - payment[-1]) #' ] #' #' ### that was boring, 'int' was unchanging; here's the impetus for MapReduce #' #' set.seed(2) #' (int <- rnorm(nT, mean = 0.1, sd = 0.02) + 1) #' #  1.082062 1.103697 1.131757 1.077392 1.098395 #' #' # refresh 'loan' #' loan <- data.table(loan.age = seq(0:(nT-1)), payment = c(5000, -rep(1000,(nT-1)))) #' #' loan[, c("interest", "balance") := 0 #' ][,balance := MapReduce(f, payment, int, accumulate = TRUE, simplify = TRUE) #' ][,interest := c(0, diff(balance) - payment[-1]) #' ] #' #' } MapReduce <- function (f, ..., init, right = FALSE, accumulate = FALSE, simplify = FALSE) { mis <- missing(init) xs <- list(...) lens <- lengths(xs) len1 <- max(lens) if (length(lens) == 0L || len1 == 0L) return(if (mis) NULL else init) if (!all(lens %in% c(len1, 1L))) { stop("all arguments must be same length or length 1") } xs[lens == 1L] <- Map(replicate, len1[any(lens == 1L)], xs[lens == 1L]) f <- match.fun(f) xs <- lapply(xs, function(x) if (!is.vector(x) || is.object(x)) as.list(x) else x) ind <- seq_len(len1) if (mis) { if (right) { init <- lapply(xs, `[[`, len1) ind <- ind[-len1] } else { init <- lapply(xs, `[[`, 1L) ind <- ind[-1L] } } if (!accumulate) { if (right) { for (i in rev(ind)) { out <- do.call(forceAndCall, c(list(2, f), Map(c, lapply(xs, `[[`, i), init))) if (is.list(out)) { if (length(out) == length(init)) { init <- out } else { stop("function output is not length 1 or same length as 'init'") } } else { xs[][[i]] <- out init <- lapply(xs, `[[`, i) } } } else { for (i in ind) { out <- do.call(forceAndCall, c(list(2, f), Map(c, init, lapply(xs, `[[`, i)))) if (is.list(out)) { if (length(out) == length(init)) { init <- out } else { stop("function output is not length 1 or same length as 'init'") } } else { xs[][[i]] <- out init <- lapply(xs, `[[`, i) } } } if (!isFALSE(simplify) && length(out)) init <- simplify2array(init[], higher = (simplify == "array")) init } else { len <- length(ind) + 1L out <- vector("list", len) if (mis) { if (right) { out[[len]] <- init for (i in rev(ind)) { out1 <- do.call(forceAndCall, c(list(2, f), Map(c, lapply(xs, `[[`, i), init))) if (is.list(out1)) { if (length(out1) == length(init)) { out[[i]] <- init <- out1 } else { stop("function output is not a list length 1 or same length as 'init'") } } else { xs[][[i]] <- out1 out[[i]] <- init <- lapply(xs, `[[`, i) } } } else { out[[1L]] <- init for (i in ind) { out1 <- do.call(forceAndCall, c(list(2, f), Map(c, init, lapply(xs, `[[`, i)))) if (is.list(out1)) { if (length(out1) == length(init)) { out[[i]] <- init <- out1 } else { stop("function output is not length 1 or same length as 'init'") } } else { xs[][[i]] <- out1 out[[i]] <- init <- lapply(xs, `[[`, i) } } } } else { if (right) { out[[len]] <- init for (i in rev(ind)) { out1 <- do.call(forceAndCall, c(list(2, f), Map(c, lapply(xs, `[[`, i), init))) if (is.list(out1)) { if (length(out1) == length(init)) { out[[i]] <- init <- out1 } else { stop("function output is not a list length 1 or same length as 'init'") } } else { xs[][[i]] <- out1 out[[i]] <- init <- lapply(xs, `[[`, i) } } } else { for (i in ind) { out[[i]] <- init out1 <- do.call(forceAndCall, c(list(2, f), Map(c, init, lapply(xs, `[[`, i)))) if (is.list(out1)) { if (length(out1) == length(init)) { init <- out1 } else { stop("function output is not length 1 or same length as 'init'") } } else { xs[][[i]] <- out1 init <- lapply(xs, `[[`, i) } } out[[len]] <- init } } if (all(lengths(out) == 1L)) out <- unlist(out, recursive = FALSE) if (!isFALSE(simplify) && length(out)) out <- simplify2array(lapply(out, `[[`, 1), higher = (simplify == "array")) out } }
to join this conversation on GitHub. Already have an account? Sign in to comment