r2evans/MapReduce.R

## MapReduce.R
#' 'Reduce' with arbitrary number of arguments
#'
#' Applies a function to the corresponding elements of given vectors,
#' in a reductionist way. (This is *not* related to the [Apache Hadoop
#' MapReduce](https://hadoop.apache.org/) project ... while this may
#' suggest the name 'MapReduce' is a poor choice for this function, it
#' is a logical combination of R's [Map()] and [Reduce()] functions.)
#'
#' @details
#'
#' The function is called with 'k' vectors as arguments, and the
#' function should return either a list of k single values, or a
#' single return value. If a singleton, then the value is assumed to
#' replace the 'i'th value within the first argument (list or vector),
#' and the next iteration's "previous" values will be take from this
#' and this i'th value from all vectors except the first; if a
#' k-length list, then this list is used as the next iteration's
#' "previous" (the other value in each of the k argument vectors).
#'
#' If 'init' is given, this logically adds it to the start (when
#' proceeding left-to-right) or the end of the input vectors,
#' respectively. If these possibly-augmented vectors have n > 1
#' vectors, 'MapReduce' successively applies f to the first (last) 2
#' values of each vector.
#'
#' @section Translation from 'Reduce':
#'
#' Not all functions will translate directly from [Reduce()]. Many
#' functions that work well with 'Reduce' expect exactly two
#' arguments, with an effective length of 1 each; one example is '+'
#' and other binary operators. In contrast, 'MapReduce' expects k
#' vectors and will operate on all vectors (of length 2) in a call.
#'
#' While the use of binary operators as the sole function passed to
#' 'MapReduce' is ignoring the 'Map' component, to translate binary
#' operators, one must wrap the function so that it expects a single
#' argument of length two. For example
#'
#' ```
#' binary_func  <- function(a, b) `+`(a, b)    # Reduce
#' binary_func2 <- function(u) `+`(u[1], u[2]) # MapReduce
#' ```
#'
#' Author: Bill Evans (bill@8pawexpress.com)
#'
#' License: MIT: use as you will, no warranty, keep this license and
#' citation if re-distributed
#'
#' @param f a function of the 'k' arity if this is called with k
#'   arguments.
#' @param ... vectors or lists
#' @param init an R object of the same kind as the elements of 'x'.
#' @param right a logical indicating whether to proceed from left to
#'   right (default) or from right to left.
#' @param accumulate a logical indicating whether the successive
#'   reduce combinations should be accumulated. By default, only the
#'   final combination is used.
#' @return 'list', possibly with nested lists, and further if
#'   'accumulate' is true
#' @export
#' @md
#' @examples
#' ### Adaptations from [Reduce()] documentation:
#' x <- list(1, 2, 3)
#' Reduce("+", x)
#' # "+" is not a good function for MapReduce, as it expects two
#' # separate inputs instead of one or more vectors, length 2; so
#' # instead, we must adapt
#' MapReduce(function(a) a[1] + a[2], x)
#' # perhaps [sum()] is better
#' MapReduce(sum, x)
#'
#' Reduce("+", x, accumulate = TRUE)
#' MapReduce(sum, x, accumulate = TRUE)
#'
#' cfrac_binary <- function(u, v) u + 1 / v
#' cfrac_unary <- function(u) u[1] + 1 / u[2]
#' ## Continued fraction approximation for pi:
#' x <- c(3, 7, 15, 1, 292)
#' Reduce(cfrac_binary, x, right = TRUE)
#' MapReduce(cfrac_unary, x, right = TRUE, simplify = TRUE)
#'
#' ## Continued fraction approximation for Euler's number (e):
#' x <- c(2, 1, 2, 1, 1, 4, 1, 1, 6, 1, 1, 8)
#' Reduce(cfrac_binary, x, right = TRUE)
#' MapReduce(cfrac_unary, x, right = TRUE, simplify = TRUE)
#'
#' ### Extensions, allowing for more arguments
#'
#' # suggested by https://stackoverflow.com/q/56612304
#' if (require("data.table")) {
#'
#'   nT <- 5
#'   int <- rep(1.1, 5)
#'   loan <- data.table(loan.age = seq(0:(nT-1)), payment = c(5000, -rep(1000,(nT-1))))
#'   f <- function(payments, interests) {
#'     list(payments[2] + interests[2] * payments[1], interests[2])
#'   }
#'
#'   loan[, c("interest", "balance") := 0
#'        ][,balance := MapReduce(f, payment, int, accumulate = TRUE, simplify = TRUE)
#'          ][,interest := c(0, diff(balance) - payment[-1])
#'            ]
#'
#'   ### that was boring, 'int' was unchanging; here's the impetus for MapReduce
#'
#'   set.seed(2)
#'   (int <- rnorm(nT, mean = 0.1, sd = 0.02) + 1)
#'   # [1] 1.082062 1.103697 1.131757 1.077392 1.098395
#'
#'   # refresh 'loan'
#'   loan <- data.table(loan.age = seq(0:(nT-1)), payment = c(5000, -rep(1000,(nT-1))))
#'
#'   loan[, c("interest", "balance") := 0
#'        ][,balance := MapReduce(f, payment, int, accumulate = TRUE, simplify = TRUE)
#'          ][,interest := c(0, diff(balance) - payment[-1])
#'            ]
#'
#' }
MapReduce <- function (f, ..., init, right = FALSE, accumulate = FALSE, simplify = FALSE) {
  mis <- missing(init)
  xs <- list(...)
  lens <- lengths(xs)
  len1 <- max(lens)
  if (length(lens) == 0L || len1 == 0L)
    return(if (mis) NULL else init)
  if (!all(lens %in% c(len1, 1L))) {
    stop("all arguments must be same length or length 1")
  }
  xs[lens == 1L] <- Map(replicate, len1[any(lens == 1L)], xs[lens == 1L])
  f <- match.fun(f)
  xs <- lapply(xs, function(x) if (!is.vector(x) || is.object(x)) as.list(x) else x)
  ind <- seq_len(len1)
  if (mis) {
    if (right) {
      init <- lapply(xs, `[[`, len1)
      ind <- ind[-len1]
    } else {
      init <- lapply(xs, `[[`, 1L)
      ind <- ind[-1L]
    }
  }
  if (!accumulate) {
    if (right) {
      for (i in rev(ind)) {
        out <- do.call(forceAndCall, c(list(2, f), Map(c, lapply(xs, `[[`, i), init)))
        if (is.list(out)) {
          if (length(out) == length(init)) {
            init <- out
          } else {
            stop("function output is not length 1 or same length as 'init'")
          }
        } else {
          xs[[1]][[i]] <- out
          init <- lapply(xs, `[[`, i)
        }
      }
    } else {
      for (i in ind) {
        out <- do.call(forceAndCall, c(list(2, f), Map(c, init, lapply(xs, `[[`, i))))
        if (is.list(out)) {
          if (length(out) == length(init)) {
            init <- out
          } else {
            stop("function output is not length 1 or same length as 'init'")
          }
        } else {
          xs[[1]][[i]] <- out
          init <- lapply(xs, `[[`, i)
        }
      }
    }
    if (!isFALSE(simplify) && length(out))
      init <- simplify2array(init[[1]], higher = (simplify == "array"))
    init
  } else {
    len <- length(ind) + 1L
    out <- vector("list", len)
    if (mis) {
      if (right) {
        out[[len]] <- init
        for (i in rev(ind)) {
          out1 <- do.call(forceAndCall, c(list(2, f), Map(c, lapply(xs, `[[`, i), init)))
          if (is.list(out1)) {
            if (length(out1) == length(init)) {
              out[[i]] <- init <- out1
            } else {
              stop("function output is not a list length 1 or same length as 'init'")
            }
          } else {
            xs[[1]][[i]] <- out1
            out[[i]] <- init <- lapply(xs, `[[`, i)
          }
        }
      } else {
        out[[1L]] <- init
        for (i in ind) {
          out1 <- do.call(forceAndCall, c(list(2, f), Map(c, init, lapply(xs, `[[`, i))))
          if (is.list(out1)) {
            if (length(out1) == length(init)) {
              out[[i]] <- init <- out1
            } else {
              stop("function output is not length 1 or same length as 'init'")
            }
          } else {
            xs[[1]][[i]] <- out1
            out[[i]] <- init <- lapply(xs, `[[`, i)
          }
        }
      }
    } else {
      if (right) {
        out[[len]] <- init
        for (i in rev(ind)) {
          out1 <- do.call(forceAndCall, c(list(2, f), Map(c, lapply(xs, `[[`, i), init)))
          if (is.list(out1)) {
            if (length(out1) == length(init)) {
              out[[i]] <- init <- out1
            } else {
              stop("function output is not a list length 1 or same length as 'init'")
            }
          } else {
            xs[[1]][[i]] <- out1
            out[[i]] <- init <- lapply(xs, `[[`, i)
          }
        }
      } else {
        for (i in ind) {
          out[[i]] <- init
          out1 <- do.call(forceAndCall, c(list(2, f), Map(c, init, lapply(xs, `[[`, i))))
          if (is.list(out1)) {
            if (length(out1) == length(init)) {
              init <- out1
            } else {
              stop("function output is not length 1 or same length as 'init'")
            }
          } else {
            xs[[1]][[i]] <- out1
            init <- lapply(xs, `[[`, i)
          }
        }
        out[[len]] <- init
      }
    }
    if (all(lengths(out) == 1L))
      out <- unlist(out, recursive = FALSE)
    if (!isFALSE(simplify) && length(out))
      out <- simplify2array(lapply(out, `[[`, 1), higher = (simplify == "array"))
    out
  }
}
	#' 'Reduce' with arbitrary number of arguments
	#'
	#' Applies a function to the corresponding elements of given vectors,
	#' in a reductionist way. (This is not related to the [Apache Hadoop
	#' MapReduce](https://hadoop.apache.org/) project ... while this may
	#' suggest the name 'MapReduce' is a poor choice for this function, it
	#' is a logical combination of R's [Map()] and [Reduce()] functions.)
	#'
	#' @details
	#'
	#' The function is called with 'k' vectors as arguments, and the
	#' function should return either a list of k single values, or a
	#' single return value. If a singleton, then the value is assumed to
	#' replace the 'i'th value within the first argument (list or vector),
	#' and the next iteration's "previous" values will be take from this
	#' and this i'th value from all vectors except the first; if a
	#' k-length list, then this list is used as the next iteration's
	#' "previous" (the other value in each of the k argument vectors).
	#'
	#' If 'init' is given, this logically adds it to the start (when
	#' proceeding left-to-right) or the end of the input vectors,
	#' respectively. If these possibly-augmented vectors have n > 1
	#' vectors, 'MapReduce' successively applies f to the first (last) 2
	#' values of each vector.
	#'
	#' @section Translation from 'Reduce':
	#'
	#' Not all functions will translate directly from [Reduce()]. Many
	#' functions that work well with 'Reduce' expect exactly two
	#' arguments, with an effective length of 1 each; one example is '+'
	#' and other binary operators. In contrast, 'MapReduce' expects k
	#' vectors and will operate on all vectors (of length 2) in a call.
	#'
	#' While the use of binary operators as the sole function passed to
	#' 'MapReduce' is ignoring the 'Map' component, to translate binary
	#' operators, one must wrap the function so that it expects a single
	#' argument of length two. For example
	#'
	#' ```
	#' binary_func <- function(a, b) `+`(a, b) # Reduce
	#' binary_func2 <- function(u) `+`(u[1], u[2]) # MapReduce
	#' ```
	#'
	#' Author: Bill Evans (bill@8pawexpress.com)
	#'
	#' License: MIT: use as you will, no warranty, keep this license and
	#' citation if re-distributed
	#'
	#' @param f a function of the 'k' arity if this is called with k
	#' arguments.
	#' @param ... vectors or lists
	#' @param init an R object of the same kind as the elements of 'x'.
	#' @param right a logical indicating whether to proceed from left to
	#' right (default) or from right to left.
	#' @param accumulate a logical indicating whether the successive
	#' reduce combinations should be accumulated. By default, only the
	#' final combination is used.
	#' @return 'list', possibly with nested lists, and further if
	#' 'accumulate' is true
	#' @export
	#' @md
	#' @examples
	#' ### Adaptations from [Reduce()] documentation:
	#' x <- list(1, 2, 3)
	#' Reduce("+", x)
	#' # "+" is not a good function for MapReduce, as it expects two
	#' # separate inputs instead of one or more vectors, length 2; so
	#' # instead, we must adapt
	#' MapReduce(function(a) a[1] + a[2], x)
	#' # perhaps [sum()] is better
	#' MapReduce(sum, x)
	#'
	#' Reduce("+", x, accumulate = TRUE)
	#' MapReduce(sum, x, accumulate = TRUE)
	#'
	#' cfrac_binary <- function(u, v) u + 1 / v
	#' cfrac_unary <- function(u) u[1] + 1 / u[2]
	#' ## Continued fraction approximation for pi:
	#' x <- c(3, 7, 15, 1, 292)
	#' Reduce(cfrac_binary, x, right = TRUE)
	#' MapReduce(cfrac_unary, x, right = TRUE, simplify = TRUE)
	#'
	#' ## Continued fraction approximation for Euler's number (e):
	#' x <- c(2, 1, 2, 1, 1, 4, 1, 1, 6, 1, 1, 8)
	#' Reduce(cfrac_binary, x, right = TRUE)
	#' MapReduce(cfrac_unary, x, right = TRUE, simplify = TRUE)
	#'
	#' ### Extensions, allowing for more arguments
	#'
	#' # suggested by https://stackoverflow.com/q/56612304
	#' if (require("data.table")) {
	#'
	#' nT <- 5
	#' int <- rep(1.1, 5)
	#' loan <- data.table(loan.age = seq(0:(nT-1)), payment = c(5000, -rep(1000,(nT-1))))
	#' f <- function(payments, interests) {
	#' list(payments[2] + interests[2] * payments[1], interests[2])
	#' }
	#'
	#' loan[, c("interest", "balance") := 0
	#' ][,balance := MapReduce(f, payment, int, accumulate = TRUE, simplify = TRUE)
	#' ][,interest := c(0, diff(balance) - payment[-1])
	#' ]
	#'
	#' ### that was boring, 'int' was unchanging; here's the impetus for MapReduce
	#'
	#' set.seed(2)
	#' (int <- rnorm(nT, mean = 0.1, sd = 0.02) + 1)
	#' # [1] 1.082062 1.103697 1.131757 1.077392 1.098395
	#'
	#' # refresh 'loan'
	#' loan <- data.table(loan.age = seq(0:(nT-1)), payment = c(5000, -rep(1000,(nT-1))))
	#'
	#' loan[, c("interest", "balance") := 0
	#' ][,balance := MapReduce(f, payment, int, accumulate = TRUE, simplify = TRUE)
	#' ][,interest := c(0, diff(balance) - payment[-1])
	#' ]
	#'
	#' }
	MapReduce <- function (f, ..., init, right = FALSE, accumulate = FALSE, simplify = FALSE) {
	mis <- missing(init)
	xs <- list(...)
	lens <- lengths(xs)
	len1 <- max(lens)
	if (length(lens) == 0L \|\| len1 == 0L)
	return(if (mis) NULL else init)
	if (!all(lens %in% c(len1, 1L))) {
	stop("all arguments must be same length or length 1")
	}
	xs[lens == 1L] <- Map(replicate, len1[any(lens == 1L)], xs[lens == 1L])
	f <- match.fun(f)
	xs <- lapply(xs, function(x) if (!is.vector(x) \|\| is.object(x)) as.list(x) else x)
	ind <- seq_len(len1)
	if (mis) {
	if (right) {
	init <- lapply(xs, `[[`, len1)
	ind <- ind[-len1]
	} else {
	init <- lapply(xs, `[[`, 1L)
	ind <- ind[-1L]
	}
	}
	if (!accumulate) {
	if (right) {
	for (i in rev(ind)) {
	out <- do.call(forceAndCall, c(list(2, f), Map(c, lapply(xs, `[[`, i), init)))
	if (is.list(out)) {
	if (length(out) == length(init)) {
	init <- out
	} else {
	stop("function output is not length 1 or same length as 'init'")
	}
	} else {
	xs[[1]][[i]] <- out
	init <- lapply(xs, `[[`, i)
	}
	}
	} else {
	for (i in ind) {
	out <- do.call(forceAndCall, c(list(2, f), Map(c, init, lapply(xs, `[[`, i))))
	if (is.list(out)) {
	if (length(out) == length(init)) {
	init <- out
	} else {
	stop("function output is not length 1 or same length as 'init'")
	}
	} else {
	xs[[1]][[i]] <- out
	init <- lapply(xs, `[[`, i)
	}
	}
	}
	if (!isFALSE(simplify) && length(out))
	init <- simplify2array(init[[1]], higher = (simplify == "array"))
	init
	} else {
	len <- length(ind) + 1L
	out <- vector("list", len)
	if (mis) {
	if (right) {
	out[[len]] <- init
	for (i in rev(ind)) {
	out1 <- do.call(forceAndCall, c(list(2, f), Map(c, lapply(xs, `[[`, i), init)))
	if (is.list(out1)) {
	if (length(out1) == length(init)) {
	out[[i]] <- init <- out1
	} else {
	stop("function output is not a list length 1 or same length as 'init'")
	}
	} else {
	xs[[1]][[i]] <- out1
	out[[i]] <- init <- lapply(xs, `[[`, i)
	}
	}
	} else {
	out[[1L]] <- init
	for (i in ind) {
	out1 <- do.call(forceAndCall, c(list(2, f), Map(c, init, lapply(xs, `[[`, i))))
	if (is.list(out1)) {
	if (length(out1) == length(init)) {
	out[[i]] <- init <- out1
	} else {
	stop("function output is not length 1 or same length as 'init'")
	}
	} else {
	xs[[1]][[i]] <- out1
	out[[i]] <- init <- lapply(xs, `[[`, i)
	}
	}
	}
	} else {
	if (right) {
	out[[len]] <- init
	for (i in rev(ind)) {
	out1 <- do.call(forceAndCall, c(list(2, f), Map(c, lapply(xs, `[[`, i), init)))
	if (is.list(out1)) {
	if (length(out1) == length(init)) {
	out[[i]] <- init <- out1
	} else {
	stop("function output is not a list length 1 or same length as 'init'")
	}
	} else {
	xs[[1]][[i]] <- out1
	out[[i]] <- init <- lapply(xs, `[[`, i)
	}
	}
	} else {
	for (i in ind) {
	out[[i]] <- init
	out1 <- do.call(forceAndCall, c(list(2, f), Map(c, init, lapply(xs, `[[`, i))))
	if (is.list(out1)) {
	if (length(out1) == length(init)) {
	init <- out1
	} else {
	stop("function output is not length 1 or same length as 'init'")
	}
	} else {
	xs[[1]][[i]] <- out1
	init <- lapply(xs, `[[`, i)
	}
	}
	out[[len]] <- init
	}
	}
	if (all(lengths(out) == 1L))
	out <- unlist(out, recursive = FALSE)
	if (!isFALSE(simplify) && length(out))
	out <- simplify2array(lapply(out, `[[`, 1), higher = (simplify == "array"))
	out
	}
	}