Skip to content

Instantly share code, notes, and snippets.

@mmaechler
Last active November 28, 2016 18:07
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mmaechler/9cfc3219c4b89649313bfe6853d87894 to your computer and use it in GitHub Desktop.
Save mmaechler/9cfc3219c4b89649313bfe6853d87894 to your computer and use it in GitHub Desktop.
`ifelse2()` - experiments for a new version of `ifelse()`
source("ifelse-def.R")
##' warnifnot(): a "only-warning" version of stopifnot():
##' {Yes, learn how to use do.call(substitute, ...) in a powerful manner !!}
warnifnot <- stopifnot
body(warnifnot) <- do.call(substitute, list(body(stopifnot),
list(stop = quote(warning))))
## (now, this was really cute ....)
##' @title Simplistic Checking of Different ifelse() Implementations
##' @param FUN a function \dQuote{similar but "better" than} \code{ifelse()}
##' @param nFact positive integer specifying the number factor() checks
##' @param noRmpfr logical specifying to skip \pkg{Rmpfr} examples
chkIfelse <- function(FUN, nFact = 500, NULLerror = TRUE, noRmpfr = FALSE) {
FUN <- match.fun(FUN)
if(NULLerror) {
op <- options(error = NULL); on.exit( options(op) )
}
rTF <- function(n, pr.T)
sample(c(TRUE,FALSE), n, replace=TRUE, prob = c(pr.T, 1-pr.T))
Try <- function(expr) tryCatch(expr, error = identity)
chkArith <- function(T., yes, no, trafo = identity, class. = class(yes), ...) {
r <- Try(FUN(T., yes, no))
warnifnot(
ii <- inherits(r, class.),
if(ii) all.equal(trafo(r),
T.*trafo(yes) + (!T.)*trafo(no), ...) else TRUE)
invisible(r)
}
if((has.4th <- length(formals(FUN)) >= 4))
if(names(formals(FUN))[[4]] %in% "missing") { # we use 'NA.' below
body(FUN) <- do.call(substitute,
list(body(FUN),
setNames(list(quote(NA.)), names(formals(FUN))[[4]])))
names(formals(FUN))[[4]] <- "NA."
}
else # not a "NA." / "missing" like meaning of 4th argument
has.4th <- FALSE
rid <- FUN(c(TRUE, FALSE, TRUE), 1:3, 100*(1:3)) # int / double
rdd <- FUN(c(TRUE, FALSE, TRUE), 0+1:3, 100*(1:3))
rdN <- FUN(c(TRUE, FALSE,NA,TRUE), 0+1:4, 100*(1:4))
warnifnot(all.equal(rid, rdd, tol = 0),
all.equal(rdN, c(1,200, NA, 4), tol = 0))
if(has.4th)
warnifnot(identical(
FUN(c(TRUE, FALSE,NA,TRUE), 0+ 1:4, 100*(1:4), NA. = -909),
c(1, 200, -909, 4)))
##--- matrices -------------------------------
for(i in 1:20) { ## ifelse() keeps attributes (from 'test')
r10 <- round(10 * rnorm(10))
m2 <- cbind(10:1, r10)
rm2 <- FUN(m2 >= -2, m2, abs(m2))
warnifnot(iM <- is.matrix(rm2))
if(iM) warnifnot(identical(rm2[,2], FUN(r10 >= -2 , r10, abs(r10))))
}
r10 <- c(-2, 27, -7, 4, -3, 2, -1, -4, -3, -8)
##--- Date-Time objects: -----------------------
rD <- FUN(c(TRUE, FALSE, TRUE), Sys.Date(), as.Date("2016-11-11")) # Date
warnifnot(inherits(rD, "Date"))
ch03 <- paste0("2003-",rep(1:4, 4:1), "-", sample(1:28, 10, replace=TRUE))
x03ct <- as.POSIXct(ch03)
x03D <- as.Date(ch03)
for(i in 1:20)
warnifnot(identical(x03D, FUN(rTF(10, pr.=0.4), x03D, x03D )),
identical(x03ct, FUN(rTF(10, pr.=0.7), x03ct, x03ct)))
ct <- Sys.time(); lt <- as.POSIXlt(ct)
ifct <- FUN(c(TRUE, FALSE, NA, TRUE), ct, ct-100)# POSIXct
iflt <- FUN(c(TRUE, FALSE, NA, TRUE), lt, lt-100)# POSIXlt/ct "mix"
ifll <- FUN(c(TRUE, FALSE, NA, TRUE), lt, as.POSIXlt(lt-100))# POSIXlt
warnifnot(ic <- inherits(ifct, "POSIXct"),
il <- inherits(ifll, "POSIXlt"),
!ic || identical(ifll, as.POSIXlt(ifct)),
!il || identical(ifll, as.POSIXlt(iflt))
)
## POSIX*t now with 'tzone' / TZ --- ("the horror"): ----
tzs <- c("UTC", "EST", "EST5EDT")
x03lt.s <- sapply(tzs, as.POSIXlt, x = x03ct, simplify=FALSE)
x03ct.s <- lapply(x03lt.s, as.POSIXct)
for(y in tzs)
for(n in tzs) {
T. <- rTF(10, pr. = 0.4)
## v v
chkArith(T., x03lt.s[[y]], x03lt.s[[n]], trafo = as.numeric)
chkArith(T., x03lt.s[[y]], x03ct.s[[n]], trafo = as.numeric)
chkArith(T., x03ct.s[[y]], x03lt.s[[n]], trafo = as.numeric)
chkArith(T., x03ct.s[[y]], x03ct.s[[n]], trafo = as.numeric)
## ^ ^
}
##-- "difftime" another "atomic-like" base S3 class:
dt.h <- as.difftime(c(1,20,60), units = "hours")
dt.m <- as.difftime(c(1,30,60), units = "mins")
dt.s <- as.difftime(c(1,30,60), units = "secs")
Tst <- c(TRUE, FALSE, TRUE)
warnifnot(
## the easy ones: *same* units
identical(dt.h, FUN(Tst, dt.h, dt.h)),
identical(dt.m, FUN(Tst, dt.m, dt.m)),
identical(dt.s, FUN(Tst, dt.s, dt.s)))
## the tough ones
chkArith(Tst, dt.h, dt.m, trafo = function(.) as.double(., "hours"), tol = 1e-14)
chkArith(Tst, dt.h, dt.s, trafo = function(.) as.double(., "mins"), tol = 1e-14)
chkArith(Tst, dt.s, dt.m, trafo = function(.) as.double(., "secs"), tol = 1e-14)
## now the "factor": -----------------------------------
f1 <- suppressWarnings(
FUN(c(TRUE, FALSE), factor(2:3), factor(3:4)))# "works" with warning
warnifnot(is.factor(f1), length(f1) == 2)# not much more
ff <- gl(11,5, labels=LETTERS[1:11]); yes <- ff; no <- rev(ff)
llev <- levels(ff)[length(levels(ff))]
for(i in seq_len(nFact)) {
test <- sample(c(TRUE,FALSE, NA), length(ff), TRUE)
r <- FUN(test, yes, no)
if(has.4th) {
rN <- suppressWarnings(FUN(test, yes, no, NA. = "Z"))
warnifnot(identical(r, rN))
rN <- FUN(test, yes, no, NA. = llev)
} else {
rN <- r
rN[is.na(rN)] <- llev
}
tst.T <- test & !is.na(test)
tst.F <- !test & !is.na(test)
warnifnot(is.factor(r), identical(levels(r), levels(ff)),
r[tst.T] == yes[tst.T], identical(r[tst.T], rN[tst.T]),
r[tst.F] == no[tst.F], identical(r[tst.F], rN[tst.F]),
all(is.na(r[is.na(test)])),
all(rN[is.na(test)] == llev))
}
if(!require("Matrix"))
stop("Your R installation is broken: The 'Matrix' package must be available")
if(packageVersion("Matrix") >= "1.2-8") {
sv1 <- sparseVector(x = 1:10, i = sample(999, 10), length=1000)
sv2 <- sparseVector(x = -(1:50), i = sample(300, 10), length= 300)#-> recycling
ssv2 <- rep(sv2, length.out = length(sv1))
rsv1 <- FUN(sv1 != 0, sv1, sv2)
rsv2 <- FUN(sv2 != 0, sv2, sv1)
vv1 <- as(sv1, "vector"); vv2 <- as(sv2, "vector")
rvv1 <- FUN(vv1 != 0, vv1, vv2); rvv2 <- FUN(vv2 != 0, vv2, vv1)
warnifnot(is(rsv1, "sparseVector"), is(rsv2, "sparseVector"),
rvv1 == rsv1, rvv2 == rsv2,
identical(sv1, FUN(sv1 != 0, sv1, sv1)),
identical(sv2, FUN(sv2 != 0, sv2, sv2)),
identical(sv1, FUN(sv1 == 0, sv1, sv1)),
identical(sv2, FUN(sv2 == 0, sv2, sv2)),
TRUE)
sM1 <- Matrix(sv1, 50,20)
sM2 <- Matrix(sv2, 30,10)
rsM1 <- FUN(sM1 != 0, sM1, sM2)
rsM2 <- FUN(sM2 != 0, sM2, sM1)
warnifnot(is(rsM1, "sparseMatrix"), is(rsM2, "sparseMatrix"),
all.equal(rsv1, rsM1, tol=0),
all.equal(rsv2, rsM2, tol=0),
identical(sM1, FUN(sM1 > 0, sM1, sM1)),
identical(sM2, FUN(sM2 > 0, sM2, sM2)))
}
z5 <- c(-4, -12, -1, 16, 7)
## and these
if(require("Rmpfr")) {
r1 <- FUN(c(TRUE, FALSE,TRUE,TRUE), mpfr(1:4, 64), mpfr(10*(1:4),64))
warnifnot(inherits(r1, "mpfr"), r1 == c(1, 20, 3:4))
if(has.4th) {
r2 <- FUN(c(TRUE, FALSE, NA ,TRUE), mpfr(1:4, 64), mpfr(10*(1:4),64),
NA. = mpfr(-999,10))
warnifnot(inherits(r2, "mpfr"), r2 == c(1, 20, -999, 4))
}
## and some "gmp" checks ("Rmpfr" requires "gmp")
ZZ <- as.bigz(1:7)^50
rZZ <- FUN(rTF(7, .4), ZZ, ZZ)
warnifnot(inherits(rZZ, "bigz"), all(ZZ == rZZ))
## warnifnot(identical(ZZ, )) ## <<< gmp bug (not so easy to fix ??)
} else if(!noRmpfr)
message("not testing 'Rmpfr' ..")
if(require("zoo")) {
z1 <- as.zoo(z5)
zt <- as.zoo(ts(z5, start = 1981, freq = 12))
zM <- suppressWarnings(zoo(cbind(10:1, rnorm(10)), x03D))
warnifnot(identical(z1, FUN(z1 > 1, z1, z1)),
identical(zt, FUN(zt > 1, zt, zt)),
TRUE ## Fails; problem? identical(zM, FUN(zM > 1, zM, zM))
)
## TODO more?
} else
message("not testing 'zoo' ..")
invisible(TRUE)
}# end{chkIfelse}
###-----------------
## Suharto Anggono - on R-devel@..., Nov.26, 2016
## Cases where the last version above ('ifelseSA2') or 'ifelse2 or 'ifelseHW' in
## ifelse-def.R gives inappropriate answers:
## - 'yes' and 'no' are "difftime" objects with different "units" attribute
## - 'yes' and 'no' are "POSIXlt" objects with different time zone
## Example: 'yes' in "UTC" and 'no' in "EST5EDT". The reverse, 'yes' in "EST5EDT" and 'no' in "UTC" gives error.
## For the cases, c(yes, no) helps. Function 'ifelseJH' in ifelse-def.R gives a right answer for "POSIXlt" case.
op <- options(nwarnings = 5000)# default '50'
try( chkIfelse(ifelse) ) # an error and 3135 warnings (!)
## ======
unique(warnings()) # around 30
require("Rmpfr")# with all its "conflicts" warnings ..
chkIfelse(ifelse2) # yes!
## => not ok for difftime with *different* units
if(FALSE) { ## when you have errors, get more :
chkIfelse(ifelse2, NULLerror=FALSE)
traceback() #
## or even
opE <- options(warn = 2, error = recover)
chkIfelse(ifelse2, NULLerror=FALSE)
options(opE)
}
chkIfelse(ifelseSA1)## 32 warnings
unique(warnings()) ## 11 unique ..
chkIfelse(ifelseSA2) ## yes! - no warning (or error)
## => not ok for difftime with *different* units
## (plus the 'Matrix' "notes") :
## <sparse>[ <logic> ] : .M.sub.i.logical() maybe inefficient
## <sparse>[ <logic> ] : .M.sub.i.logical() maybe inefficient
## The "next" best:
try( chkIfelse(ifelseHW) ) ; unique(warnings()) ## matrix + POSIX(ct|lt) ( + difftime-diff-units)
## "of course", all these fail
try( chkIfelse(ifelseJH) ) ; unique(warnings()) # *does* work with 'difftime-diff-units'
##try( chkIfelse(ifelseR) ) ; unique(warnings())
try( chkIfelse(ifelseR101) ) ; unique(warnings())
try( chkIfelse(ifelseR0633)) ; unique(warnings())
### Specifically ifelseHW() works here [but Hadley's original if_else() fails !!
ifelseHW(c(TRUE, FALSE,TRUE), 1:3, 100*(1:3))
## if_else: Error: `false` has type 'double' not 'integer'
##
options(op)
#### Only definitions -- mainly of diverse implementations of ifelse()
## base :: ifelse ----(since r68595 | hornik | 2015-06-28), as of 2016-08-08
##M: ~/R/D/r-devel/R/src/library/base/R/ifelse.R
ifelseR <- function (test, yes, no)
{
if(is.atomic(test)) { # do not lose attributes
if (typeof(test) != "logical")
storage.mode(test) <- "logical"
## quick return for cases where 'ifelse(a, x, y)' is used
## instead of 'if (a) x else y'
if (length(test) == 1 && is.null(attributes(test))) {
if (is.na(test)) return(NA)
else if (test) {
if (length(yes) == 1 && is.null(attributes(yes)))
return(yes)
}
else if (length(no) == 1 && is.null(attributes(no)))
return(no)
}
}
else ## typically a "class"; storage.mode<-() typically fails
test <- if(isS4(test)) methods::as(test, "logical") else as.logical(test)
ans <- test
ok <- !(nas <- is.na(test))
if (any(test[ok]))
ans[test & ok] <- rep(yes, length.out = length(ans))[test & ok]
if (any(!test[ok]))
ans[!test & ok] <- rep(no, length.out = length(ans))[!test & ok]
ans[nas] <- NA
ans
}
## base R 1.0.1 's version [file/svn date: May 9, 1999] :
ifelseR101 <- function (test, yes, no)
{
ans <- test
test <- as.logical(test)
nas <- is.na(test)
if (any(test[!nas])) {
ans[test] <- rep(yes, length = length(ans))[test]
}
if (any(!test[!nas])) {
ans[!test] <- rep(no, length = length(ans))[!test]
}
ans[nas] <- NA
ans
}
## base R 0.63.3's version [file/svn date: Sep 10, 1998] :
ifelseR0633 <- function (test, yes, no)
{
ans <- test
test <- as.logical(test)
nas <- is.na(test)
ans[ test] <- rep(yes, length = length(ans))[ test]
ans[!test] <- rep(no, length = length(ans))[!test]
ans[nas] <- NA
ans
}
## Hadley's dplyr::if_else (with *simplified* check_*)
##M: /usr/local.nfs/app/R/R_local/src/dplyr/R/if_else.R
#' Vectorised if.
#'
#' Compared to the base \code{\link{ifelse}()}, this function is more strict.
#' It checks that \code{true} and \code{false} are the same type. This
#' strictness makes the output type more predictable, and makes it somewhat
#' faster.
#'
#' @param condition Logical vector
#' @param true,false Values to use for \code{TRUE} and \code{FALSE} values of
#' \code{condition}. They must be either the same length as \code{condition},
#' or length 1. They must also be the same type: \code{if_else} checks that
#' they have the same type and same class. All other attributes are
#' taken from \code{true}.
#' @param missing If not \code{NULL}, will be used to replace missing
#' values.
#' @return Where \code{condition} is \code{TRUE}, the matching value from
#' \code{true}, where it's \code{FALSE}, the matching value from \code{false},
#' otherwise \code{NA}.
#' @export
#' @examples
#' x <- c(-5:5, NA)
#' if_else(x < 0, NA_integer_, x)
#' if_else(x < 0, "negative", "positive", "missing")
#'
#' # Unlike ifelse, if_else preserves types
#' x <- factor(sample(letters[1:5], 10, replace = TRUE))
#' ifelse(x %in% c("a", "b", "c"), x, factor(NA))
#' if_else(x %in% c("a", "b", "c"), x, factor(NA))
#' # Attributes are taken from the `true` vector,
ifelseHW <- function(condition, true, false, missing = NULL) {
if (!is.logical(condition))
stop("`condition` must be logical", call. = FALSE)
out <- true[rep(NA_integer_, length(condition))]
out <- replace_with(out, condition & !is.na(condition), true, "`true`")
out <- replace_with(out, !condition & !is.na(condition), false, "`false`")
replace_with(out, is.na(condition), missing, "`missing`")
}
##M: /usr/local.nfs/app/R/R_local/src/dplyr/R/utils-replace-with.R
##' Simplified, more tolerant, standalone version of dplyr::replace_with()
replace_with <- function(x, i, val, name) {
if (is.null(val))
return(x)
n <- length(x) ## + stop() below <==> check_length(val, x, name)
## check_type(val, x, name) ## <==> stopifnot(typeof(val) == typeof(x))
## check_class(val, x, name)## <==> if(is.object(x))
## stopifnot(identical(class(x),class(template)))
x[i] <- if (length(val) == 1L) val
else if(length(val) == n) val[i]
else stop("no recycling here: length(val) must be 1 or length(x)")
x
}
##'--------- Jonathan Hosking's ifthen(): .. sent by private E-mail, Aug.2016
ifelseJH <- function(test, yes, no, warn = TRUE) {
##
## Variant of ifelse(). Class and mode of result are those of 'c(yes,no)'.
## Thus the Warning in the help of ifelse() does not apply, and if
## 'yes' and 'no' have the same class then the result also has this class.
## Names and dimensions of result are those of 'test'.
##
## Example:
##
## d1 <- Sys.Date() + (1:3) # A "Date" object
## d2 <- Sys.Date() + (31:33) # Another "Date" object
## ifelse(c(TRUE, FALSE, TRUE), d1, d2) # Returns a numeric vector(!)
## ifthen(c(TRUE, FALSE, TRUE), d1, d2) # Returns a "Date" object
##
n <- length(test)
len <- c(length(yes),length(no))
# if (getRversion() < "2.11.0") { # getRversion() and its "<" method are slow!
if (inherits(yes,"POSIXlt")) len[1] <- length(yes[[1]]) # Accommodates R 2.10.x and earlier, probably not needed now
if (inherits( no,"POSIXlt")) len[2] <- length( no[[1]])
# }
if (!all(len == n)) {
if (warn && !all(is.element(len,c(1,n))))
warning("lengths of 'yes' and 'no' are not both either 1 or the same as the length of 'test'")
yes <- rep(yes,length.out = n)
no <- rep(no,length.out = n)
}
tt <- as.logical(test)
out <- c(yes,no)[seq_along(tt)+n*(!tt)]
if (inherits(out,"POSIXt")) {
tz <- attr(yes,"tzone")
if (!is.null(tz) && identical(tz,attr(no,"tzone"))) attr(out,"tzone") <- tz
}
if (inherits(out,"POSIXlt")) return(out)
dim(out) <- dim(test)
if (is.null(dim(test))) names(out) <- names(test)
else dimnames(out) <- dimnames(test)
out
}
##' Martin Maechler, 14. Nov 2016 --- taking into account Duncan M. and Hadley's
##' ideas in the R-devel thread starting at (my mom's 86th birthday):
##' https://stat.ethz.ch/pipermail/r-devel/2016-August/072970.html
ifelse2 <- function (test, yes, no, NA. = NA) {
if(!is.logical(test)) {
if(is.atomic(test))
storage.mode(test) <- "logical"
else ## typically a "class"; storage.mode<-() typically fails
test <- if(isS4(test)) methods::as(test, "logical") else as.logical(test)
}
## No longer optimize the "if (a) x else y" cases:
## Only "non-good" R users use ifelse(.) instead of if(.) in these cases.
ans <-
tryCatch(if(identical(class(yes), class(no))) {
## as c(o) or o[0] may not work for the class
if(length(yes) == length(test))
yes # keep attributes such as dim(.)
else
rep(yes, length.out = length(test))
}
else rep(c(yes[0], no[0]), length.out = length(test)),
error = function(e) structure(e, class = c("ifelse2_error", class(e))))
if(inherits(ans, "ifelse2_error")) { ## -> asymmetric yes-leaning
ans <- yes
ans[!test] <- no[!test] # (potentially lots of recycling here)
if(anyNA(test))
ans[is.na(test)] <- NA.
}
else {
ok <- !(nas <- is.na(test))
if (any(test[ok]))
ans[test & ok] <- rep(yes, length.out = length(ans))[test & ok]
if (any(!test[ok]))
ans[!test & ok] <- rep(no, length.out = length(ans))[!test & ok]
ans[nas] <- NA. # possibly coerced to class(ans)
}
ans
}
## Suharto Anggono - on R-devel@..., Nov.26, 2016
## A concrete version of 'ifelse2' that starts the result from 'yes':
ifelseSA1 <- function(test, yes, no, NA. = NA) {
if(!is.logical(test))
test <- if(isS4(test)) methods::as(test, "logical") else as.logical(test)
n <- length(test)
ans <- rep(yes, length.out = n)
ans[!test & !is.na(test)] <- rep(no, length.out = n)[!test & !is.na(test)]
ans[is.na(test)] <- rep(NA., length.out = n)[is.na(test)]
ans
}
## It requires 'rep' method that is compatible with subsetting. It also works
## with "POSIXlt" in R 2.7.2, when 'length' gives 9, and gives an appropriate
## result if time zones are the same.
## For coercion of 'test', there is no need of keeping attributes. So, it
## doesn't do
## storage.mode(test) <- "logical"
## and goes directly to 'as.logical'.
## It relies on subassignment for silent coercions of
## logical < integer < double < complex .
## Unlike 'ifelse', it never skips any subassignment. So, phenomenon as in "example of different return modes" in ?ifelse doesn't happen.
## Suharto Anggono - on R-devel@..., Nov.26, 2016
## Another version, for keeping attributes as pointed out by Duncan Murdoch:
ifelseSA2 <- function(test, yes, no, NA. = NA) {
if(!is.logical(test))
test <- if(isS4(test)) methods::as(test, "logical") else as.logical(test)
n <- length(test)
n.yes <- length(yes); n.no <- length(no)
if (n.yes != n) {
if (n.no == n) { # swap yes <-> no
test <- !test
ans <- yes; yes <- no; no <- ans
n.no <- n.yes
} else yes <- yes[rep_len(seq_len(n.yes), n)]
}
ans <- yes
if (n.no == 1L)
ans[!test] <- no
else
ans[!test & !is.na(test)] <- no[
if (n.no == n) !test & !is.na(test)
else rep_len(seq_len(n.no), n)[!test & !is.na(test)]]
stopifnot(length(NA.) == 1L) ## << MM: I have been assuming this in all cases
ans[is.na(test)] <- NA.
ans
}
## Note argument evaluation order: 'test', 'yes', 'no', 'NA.'.
## First, it chooses the first of 'yes' and 'no' that has the same length as
## the result. If none of 'yes' and 'no' matches the length of the result, it
## chooses recycled (or truncated) 'yes'.
## It uses 'rep' on the index and subsetting as a substitute for 'rep' on the
## value.
## It requires 'length' method that is compatible with subsetting.
## Additionally, it uses the same idea as dplyr::if_else, or more precisely
## the helper function 'replace_with'. It doesn't use 'rep' if the length of
## 'no' is 1 or is the same as the length of the result. For subassignment
## with value of length 1, recycling happens by itself and NA in index is OK.
## It limits 'NA.' to be of length 1, considering 'NA.' just as a label for NA.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment