DavisVaughan/call-hell.r

## call-hell.r
# often package developers store the call in their model object
# and then print it out in their summary / print methods
lm_model <- lm(mpg ~ cyl, mtcars)

lm_model
#>
#> Call:
#> lm(formula = mpg ~ cyl, data = mtcars)
#>
#> Coefficients:
#> (Intercept)          cyl
#>      37.885       -2.876

lm_model$call
#> lm(formula = mpg ~ cyl, data = mtcars)

# normally this is not an issue, but sometimes package _users_
# want to call that modeling function programatically. This often
# means that they use `do.call()`
.f <- mpg ~ cyl
.data <- mtcars
lm_model_do_call <- do.call(lm, list(formula = .f, data = .data))

# essentially, this inlines the `data` in a textual representation in the call
# OH THE HORROR
lm_model_do_call
#>
#> Call:
#> (function (formula, data, subset, weights, na.action, method = "qr",
#>     model = TRUE, x = FALSE, y = FALSE, qr = TRUE, singular.ok = TRUE,
#>     contrasts = NULL, offset, ...)
#> {
#>     ret.x <- x
#>     ret.y <- y
#>     cl <- match.call()
#>     mf <- match.call(expand.dots = FALSE)
#>     m <- match(c("formula", "data", "subset", "weights", "na.action",
#>         "offset"), names(mf), 0L)
#>     mf <- mf[c(1L, m)]
#>     mf$drop.unused.levels <- TRUE
#>     mf[[1L]] <- quote(stats::model.frame)
#>     mf <- eval(mf, parent.frame())
#>     if (method == "model.frame")
#>         return(mf)
#>     else if (method != "qr")
#>         warning(gettextf("method = '%s' is not supported. Using 'qr'",
#>             method), domain = NA)
#>     mt <- attr(mf, "terms")
#>     y <- model.response(mf, "numeric")
#>     w <- as.vector(model.weights(mf))
#>     if (!is.null(w) && !is.numeric(w))
#>         stop("'weights' must be a numeric vector")
#>     offset <- as.vector(model.offset(mf))
#>     if (!is.null(offset)) {
#>         if (length(offset) != NROW(y))
#>             stop(gettextf("number of offsets is %d, should equal %d (number of observations)",
#>                 length(offset), NROW(y)), domain = NA)
#>     }
#>     if (is.empty.model(mt)) {
#>         x <- NULL
#>         z <- list(coefficients = if (is.matrix(y)) matrix(NA_real_,
#>             0, ncol(y)) else numeric(), residuals = y, fitted.values = 0 *
#>             y, weights = w, rank = 0L, df.residual = if (!is.null(w)) sum(w !=
#>             0) else if (is.matrix(y)) nrow(y) else length(y))
#>         if (!is.null(offset)) {
#>             z$fitted.values <- offset
#>             z$residuals <- y - offset
#>         }
#>     }
#>     else {
#>         x <- model.matrix(mt, mf, contrasts)
#>         z <- if (is.null(w))
#>             lm.fit(x, y, offset = offset, singular.ok = singular.ok,
#>                 ...)
#>         else lm.wfit(x, y, w, offset = offset, singular.ok = singular.ok,
#>             ...)
#>     }
#>     class(z) <- c(if (is.matrix(y)) "mlm", "lm")
#>     z$na.action <- attr(mf, "na.action")
#>     z$offset <- offset
#>     z$contrasts <- attr(x, "contrasts")
#>     z$xlevels <- .getXlevels(mt, mf)
#>     z$call <- cl
#>     z$terms <- mt
#>     if (model)
#>         z$model <- mf
#>     if (ret.x)
#>         z$x <- x
#>     if (ret.y)
#>         z$y <- y
#>     if (!qr)
#>         z$qr <- NULL
#>     z
#> })(formula = mpg ~ cyl, data = structure(list(mpg = c(21, 21,
#> 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8, 16.4, 17.3,
#> 15.2, 10.4, 10.4, 14.7, 32.4, 30.4, 33.9, 21.5, 15.5, 15.2, 13.3,
#> 19.2, 27.3, 26, 30.4, 15.8, 19.7, 15, 21.4), cyl = c(6, 6, 4,
#> 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8,
#> 8, 4, 4, 4, 8, 6, 8, 4), disp = c(160, 160, 108, 258, 360, 225,
#> 360, 146.7, 140.8, 167.6, 167.6, 275.8, 275.8, 275.8, 472, 460,
#> 440, 78.7, 75.7, 71.1, 120.1, 318, 304, 350, 400, 79, 120.3,
#> 95.1, 351, 145, 301, 121), hp = c(110, 110, 93, 110, 175, 105,
#> 245, 62, 95, 123, 123, 180, 180, 180, 205, 215, 230, 66, 52,
#> 65, 97, 150, 150, 245, 175, 66, 91, 113, 264, 175, 335, 109),
#>     drat = c(3.9, 3.9, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92,
#>     3.92, 3.92, 3.07, 3.07, 3.07, 2.93, 3, 3.23, 4.08, 4.93,
#>     4.22, 3.7, 2.76, 3.15, 3.73, 3.08, 4.08, 4.43, 3.77, 4.22,
#>     3.62, 3.54, 4.11), wt = c(2.62, 2.875, 2.32, 3.215, 3.44,
#>     3.46, 3.57, 3.19, 3.15, 3.44, 3.44, 4.07, 3.73, 3.78, 5.25,
#>     5.424, 5.345, 2.2, 1.615, 1.835, 2.465, 3.52, 3.435, 3.84,
#>     3.845, 1.935, 2.14, 1.513, 3.17, 2.77, 3.57, 2.78), qsec = c(16.46,
#>     17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20, 22.9, 18.3,
#>     18.9, 17.4, 17.6, 18, 17.98, 17.82, 17.42, 19.47, 18.52,
#>     19.9, 20.01, 16.87, 17.3, 15.41, 17.05, 18.9, 16.7, 16.9,
#>     14.5, 15.5, 14.6, 18.6), vs = c(0, 0, 1, 1, 0, 1, 0, 1, 1,
#>     1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1,
#>     0, 0, 0, 1), am = c(1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
#>     0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1),
#>     gear = c(4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3,
#>     3, 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 4), carb = c(4,
#>     4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1,
#>     1, 2, 2, 4, 2, 1, 2, 2, 4, 6, 8, 2)), row.names = c("Mazda RX4",
#> "Mazda RX4 Wag", "Datsun 710", "Hornet 4 Drive", "Hornet Sportabout",
#> "Valiant", "Duster 360", "Merc 240D", "Merc 230", "Merc 280",
#> "Merc 280C", "Merc 450SE", "Merc 450SL", "Merc 450SLC", "Cadillac Fleetwood",
#> "Lincoln Continental", "Chrysler Imperial", "Fiat 128", "Honda Civic",
#> "Toyota Corolla", "Toyota Corona", "Dodge Challenger", "AMC Javelin",
#> "Camaro Z28", "Pontiac Firebird", "Fiat X1-9", "Porsche 914-2",
#> "Lotus Europa", "Ford Pantera L", "Ferrari Dino", "Maserati Bora",
#> "Volvo 142E"), class = "data.frame"))
#>
#> Coefficients:
#> (Intercept)          cyl
#>      37.885       -2.876

# note the difference in size!
object.size(lm_model)
#> 25528 bytes
object.size(lm_model_do_call)
#> 237824 bytes

# here it is
object.size(lm_model_do_call$call)
#> 213024 bytes

# if you are a package developer, please don't save the call

# if you are a package user, you can avoid this issue with rlang
library(rlang)
#>
#> Attaching package: 'rlang'
#> The following object is masked _by_ '.GlobalEnv':
#>
#>     .data

clean_lm_call <- call2("lm", mpg ~ cyl, sym("mtcars"))
clean_lm_call
#> lm(mpg ~ cyl, mtcars)

eval_bare(clean_lm_call)
#>
#> Call:
#> lm(formula = mpg ~ cyl, data = mtcars)
#>
#> Coefficients:
#> (Intercept)          cyl
#>      37.885       -2.876

# or with base R
eval(call("lm", mpg ~ cyl, substitute(mtcars)))
#>
#> Call:
#> lm(formula = mpg ~ cyl, data = mtcars)
#>
#> Coefficients:
#> (Intercept)          cyl
#>      37.885       -2.876

# if you need to control the environment that `mtcars` is evaluated (i.e. looked up)
# in, you can either quo() it to tie the environment to the symbol and use eval_tidy()
# or you can also specify an environment to the `eval_bare()` call for terms to be
# looked up in

# lets use a different data set where we have to control the environment

generate_call_bad <- function() {
  my_data <- mtcars
  call2("lm", mpg ~ cyl, sym("my_data"))
}

generate_call_bad()
#> lm(mpg ~ cyl, my_data)

# oh no :/
eval_bare(generate_call_bad())
#> Error in is.data.frame(data): object 'my_data' not found

generate_call_good <- function() {
  my_data <- mtcars
  call2("lm", mpg ~ cyl, quo(my_data)) # <- the symbol `my_data` + environment are kept
}

call_good <- generate_call_good()
call_good
#> lm(mpg ~ cyl, ~my_data)

# ah there it is, see the `env`ironment?
# that tells us where to look up `my_data`
call_good[[3]]
#> <quosure>
#> expr: ^my_data
#> env:  0x7f800e48fa60

eval_tidy(generate_call_good())
#>
#> Call:
#> lm(formula = mpg ~ cyl, data = ~my_data)
#>
#> Coefficients:
#> (Intercept)          cyl
#>      37.885       -2.876
	# often package developers store the call in their model object
	# and then print it out in their summary / print methods
	lm_model <- lm(mpg ~ cyl, mtcars)

	lm_model
	#>
	#> Call:
	#> lm(formula = mpg ~ cyl, data = mtcars)
	#>
	#> Coefficients:
	#> (Intercept) cyl
	#> 37.885 -2.876

	lm_model$call
	#> lm(formula = mpg ~ cyl, data = mtcars)

	# normally this is not an issue, but sometimes package _users_
	# want to call that modeling function programatically. This often
	# means that they use `do.call()`
	.f <- mpg ~ cyl
	.data <- mtcars
	lm_model_do_call <- do.call(lm, list(formula = .f, data = .data))

	# essentially, this inlines the `data` in a textual representation in the call
	# OH THE HORROR
	lm_model_do_call
	#>
	#> Call:
	#> (function (formula, data, subset, weights, na.action, method = "qr",
	#> model = TRUE, x = FALSE, y = FALSE, qr = TRUE, singular.ok = TRUE,
	#> contrasts = NULL, offset, ...)
	#> {
	#> ret.x <- x
	#> ret.y <- y
	#> cl <- match.call()
	#> mf <- match.call(expand.dots = FALSE)
	#> m <- match(c("formula", "data", "subset", "weights", "na.action",
	#> "offset"), names(mf), 0L)
	#> mf <- mf[c(1L, m)]
	#> mf$drop.unused.levels <- TRUE
	#> mf[[1L]] <- quote(stats::model.frame)
	#> mf <- eval(mf, parent.frame())
	#> if (method == "model.frame")
	#> return(mf)
	#> else if (method != "qr")
	#> warning(gettextf("method = '%s' is not supported. Using 'qr'",
	#> method), domain = NA)
	#> mt <- attr(mf, "terms")
	#> y <- model.response(mf, "numeric")
	#> w <- as.vector(model.weights(mf))
	#> if (!is.null(w) && !is.numeric(w))
	#> stop("'weights' must be a numeric vector")
	#> offset <- as.vector(model.offset(mf))
	#> if (!is.null(offset)) {
	#> if (length(offset) != NROW(y))
	#> stop(gettextf("number of offsets is %d, should equal %d (number of observations)",
	#> length(offset), NROW(y)), domain = NA)
	#> }
	#> if (is.empty.model(mt)) {
	#> x <- NULL
	#> z <- list(coefficients = if (is.matrix(y)) matrix(NA_real_,
	#> 0, ncol(y)) else numeric(), residuals = y, fitted.values = 0 *
	#> y, weights = w, rank = 0L, df.residual = if (!is.null(w)) sum(w !=
	#> 0) else if (is.matrix(y)) nrow(y) else length(y))
	#> if (!is.null(offset)) {
	#> z$fitted.values <- offset
	#> z$residuals <- y - offset
	#> }
	#> }
	#> else {
	#> x <- model.matrix(mt, mf, contrasts)
	#> z <- if (is.null(w))
	#> lm.fit(x, y, offset = offset, singular.ok = singular.ok,
	#> ...)
	#> else lm.wfit(x, y, w, offset = offset, singular.ok = singular.ok,
	#> ...)
	#> }
	#> class(z) <- c(if (is.matrix(y)) "mlm", "lm")
	#> z$na.action <- attr(mf, "na.action")
	#> z$offset <- offset
	#> z$contrasts <- attr(x, "contrasts")
	#> z$xlevels <- .getXlevels(mt, mf)
	#> z$call <- cl
	#> z$terms <- mt
	#> if (model)
	#> z$model <- mf
	#> if (ret.x)
	#> z$x <- x
	#> if (ret.y)
	#> z$y <- y
	#> if (!qr)
	#> z$qr <- NULL
	#> z
	#> })(formula = mpg ~ cyl, data = structure(list(mpg = c(21, 21,
	#> 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8, 16.4, 17.3,
	#> 15.2, 10.4, 10.4, 14.7, 32.4, 30.4, 33.9, 21.5, 15.5, 15.2, 13.3,
	#> 19.2, 27.3, 26, 30.4, 15.8, 19.7, 15, 21.4), cyl = c(6, 6, 4,
	#> 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8,
	#> 8, 4, 4, 4, 8, 6, 8, 4), disp = c(160, 160, 108, 258, 360, 225,
	#> 360, 146.7, 140.8, 167.6, 167.6, 275.8, 275.8, 275.8, 472, 460,
	#> 440, 78.7, 75.7, 71.1, 120.1, 318, 304, 350, 400, 79, 120.3,
	#> 95.1, 351, 145, 301, 121), hp = c(110, 110, 93, 110, 175, 105,
	#> 245, 62, 95, 123, 123, 180, 180, 180, 205, 215, 230, 66, 52,
	#> 65, 97, 150, 150, 245, 175, 66, 91, 113, 264, 175, 335, 109),
	#> drat = c(3.9, 3.9, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92,
	#> 3.92, 3.92, 3.07, 3.07, 3.07, 2.93, 3, 3.23, 4.08, 4.93,
	#> 4.22, 3.7, 2.76, 3.15, 3.73, 3.08, 4.08, 4.43, 3.77, 4.22,
	#> 3.62, 3.54, 4.11), wt = c(2.62, 2.875, 2.32, 3.215, 3.44,
	#> 3.46, 3.57, 3.19, 3.15, 3.44, 3.44, 4.07, 3.73, 3.78, 5.25,
	#> 5.424, 5.345, 2.2, 1.615, 1.835, 2.465, 3.52, 3.435, 3.84,
	#> 3.845, 1.935, 2.14, 1.513, 3.17, 2.77, 3.57, 2.78), qsec = c(16.46,
	#> 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20, 22.9, 18.3,
	#> 18.9, 17.4, 17.6, 18, 17.98, 17.82, 17.42, 19.47, 18.52,
	#> 19.9, 20.01, 16.87, 17.3, 15.41, 17.05, 18.9, 16.7, 16.9,
	#> 14.5, 15.5, 14.6, 18.6), vs = c(0, 0, 1, 1, 0, 1, 0, 1, 1,
	#> 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1,
	#> 0, 0, 0, 1), am = c(1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	#> 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1),
	#> gear = c(4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3,
	#> 3, 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 4), carb = c(4,
	#> 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1,
	#> 1, 2, 2, 4, 2, 1, 2, 2, 4, 6, 8, 2)), row.names = c("Mazda RX4",
	#> "Mazda RX4 Wag", "Datsun 710", "Hornet 4 Drive", "Hornet Sportabout",
	#> "Valiant", "Duster 360", "Merc 240D", "Merc 230", "Merc 280",
	#> "Merc 280C", "Merc 450SE", "Merc 450SL", "Merc 450SLC", "Cadillac Fleetwood",
	#> "Lincoln Continental", "Chrysler Imperial", "Fiat 128", "Honda Civic",
	#> "Toyota Corolla", "Toyota Corona", "Dodge Challenger", "AMC Javelin",
	#> "Camaro Z28", "Pontiac Firebird", "Fiat X1-9", "Porsche 914-2",
	#> "Lotus Europa", "Ford Pantera L", "Ferrari Dino", "Maserati Bora",
	#> "Volvo 142E"), class = "data.frame"))
	#>
	#> Coefficients:
	#> (Intercept) cyl
	#> 37.885 -2.876

	# note the difference in size!
	object.size(lm_model)
	#> 25528 bytes
	object.size(lm_model_do_call)
	#> 237824 bytes

	# here it is
	object.size(lm_model_do_call$call)
	#> 213024 bytes

	# if you are a package developer, please don't save the call

	# if you are a package user, you can avoid this issue with rlang
	library(rlang)
	#>
	#> Attaching package: 'rlang'
	#> The following object is masked _by_ '.GlobalEnv':
	#>
	#> .data

	clean_lm_call <- call2("lm", mpg ~ cyl, sym("mtcars"))
	clean_lm_call
	#> lm(mpg ~ cyl, mtcars)

	eval_bare(clean_lm_call)
	#>
	#> Call:
	#> lm(formula = mpg ~ cyl, data = mtcars)
	#>
	#> Coefficients:
	#> (Intercept) cyl
	#> 37.885 -2.876

	# or with base R
	eval(call("lm", mpg ~ cyl, substitute(mtcars)))
	#>
	#> Call:
	#> lm(formula = mpg ~ cyl, data = mtcars)
	#>
	#> Coefficients:
	#> (Intercept) cyl
	#> 37.885 -2.876

	# if you need to control the environment that `mtcars` is evaluated (i.e. looked up)
	# in, you can either quo() it to tie the environment to the symbol and use eval_tidy()
	# or you can also specify an environment to the `eval_bare()` call for terms to be
	# looked up in

	# lets use a different data set where we have to control the environment

	generate_call_bad <- function() {
	my_data <- mtcars
	call2("lm", mpg ~ cyl, sym("my_data"))
	}

	generate_call_bad()
	#> lm(mpg ~ cyl, my_data)

	# oh no :/
	eval_bare(generate_call_bad())
	#> Error in is.data.frame(data): object 'my_data' not found

	generate_call_good <- function() {
	my_data <- mtcars
	call2("lm", mpg ~ cyl, quo(my_data)) # <- the symbol `my_data` + environment are kept
	}

	call_good <- generate_call_good()
	call_good
	#> lm(mpg ~ cyl, ~my_data)

	# ah there it is, see the `env`ironment?
	# that tells us where to look up `my_data`
	call_good[[3]]
	#> <quosure>
	#> expr: ^my_data
	#> env: 0x7f800e48fa60

	eval_tidy(generate_call_good())
	#>
	#> Call:
	#> lm(formula = mpg ~ cyl, data = ~my_data)
	#>
	#> Coefficients:
	#> (Intercept) cyl
	#> 37.885 -2.876