Skip to content

Instantly share code, notes, and snippets.

@DavisVaughan
Created May 31, 2019 20:31
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save DavisVaughan/0e3264a258f49f392118192df6ade239 to your computer and use it in GitHub Desktop.
Save DavisVaughan/0e3264a258f49f392118192df6ade239 to your computer and use it in GitHub Desktop.
# often package developers store the call in their model object
# and then print it out in their summary / print methods
lm_model <- lm(mpg ~ cyl, mtcars)
lm_model
#>
#> Call:
#> lm(formula = mpg ~ cyl, data = mtcars)
#>
#> Coefficients:
#> (Intercept) cyl
#> 37.885 -2.876
lm_model$call
#> lm(formula = mpg ~ cyl, data = mtcars)
# normally this is not an issue, but sometimes package _users_
# want to call that modeling function programatically. This often
# means that they use `do.call()`
.f <- mpg ~ cyl
.data <- mtcars
lm_model_do_call <- do.call(lm, list(formula = .f, data = .data))
# essentially, this inlines the `data` in a textual representation in the call
# OH THE HORROR
lm_model_do_call
#>
#> Call:
#> (function (formula, data, subset, weights, na.action, method = "qr",
#> model = TRUE, x = FALSE, y = FALSE, qr = TRUE, singular.ok = TRUE,
#> contrasts = NULL, offset, ...)
#> {
#> ret.x <- x
#> ret.y <- y
#> cl <- match.call()
#> mf <- match.call(expand.dots = FALSE)
#> m <- match(c("formula", "data", "subset", "weights", "na.action",
#> "offset"), names(mf), 0L)
#> mf <- mf[c(1L, m)]
#> mf$drop.unused.levels <- TRUE
#> mf[[1L]] <- quote(stats::model.frame)
#> mf <- eval(mf, parent.frame())
#> if (method == "model.frame")
#> return(mf)
#> else if (method != "qr")
#> warning(gettextf("method = '%s' is not supported. Using 'qr'",
#> method), domain = NA)
#> mt <- attr(mf, "terms")
#> y <- model.response(mf, "numeric")
#> w <- as.vector(model.weights(mf))
#> if (!is.null(w) && !is.numeric(w))
#> stop("'weights' must be a numeric vector")
#> offset <- as.vector(model.offset(mf))
#> if (!is.null(offset)) {
#> if (length(offset) != NROW(y))
#> stop(gettextf("number of offsets is %d, should equal %d (number of observations)",
#> length(offset), NROW(y)), domain = NA)
#> }
#> if (is.empty.model(mt)) {
#> x <- NULL
#> z <- list(coefficients = if (is.matrix(y)) matrix(NA_real_,
#> 0, ncol(y)) else numeric(), residuals = y, fitted.values = 0 *
#> y, weights = w, rank = 0L, df.residual = if (!is.null(w)) sum(w !=
#> 0) else if (is.matrix(y)) nrow(y) else length(y))
#> if (!is.null(offset)) {
#> z$fitted.values <- offset
#> z$residuals <- y - offset
#> }
#> }
#> else {
#> x <- model.matrix(mt, mf, contrasts)
#> z <- if (is.null(w))
#> lm.fit(x, y, offset = offset, singular.ok = singular.ok,
#> ...)
#> else lm.wfit(x, y, w, offset = offset, singular.ok = singular.ok,
#> ...)
#> }
#> class(z) <- c(if (is.matrix(y)) "mlm", "lm")
#> z$na.action <- attr(mf, "na.action")
#> z$offset <- offset
#> z$contrasts <- attr(x, "contrasts")
#> z$xlevels <- .getXlevels(mt, mf)
#> z$call <- cl
#> z$terms <- mt
#> if (model)
#> z$model <- mf
#> if (ret.x)
#> z$x <- x
#> if (ret.y)
#> z$y <- y
#> if (!qr)
#> z$qr <- NULL
#> z
#> })(formula = mpg ~ cyl, data = structure(list(mpg = c(21, 21,
#> 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8, 16.4, 17.3,
#> 15.2, 10.4, 10.4, 14.7, 32.4, 30.4, 33.9, 21.5, 15.5, 15.2, 13.3,
#> 19.2, 27.3, 26, 30.4, 15.8, 19.7, 15, 21.4), cyl = c(6, 6, 4,
#> 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8,
#> 8, 4, 4, 4, 8, 6, 8, 4), disp = c(160, 160, 108, 258, 360, 225,
#> 360, 146.7, 140.8, 167.6, 167.6, 275.8, 275.8, 275.8, 472, 460,
#> 440, 78.7, 75.7, 71.1, 120.1, 318, 304, 350, 400, 79, 120.3,
#> 95.1, 351, 145, 301, 121), hp = c(110, 110, 93, 110, 175, 105,
#> 245, 62, 95, 123, 123, 180, 180, 180, 205, 215, 230, 66, 52,
#> 65, 97, 150, 150, 245, 175, 66, 91, 113, 264, 175, 335, 109),
#> drat = c(3.9, 3.9, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92,
#> 3.92, 3.92, 3.07, 3.07, 3.07, 2.93, 3, 3.23, 4.08, 4.93,
#> 4.22, 3.7, 2.76, 3.15, 3.73, 3.08, 4.08, 4.43, 3.77, 4.22,
#> 3.62, 3.54, 4.11), wt = c(2.62, 2.875, 2.32, 3.215, 3.44,
#> 3.46, 3.57, 3.19, 3.15, 3.44, 3.44, 4.07, 3.73, 3.78, 5.25,
#> 5.424, 5.345, 2.2, 1.615, 1.835, 2.465, 3.52, 3.435, 3.84,
#> 3.845, 1.935, 2.14, 1.513, 3.17, 2.77, 3.57, 2.78), qsec = c(16.46,
#> 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20, 22.9, 18.3,
#> 18.9, 17.4, 17.6, 18, 17.98, 17.82, 17.42, 19.47, 18.52,
#> 19.9, 20.01, 16.87, 17.3, 15.41, 17.05, 18.9, 16.7, 16.9,
#> 14.5, 15.5, 14.6, 18.6), vs = c(0, 0, 1, 1, 0, 1, 0, 1, 1,
#> 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1,
#> 0, 0, 0, 1), am = c(1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
#> 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1),
#> gear = c(4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3,
#> 3, 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 4), carb = c(4,
#> 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1,
#> 1, 2, 2, 4, 2, 1, 2, 2, 4, 6, 8, 2)), row.names = c("Mazda RX4",
#> "Mazda RX4 Wag", "Datsun 710", "Hornet 4 Drive", "Hornet Sportabout",
#> "Valiant", "Duster 360", "Merc 240D", "Merc 230", "Merc 280",
#> "Merc 280C", "Merc 450SE", "Merc 450SL", "Merc 450SLC", "Cadillac Fleetwood",
#> "Lincoln Continental", "Chrysler Imperial", "Fiat 128", "Honda Civic",
#> "Toyota Corolla", "Toyota Corona", "Dodge Challenger", "AMC Javelin",
#> "Camaro Z28", "Pontiac Firebird", "Fiat X1-9", "Porsche 914-2",
#> "Lotus Europa", "Ford Pantera L", "Ferrari Dino", "Maserati Bora",
#> "Volvo 142E"), class = "data.frame"))
#>
#> Coefficients:
#> (Intercept) cyl
#> 37.885 -2.876
# note the difference in size!
object.size(lm_model)
#> 25528 bytes
object.size(lm_model_do_call)
#> 237824 bytes
# here it is
object.size(lm_model_do_call$call)
#> 213024 bytes
# if you are a package developer, please don't save the call
# if you are a package user, you can avoid this issue with rlang
library(rlang)
#>
#> Attaching package: 'rlang'
#> The following object is masked _by_ '.GlobalEnv':
#>
#> .data
clean_lm_call <- call2("lm", mpg ~ cyl, sym("mtcars"))
clean_lm_call
#> lm(mpg ~ cyl, mtcars)
eval_bare(clean_lm_call)
#>
#> Call:
#> lm(formula = mpg ~ cyl, data = mtcars)
#>
#> Coefficients:
#> (Intercept) cyl
#> 37.885 -2.876
# or with base R
eval(call("lm", mpg ~ cyl, substitute(mtcars)))
#>
#> Call:
#> lm(formula = mpg ~ cyl, data = mtcars)
#>
#> Coefficients:
#> (Intercept) cyl
#> 37.885 -2.876
# if you need to control the environment that `mtcars` is evaluated (i.e. looked up)
# in, you can either quo() it to tie the environment to the symbol and use eval_tidy()
# or you can also specify an environment to the `eval_bare()` call for terms to be
# looked up in
# lets use a different data set where we have to control the environment
generate_call_bad <- function() {
my_data <- mtcars
call2("lm", mpg ~ cyl, sym("my_data"))
}
generate_call_bad()
#> lm(mpg ~ cyl, my_data)
# oh no :/
eval_bare(generate_call_bad())
#> Error in is.data.frame(data): object 'my_data' not found
generate_call_good <- function() {
my_data <- mtcars
call2("lm", mpg ~ cyl, quo(my_data)) # <- the symbol `my_data` + environment are kept
}
call_good <- generate_call_good()
call_good
#> lm(mpg ~ cyl, ~my_data)
# ah there it is, see the `env`ironment?
# that tells us where to look up `my_data`
call_good[[3]]
#> <quosure>
#> expr: ^my_data
#> env: 0x7f800e48fa60
eval_tidy(generate_call_good())
#>
#> Call:
#> lm(formula = mpg ~ cyl, data = ~my_data)
#>
#> Coefficients:
#> (Intercept) cyl
#> 37.885 -2.876
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment