Last active
April 8, 2020 11:02
-
-
Save brodieG/046e7cdd2acf42d95909 to your computer and use it in GitHub Desktop.
Corner Cases With Non-Standard Evaluation in data.table
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Because there is no way to tell data.table
# "interpret this variable as a column name", it's possible to come up
# with corner cases. I'll grant these are unlikely to occur in day-to-day
# use, but any function that uses `data.table` must account for
# them.
library(data.table) # every example in this script relies on `[.data.table`

# Low odds, and yes, there are workarounds, but this is
# what I mean when I say you have to think carefully to avoid
# corner cases.
# Ex 1
my.dt <- data.table(col = letters[1:5], col2 = 1:5)
fun <- mean
# `col` now names both a global variable and a column of `my.dt`.
col <- "col2"
# Intent: the mean of the column whose name is stored in the global `col`.
# NOTE(review): inside `[`, the column `col` presumably takes precedence over
# the global, so `get()` is handed the letters rather than "col2" -- confirm.
my.dt[, fun(get(col))]
# This one in particular is very unlikely, but it illustrates the point.
# Ex 2
mtcars.dt <- data.table(mtcars)
mtcars.dt[, `cyl,am` := 1] # a column whose name itself contains a comma
grp <- "cyl,am"
# data.table documents that a single comma-separated string in `by` is split
# into column names, so this presumably groups by `cyl` and `am` rather than
# by the `cyl,am` column.
mtcars.dt[, mean(hp), by = grp]
# Second attempt: protect the name with backticks inside the string.
grp <- "`cyl,am`"
mtcars.dt[, mean(hp), by = grp]
# This one actually works fine, but again, you have to be careful
# to signal your intent with an expression instead of a symbol
# name, which is not at all intuitive to anyone familiar with R.
# The `get` solution is internally consistent, at least, though
# with the collision issue I highlighted earlier.
# Ex 3
cols <- c("hp", "mpg")
fun <- mean
# Bare symbol on the LHS of `:=`: treated as the literal column name "cols".
(data.table(mtcars)[, cols := lapply(.SD, fun), .SDcols = cols])
# Parenthesised LHS: evaluated first, so the columns named in `cols` are
# the ones assigned.
(data.table(mtcars)[, (cols) := lapply(.SD, fun), .SDcols = cols])
# Let's try to group by expressions (to be fair, you can't
# really do this with `dplyr`).
# Ex 4
exp <- list(a = quote(gear %% 2), b = quote(cut(hp, 5)))
# Grouping expressions written inline in `by`.
data.table(mtcars)[, mean(mpg), by = list(a = gear %% 2, b = cut(hp, 5))]
# The same expressions, stored quoted in a list and passed by name.
data.table(mtcars)[, mean(mpg), by = exp] # argh
# Ex 5
# Mean `mpg` of `mtcars`, grouped by the expression supplied as `exp`.
# `exp` is captured with `substitute()`, so the caller writes the grouping
# expression unquoted, directly in the call.
group_by_exp <- function(exp) {
  data.table(mtcars)[, mean(mpg), by = eval(substitute(exp))]
}
group_by_exp(list(a = gear %% 2, b = cut(hp, 5))) # this kind of works
# Ex 6
exp.q <- quote(list(a = gear %% 2, b = cut(hp, 5)))
group_by_exp(exp.q) # argh
# Variant of `group_by_exp` with one additional `eval()`, aimed at grouping
# expressions that arrive already quoted and stored in a variable.
group_by_exp2 <- function(exp) {
  data.table(mtcars)[, mean(mpg), by = eval(eval(substitute(exp)))]
}
group_by_exp2(exp.q) # now we're getting crazy...
data.table(mtcars)[, mean(mpg), by = exp.q] # this actually works!, but not documented
# Again, every one of these has workarounds, though they require
# some care. I'd like a version of `[.data.table` that allows me
# to tell it very explicitly how to interpret things, so that I don't
# have to worry about funny corner cases caused by the flexibility in
# data.table. Don't get me wrong: for the most part the flexibility
# is fantastic.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Note: discussion is being continued on e-mail. Will report back with conclusions.