# brodieG/data.table.standard.eval.R

## data.table.standard.eval.R
# Because there is no way to tell data.table
# "interpret this variable as a column name", it's possible to come up
# with corner cases.  I'll grant these are unlikely to occur in day
# to day use, but any function that uses `data.table` must account for
# them

# Low odds, and yes, there are workarounds, but this is
# what I mean by you have to think carefully to avoid
# corner cases

# Ex 1

# NOTE(review): no `library(data.table)` call is visible in this file;
# assumes the package is already attached.
# Build a small table that has a column literally named `col`.
my.dt <- data.table(col=letters[1:5], col2=1:5)
fun <- mean
# `col` is now both a column of `my.dt` and a global variable holding the
# name of the column we actually want ("col2").
col <- "col2"
# Intent: apply `fun` to column `col2`.
# NOTE(review): inside `[.data.table` the symbol `col` presumably resolves
# to the column `col` rather than the global string, so `get()` would not
# receive "col2" -- this is the name collision being illustrated; confirm.
my.dt[, fun(get(col))]

# this one in particular very unlikely, but illustrating a point

# Ex 2

# Add a constant column whose name itself contains a comma.
mtcars.dt <- data.table(mtcars)
mtcars.dt[,`cyl,am`:= 1]

# Attempt 1: `grp` holds the exact column name as a string.
# NOTE(review): data.table presumably treats a single string in `by` as a
# comma-separated list of column names, i.e. groups by `cyl` and `am`
# instead of the single column `cyl,am` -- confirm.
grp <- "cyl,am"
mtcars.dt[,mean(hp), by=grp]

# Attempt 2: the same name wrapped in backticks inside the string.
# NOTE(review): presumably an attempt to force the single-column reading;
# the outcome is not shown here -- confirm by running.
grp <- "`cyl,am`"
mtcars.dt[,mean(hp), by=grp]

# This one actually works fine, but again, you have to be careful
# by signaling your intent with an expression instead of a symbol
# name, which is not at all intuitive to anyone familiar with R.
# The `get` solution is internally consistent, at least, though
# with the collision issue I highlighted earlier

# Ex 3

# `cols` names the columns to be overwritten with `fun` applied to each.
cols <- c("hp", "mpg")
fun <- mean
# Bare symbol on the left of `:=`.
# NOTE(review): presumably read as a column literally named `cols`, not as
# the contents of the variable -- confirm.
# The outer parentheses make R print the value of the expression.
(data.table(mtcars)[, cols:=lapply(.SD, fun), .SDcols=cols])
# Parenthesised `(cols)`: the left-hand side is now an expression rather
# than a symbol name, signalling that the variable's contents are meant.
(data.table(mtcars)[, (cols):=lapply(.SD, fun), .SDcols=cols])

# Let's try to group by expressions (to be fair, you can't
# really do this with `dplyr`)

# Ex 4

# A plain list holding two quoted (unevaluated) grouping expressions.
# Note that the variable shares its name with base R's `exp()` function.
exp <- list(a=quote(gear %% 2), b=quote(cut(hp, 5)))

# Reference form: the grouping expressions are written inline in `by`.
data.table(mtcars)[, mean(mpg), by=list(a=gear %% 2, b=cut(hp, 5))]
# Same expressions supplied through the variable instead.
# NOTE(review): the author's "argh" suggests this does not give the same
# result as the inline form -- confirm the exact failure.
data.table(mtcars)[, mean(mpg), by=exp]         # argh

# Ex 5

# Mean of `mpg` in `mtcars`, grouped by a caller-supplied expression.
#
# exp: grouping expression written inline at the call site, e.g.
#      list(a=gear %% 2, b=cut(hp, 5)).  It is captured unevaluated with
#      substitute() and handed to `by` wrapped in eval().
#
# Returns whatever the `[.data.table` call produces.
# NOTE(review): this relies on how data.table treats an `eval(...)` call
# in `by` -- confirm against the data.table documentation.
group_by_exp <- function(exp) {
  data.table(mtcars)[, mean(mpg), by = eval(substitute(exp))]
}

# Grouping expression written inline at the call site.
group_by_exp(list(a=gear %% 2, b=cut(hp, 5)))   # this kind of works

# Ex 6

# Same grouping expression, but pre-quoted and stored in a variable.
exp.q <- quote(list(a=gear %% 2, b=cut(hp, 5)))
# Inside group_by_exp, substitute(exp) now yields the symbol `exp.q` rather
# than the list(...) call it holds.
# NOTE(review): presumably why the author marks this "argh" -- confirm.
group_by_exp(exp.q)                             # argh

# Variant of group_by_exp for a grouping expression that is already quoted
# and stored in a variable.
#
# exp: a variable holding a quoted call, e.g.
#      quote(list(a=gear %% 2, b=cut(hp, 5))).
#
# substitute(exp) captures the argument as written (a symbol); the inner
# eval() looks that symbol up to recover the quoted call, and the outer
# eval() hands it to `by`.
# NOTE(review): where data.table evaluates the `eval(...)` call in `by` is
# not visible here -- confirm against the data.table documentation.
group_by_exp2 <- function(exp) {
  data.table(mtcars)[, mean(mpg), by = eval(eval(substitute(exp)))]
}

# The double-eval wrapper applied to the pre-quoted expression.
group_by_exp2(exp.q)                           # now we're getting crazy...

# Passing the variable that holds the quoted call straight to `by`.
data.table(mtcars)[, mean(mpg), by=exp.q]      # this actually works, but it is not documented

# Again, every one of these has workarounds, though they require
# some care.  I'd like a version of `[.data.table` that allows me
# to very explicitly tell it how to interpret things so that I don't
# have to worry about funny corner cases due to the flexibility in
# data.table.  Don't get me wrong, for the most part the flexibility
# is fantastic.
	# Because there is no way to tell data.table
	# "interpret this variable as a column name", it's possible to come up
	# with corner cases. I'll grant these are unlikely to occur in day
	# to day use, but any function that uses `data.table` must account for
	# them

	# Low odds, and yes, there are workarounds, but this is
	# what I mean by you have to think carefully to avoid
	# corner cases

	# Ex 1

	# Build a small table that has a column literally named `col`.
	my.dt <- data.table(col=letters[1:5], col2=1:5)
	fun <- mean
	# `col` is now both a column of `my.dt` and a global variable holding the
	# name of the column we actually want ("col2").
	col <- "col2"
	# Intent: apply `fun` to column `col2`.
	# NOTE(review): inside `[.data.table` the symbol `col` presumably resolves
	# to the column `col` rather than the global string, so `get()` would not
	# receive "col2" -- this is the name collision being illustrated; confirm.
	my.dt[, fun(get(col))]

	# this one in particular very unlikely, but illustrating a point

	# Ex 2

	# Add a constant column whose name itself contains a comma.
	mtcars.dt <- data.table(mtcars)
	mtcars.dt[,`cyl,am`:= 1]

	# Attempt 1: `grp` holds the exact column name as a string.
	# NOTE(review): data.table presumably treats a single string in `by` as a
	# comma-separated list of column names, i.e. groups by `cyl` and `am`
	# instead of the single column `cyl,am` -- confirm.
	grp <- "cyl,am"
	mtcars.dt[,mean(hp), by=grp]

	# Attempt 2: the same name wrapped in backticks inside the string.
	# NOTE(review): presumably an attempt to force the single-column reading;
	# the outcome is not shown here -- confirm by running.
	grp <- "`cyl,am`"
	mtcars.dt[,mean(hp), by=grp]

	# This one actually works fine, but again, you have to be careful
	# by signaling your intent with an expression instead of a symbol
	# name, which is not at all intuitive to anyone familiar with R.
	# The `get` solution is internally consistent, at least, though
	# with the collision issue I highlighted earlier

	# Ex 3

	# `cols` names the columns to be overwritten with `fun` applied to each.
	cols <- c("hp", "mpg")
	fun <- mean
	# Bare symbol on the left of `:=`.
	# NOTE(review): presumably read as a column literally named `cols`, not as
	# the contents of the variable -- confirm.
	# The outer parentheses make R print the value of the expression.
	(data.table(mtcars)[, cols:=lapply(.SD, fun), .SDcols=cols])
	# Parenthesised `(cols)`: the left-hand side is now an expression rather
	# than a symbol name, signalling that the variable's contents are meant.
	(data.table(mtcars)[, (cols):=lapply(.SD, fun), .SDcols=cols])

	# Let's try to group by expressions (to be fair, you can't
	# really do this with `dplyr`)

	# Ex 4

	# A plain list holding two quoted (unevaluated) grouping expressions.
	# Note that the variable shares its name with base R's `exp()` function.
	exp <- list(a=quote(gear %% 2), b=quote(cut(hp, 5)))

	# Reference form: the grouping expressions are written inline in `by`.
	data.table(mtcars)[, mean(mpg), by=list(a=gear %% 2, b=cut(hp, 5))]
	# Same expressions supplied through the variable instead.
	# NOTE(review): the author's "argh" suggests this does not give the same
	# result as the inline form -- confirm the exact failure.
	data.table(mtcars)[, mean(mpg), by=exp] # argh

	# Ex 5

	# Mean of `mpg` in `mtcars`, grouped by a caller-supplied expression.
	#
	# exp: grouping expression written inline at the call site, e.g.
	#      list(a=gear %% 2, b=cut(hp, 5)).  It is captured unevaluated with
	#      substitute() and handed to `by` wrapped in eval().
	#
	# Returns whatever the `[.data.table` call produces.
	# NOTE(review): this relies on how data.table treats an `eval(...)` call
	# in `by` -- confirm against the data.table documentation.
	group_by_exp <- function(exp) {
		data.table(mtcars)[, mean(mpg), by = eval(substitute(exp))]
	}

	# Grouping expression written inline at the call site.
	group_by_exp(list(a=gear %% 2, b=cut(hp, 5))) # this kind of works

	# Ex 6

	# Same grouping expression, but pre-quoted and stored in a variable.
	exp.q <- quote(list(a=gear %% 2, b=cut(hp, 5)))
	# Inside group_by_exp, substitute(exp) now yields the symbol `exp.q` rather
	# than the list(...) call it holds.
	# NOTE(review): presumably why the author marks this "argh" -- confirm.
	group_by_exp(exp.q) # argh

	# Variant of group_by_exp for a grouping expression that is already quoted
	# and stored in a variable.
	#
	# exp: a variable holding a quoted call, e.g.
	#      quote(list(a=gear %% 2, b=cut(hp, 5))).
	#
	# substitute(exp) captures the argument as written (a symbol); the inner
	# eval() looks that symbol up to recover the quoted call, and the outer
	# eval() hands it to `by`.
	# NOTE(review): where data.table evaluates the `eval(...)` call in `by` is
	# not visible here -- confirm against the data.table documentation.
	group_by_exp2 <- function(exp) {
		data.table(mtcars)[, mean(mpg), by = eval(eval(substitute(exp)))]
	}

	# The double-eval wrapper applied to the pre-quoted expression.
	group_by_exp2(exp.q) # now we're getting crazy...

	# Passing the variable that holds the quoted call straight to `by`.
	data.table(mtcars)[, mean(mpg), by=exp.q] # this actually works, but it is not documented

	# Again, every one of these has workarounds, though they require
	# some care. I'd like a version of `[.data.table` that allows me
	# to very explicitly tell it how to interpret things so that I don't
	# have to worry about funny corner cases due to the flexibility in
	# data.table. Don't get me wrong, for the most part the flexibility
	# is fantastic.