Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@skranz
Last active August 29, 2015 14:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save skranz/85ba6dbe406a13d3e262 to your computer and use it in GitHub Desktop.
Save skranz/85ba6dbe406a13d3e262 to your computer and use it in GitHub Desktop.
modify, a function that wraps data.table for quick replacement of values in selected rows
library(dplyr)
library(data.table)
# Returns R's empty symbol (the marker for an omitted argument).
# Splice the result directly into a call, e.g. call('[', x, EmptySymbol(), j)
# yields x[, j]. Do not store it in a variable first: evaluating a variable
# that holds the empty symbol raises a "missing argument" error.
EmptySymbol = function() {
  quote(expr = )
}
# Builds (but does not evaluate) a data.table modification call of the form
#   <dat.quote>[<filter.call>, `:=`(...), by = <by>]
#
# Arguments:
#   args        Named list of unevaluated expressions; each name is the column
#               to assign and each element the expression producing its values.
#   filter.call Unevaluated row condition used as the i argument, or NULL to
#               leave i empty (all rows).
#   by          Value stored as the `by` argument of the call, or NULL to omit it.
#   dat.quote   Symbol (or expression) that refers to the data.table in the
#               environment where the returned call will be evaluated.
#
# Returns the constructed call. Stops with an error if `args` is empty or
# contains unnamed elements, because such a call cannot name a target column.
get.data.table.modify.call = function(args=NULL, filter.call=NULL, by=NULL, dat.quote=quote(dt)) {
  arg.names = names(args)
  if (length(args) == 0) {
    stop("nothing to modify: supply at least one 'column = expression' argument",
         call. = FALSE)
  }
  if (is.null(arg.names) || anyNA(arg.names) || any(!nzchar(arg.names))) {
    stop("every modification must be named: use 'column = expression'",
         call. = FALSE)
  }

  if (length(args) == 1) {
    # Single column: `:=`("col", expr), with the column name given as a string.
    com = call(":=", arg.names[1], args[[1]])
  } else {
    # Several columns: functional form `:=`(col1 = expr1, col2 = expr2, ...).
    com = as.call(c(list(quote(`:=`)), args))
  }

  if (is.null(filter.call)) {
    # The empty symbol must be spliced in directly (not via a variable) so the
    # resulting call has an omitted i argument: dat[, j].
    ca = call('[', dat.quote, EmptySymbol(), com)
  } else {
    ca = call('[', dat.quote, filter.call, com)
  }

  if (!is.null(by)) {
    ca$by = by
  }
  ca
}
# A verb to replace data: assigns new values to columns, optionally only in
# the rows selected by .filter, using data.table's `:=`.
#
# Arguments:
#   .data   Unquoted name (or a single string) of the variable holding the
#           data; the variable is looked up in .envir.
#   ...     Named, unevaluated expressions of the form column = expression.
#   .filter Optional unquoted row condition; if missing, all rows are modified.
#   .by     Optional grouping specification, passed on as the `by` argument of
#           the data.table call (evaluated here, e.g. a character vector).
#   .envir  Environment in which the data variable is looked up, in which free
#           variables of the expressions are resolved, and to which the result
#           is assigned when the data is not a data.table.
#
# Returns the modified data invisibly. A data.table is changed by reference;
# any other object is converted to a data.table, modified, converted back and
# re-assigned to its variable name in .envir.
modify = function(.data,...,.filter,.by=NULL, .envir=parent.frame()) {
  .data = substitute(.data)
  if (!(is.name(.data) || (is.character(.data) && length(.data) == 1))) {
    stop("'.data' must be the name of a variable, not an expression",
         call. = FALSE)
  }
  args = eval(substitute(alist(...)))
  data.var = as.character(.data)
  dat = get(data.var, .envir)

  was.dt = is.data.table(dat)
  if (was.dt) {
    dt = dat
  } else {
    dt = as.data.table(dat)
  }

  if (missing(.filter)) {
    filter.call = NULL
  } else {
    filter.call = substitute(.filter)
  }

  # Evaluate the call in a child of .envir rather than in this function's
  # frame: free variables in the user's expressions are then resolved in the
  # caller's environment and cannot be shadowed by the locals of modify().
  # NOTE(review): data.table decides whether `[` uses data.table semantics
  # from the calling environment; confirm .envir is data.table-aware if
  # modify() is ever called from inside a package.
  eval.env = new.env(parent = .envir)
  assign(".modify.dt", dt, envir = eval.env)
  ca = get.data.table.modify.call(args = args, by = .by, filter.call = filter.call,
                                  dat.quote = quote(.modify.dt))
  eval(ca, eval.env)
  # Read the table back in case `:=` rebound the symbol to a re-allocated table.
  dt = get(".modify.dt", envir = eval.env, inherits = FALSE)

  if (was.dt) {
    dat = dt
  } else {
    if (is.tbl(dat)) {
      dat = as.tbl(dt)
    } else {
      dat = as(dt, class(dat))
    }
    assign(data.var, dat, .envir)
  }
  invisible(dat)
}
# Example / benchmark for modify(): compares modify() on a tbl, a data.frame
# and a data.table with a direct data.table `:=` call and with dplyr::mutate()
# combined with ifelse(). Intended to be run interactively.
examples.modify = function() {
  library(dplyr)
  library(data.table)
  # microbenchmark() is used below, so the package must be attached as well.
  library(microbenchmark)

  n = 1e6
  df = data.frame(a = sample(1:5, n, replace = TRUE),
                  b = sample(1:100, n, replace = TRUE),
                  x = rnorm(n))
  dt = as.data.table(df)
  tbl = as.tbl(df)

  #modify(tbl, x = x+100,.filter=a==2)

  # All variants perform the same update: add 100 to x where a == 2.
  microbenchmark(times = 5L,
    modify(tbl, x = x+100,.filter=a==2),
    modify(df, x = x+100,.filter=a==2),
    modify(dt, x = x+100,.filter=a==2),
    dt[a==2,x:=x+100],
    mutate.df = mutate(df, x=ifelse(a==2,x+100,x)),
    mutate.tbl = mutate(tbl, x=ifelse(a==2,x+100,x))
  )
  ## Results
  ## Unit: milliseconds
  ## expr min lq median uq max neval
  ## modify(tbl, x = x + 100, .filter = a == 2) 54.96051 56.38434 61.31385 61.50925 67.71102 5
  ## modify(df, x = x + 100, .filter = a == 2) 65.90863 85.99066 86.04277 86.13829 91.89006 5
  ## modify(dt, x = x + 100, .filter = a == 2) 50.61876 56.41078 61.40109 61.45043 61.79544 5
  ## dt[a == 2, `:=`(x, x + 100)] 50.85836 56.50513 60.78332 61.05451 67.58312 5
  ## mutate.df 794.84943 821.20072 827.20788 837.43679 849.53164 5
  ## mutate.tbl 788.86398 802.19081 835.90954 843.26593 899.45605 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment