Skip to content

Instantly share code, notes, and snippets.

@skranz
Created May 16, 2014 17:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save skranz/b22b60a83f5c6ab334f7 to your computer and use it in GitHub Desktop.
Save skranz/b22b60a83f5c6ab334f7 to your computer and use it in GitHub Desktop.
mutate2: a variant of mutate that can restrict the update to selected rows
#' Mutate columns, optionally restricted to a subset of rows.
#'
#' S3 generic; the method is chosen from the class of `.data`.
#'
#' @param .data Object to modify (methods for data.frame and data.table
#'   are defined in this file).
#' @param .where Optional unquoted row condition; when supplied, methods
#'   apply the column assignments only to the matching rows.
#' @param ... Named column expressions, e.g. `x = x + 100`.
mutate2 <- function(.data, .where, ...) {
  UseMethod("mutate2", .data)
}
#' mutate2 method for plain data frames.
#'
#' With `.where`, the data frame is converted to a data.table, the column
#' assignments are applied to the matching rows only, and the result is
#' converted back to a data.frame. Without `.where`, the call is delegated
#' to dplyr's mutate for tbl_df.
#'
#' @param .data A data.frame.
#' @param .where Optional unquoted row condition. It is captured with
#'   `substitute()` and never evaluated here.
#' @param ... Named column expressions, forwarded unevaluated.
#' @return A data.frame.
mutate2.data.frame <- function(.data, .where, ...) {
  if (!missing(.where)) {
    where_call <- substitute(.where)
    # Convert the argument itself. The previous code converted `df`, which is
    # not defined in this function and therefore resolved to an unrelated
    # object (stats::df or a global), ignoring `.data` entirely.
    dt <- as.data.table(.data)
    # `dt` is a fresh object owned by this function, so it can be modified
    # by reference without touching the caller's data frame.
    res <- mutate_where.data.table(
      .data = dt,
      .where.call = where_call,
      ...,
      inplace = TRUE
    )
    return(as.data.frame(res))
  }
  # NOTE(review): tbl_df() / mutate.tbl_df() are entry points of older dplyr
  # releases -- confirm they exist in the installed dplyr version.
  tbl <- tbl_df(.data)
  res <- mutate.tbl_df(tbl, ...)
  as.data.frame(res)
}
#' mutate2 method for data.tables.
#'
#' @param .data A data.table.
#' @param .where Optional unquoted row condition. When supplied, the column
#'   assignments are applied only to the matching rows (via
#'   `mutate_where.data.table`).
#' @param ... Named column expressions, captured unevaluated.
#' @param inplace If `FALSE` (default) a copy of `.data` is modified and
#'   returned; if `TRUE`, `.data` itself is modified by reference.
#' @return The modified data.table.
mutate2.data.table <- function(.data, .where, ..., inplace = FALSE) {
  if (!inplace) {
    .data <- copy(.data)
  }
  if (!missing(.where)) {
    where_call <- substitute(.where)
    # Pass `.data` (already copied above when inplace = FALSE). The previous
    # code passed `dt`, which is not defined in this function and only
    # "worked" when a global `dt` happened to exist.
    return(mutate_where.data.table(
      .data = .data,
      .where.call = where_call,
      ...,
      inplace = TRUE
    ))
  }
  # Evaluate each `data[, lhs := rhs]` in a small environment whose parent is
  # the caller's frame, so names in the expressions that are not columns are
  # looked up from the caller.
  env <- new.env(parent = parent.frame(), size = 1L)
  env$data <- .data
  cols <- named_dots(...)
  for (i in seq_along(cols)) {
    call <- substitute(
      data[, `:=`(lhs, rhs)],
      list(lhs = as.name(names(cols)[[i]]), rhs = cols[[i]])
    )
    eval(call, env)
  }
  .data
}
#' Assign columns on the rows of a data.table selected by a condition.
#'
#' For every named expression in `...` this builds and evaluates the call
#' `data[<.where.call>, `:=`(<name>, <expression>)]`.
#'
#' @param .data A data.table.
#' @param .where.call The row condition as an unevaluated call; it is
#'   spliced in as the `i` argument of the subset.
#' @param ... Named column expressions, captured unevaluated with
#'   `named_dots()`.
#' @param inplace If `FALSE` (default) a copy of `.data` is modified;
#'   if `TRUE`, `.data` is modified by reference.
#' @return The modified data.table.
mutate_where.data.table <- function(.data, .where.call, ..., inplace = FALSE) {
  if (!inplace) {
    .data <- copy(.data)
  }
  # Evaluation environment: holds only `data`; everything else is looked up
  # from the caller's frame.
  eval_env <- new.env(parent = parent.frame(), size = 1L)
  assign("data", .data, envir = eval_env)
  exprs <- named_dots(...)
  col_names <- names(exprs)
  for (k in seq_along(exprs)) {
    assign_call <- bquote(
      data[.(.where.call), `:=`(.(as.name(col_names[[k]])), .(exprs[[k]]))]
    )
    eval(assign_call, eval_env)
  }
  .data
}
# Usage example and timing comparison for mutate2().
# Draws random test data, runs mutate2() once on a data.frame and once on a
# data.table, then returns the microbenchmark result.
examples <- function() {
  library(microbenchmark)
  # library(modify)
  library(dplyr)
  library(pryr)
  library(data.table)

  # Benchmark compared to directly using data.table or dplyr
  n_rows <- 1e6
  df <- data.frame(
    a = sample(1:5, n_rows, replace = TRUE),
    b = sample(1:100, n_rows, replace = TRUE),
    x = rnorm(n_rows)
  )
  dt <- as.data.table(df)

  res1 <- mutate2(df, a == 3, x = x + 100)
  res2 <- mutate2(dt, a == 3, x = x + 100)

  microbenchmark(
    times = 5L,
    # modify(dt, a == 2, x = x + 100),
    mutate(dt, x = ifelse(a == 2, x + 100, x)),
    mutate2(dt, a == 2, x = x + 100),
    mutate(df, x = ifelse(a == 2, x + 100, x)),
    mutate2(df, a == 2, x = x + 100)
  )
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment