Created
March 23, 2018 14:25
-
-
Save JohnMount/1982127318654c8631203e5b5d4946ac to your computer and use it in GitHub Desktop.
Small strengthenings of base R functions to be more pipe compatible.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Re-expose basic R data manipulation tools in more pipe friendly wrappers. | |
library("wrapr") | |
#' Evaluate an expression with a data frame acting as the inner environment. | |
#' | |
#' References can be forced to the environment with a .e$ prefix | |
#' and forced to the data frame with a .d$ prefix (failure to | |
#' lookup returns null). Only works on in-memory data.frames. | |
#' Part piping with base R series: \url{http://www.win-vector.com/blog/tag/piping-with-base-r/}. | |
#' | |
#' @param x data.frame to work with | |
#' @param expr logical expression to compute per-row | |
#' @param env environment to work in | |
#' @return evaluated result | |
#' | |
#' @seealso \code{\link[base]{with}}, \code{\link[base]{within}} | |
#' | |
#' @examples | |
#' | |
#' Temp <- 90 | |
#' Ozone_bound <- 100 | |
#' summary(with_eval(airquality, | |
#' (.d$Temp > .e$Temp) & | |
#' (!is.na(Ozone)) & (Ozone < Ozone_bound))) | |
#' | |
#' @export | |
#' | |
with_eval <- function(x, expr, env = parent.frame()) { | |
if(!is.data.frame(x)) { | |
stop("with_eval expected x to be a data.frame") | |
} | |
e <- substitute(expr) # capture expression | |
eval_env <- new.env(parent = env) | |
assign(".d", x, eval_env) # data prefix .d$ | |
assign(".e", env, eval_env) # environment prefix .e$ | |
r <- eval(e, envir = x, enclos = eval_env) | |
r | |
} | |
#' Pick a subset of rows, evaluating the subset expression | |
#' as if the columns of x were in the evaluation environment. | |
#' | |
#' References can be forced to the environment with a .e$ prefix | |
#' and forced to the data frame with a .d$ prefix (failure to | |
#' lookup returns null). Only works on in-memory data.frames. | |
#' Part piping with base R series: \url{http://www.win-vector.com/blog/tag/piping-with-base-r/}. | |
#' See also \url{http://www.win-vector.com/blog/2018/02/is-r-basesubset-really-that-bad/}. | |
#' | |
#' @param x data.frame to work with | |
#' @param subset logical expression to compute per-row | |
#' @param env environment to work in | |
#' @return data.frame that is the specified subset of the rows of x. | |
#' | |
#' @seealso \code{\link[base]{subset}} | |
#' | |
#' @examples | |
#' | |
#' Temp <- 90 | |
#' Ozone_bound <- 100 | |
#' subset_rows(airquality, | |
#' (.d$Temp > .e$Temp) & | |
#' (!is.na(Ozone)) & (Ozone < Ozone_bound)) | |
#' | |
#' @export | |
#' | |
subset_rows <- function(x, subset, env = parent.frame()) { | |
if(!is.data.frame(x)) { | |
stop("subset_rows expected x to be a data.frame") | |
} | |
if(missing(subset) || (nrow(x)<=0)) { | |
return(x) | |
} | |
e <- substitute(subset) # capture expression | |
eval_env <- new.env(parent = env) | |
assign(".d", x, eval_env) # data prefix .d$ | |
assign(".e", env, eval_env) # environment prefix .e$ | |
r <- eval(e, envir = x, enclos = eval_env) | |
if(!is.logical(r)) { | |
stop("subset_rows predicate must evaluate to logical") | |
} | |
r <- r & !is.na(r) | |
if(length(r)!=nrow(x)) { | |
stop("subset_rows predicate must have one entry per row") | |
} | |
x[r, , drop = FALSE] | |
} | |
#' Pick a sequence of columns. | |
#' | |
#' Only works on in-memory data.frames. | |
#' Part piping with base R series: \url{http://www.win-vector.com/blog/tag/piping-with-base-r/}. | |
#' | |
#' @param x data.frame to work with | |
#' @param columns character, names of columns | |
#' @return data.frame that is the specified subset of the columns of x. | |
#' | |
#' @examples | |
#' | |
#' head(sel_columns(mtcars, c("mpg", "cyl", "disp"))) | |
#' | |
#' @export | |
#' | |
sel_columns <- function(x, columns) { | |
if(!is.data.frame(x)) { | |
stop("sel_columns expected x to be a data.frame") | |
} | |
if(missing(columns)) { | |
return(x) | |
} | |
x[ , columns, drop = FALSE] | |
} | |
#' Pick a selection of rows (can be used to order). | |
#' | |
#' Only works on in-memory data.frames. | |
#' Part piping with base R series: \url{http://www.win-vector.com/blog/tag/piping-with-base-r/}. | |
#' | |
#' @param x data.frame to work with | |
#' @param rows numeric, row indexes. | |
#' @return data.frame that is the specified row selection. | |
#' | |
#' @examples | |
#' | |
#' d <- data.frame(x = c('b', 'a', 'c')) | |
#' sel_rows(d, d$x) | |
#' | |
#' @export | |
#' | |
sel_rows <- function(x, rows) { | |
if(!is.data.frame(x)) { | |
stop("sel_columns expected x to be a data.frame") | |
} | |
if(missing(rows)) { | |
return(x) | |
} | |
x[rows, , drop = FALSE] | |
} | |
#' Evaluate an expression with a data frame acting as the inner environment | |
#' and assigning back to the data.frame. Statements are executed sequentially. | |
#' | |
#' References can be forced to the environment with a .e$ prefix | |
#' and forced to the data frame with a .d$ prefix (failure to | |
#' lookup returns null). Only works on in-memory data.frames. | |
#' Part piping with base R series: \url{http://www.win-vector.com/blog/tag/piping-with-base-r/}. | |
#' | |
#' @param transform_columns_data_frame data.frame to work with | |
#' @param ... named exprssions to add to data frame | |
#' @param transform_columns_env environment to work in | |
#' @return data frame with additional or altered columns | |
#' | |
#' @seealso \code{\link[base]{transform}}, \code{\link[base]{within}} | |
#' | |
#' @examples | |
#' | |
#' d <- data.frame(x = c(1,2)) | |
#' transform_columns(d, y = x*x, d = 0, z = x + y, d = 1, q = x + d) | |
#' | |
#' @export | |
#' | |
transform_columns <- function(transform_columns_data_frame, | |
..., | |
transform_columns_env = parent.frame()) { | |
terms <- substitute(list(...))[-1] | |
if(!is.data.frame(transform_columns_data_frame)) { | |
stop("transform_columns: transform_columns_data_frame must be a data.frame") | |
} | |
if(length(terms)<=0) { | |
stop("transform_columns: expected transforms") | |
} | |
for(i in seq_len(length(terms))) { | |
ni <- names(terms)[[i]] | |
if(length(ni)<=0) { | |
stop("transform_columns: empty name") | |
} | |
if( (!is.character(ni)) && (!is.character(ni)) ) { | |
stop(paste("transform_columns: odd class for name: ", class(ni))) | |
} | |
ni <- as.character(ni) | |
if(nchar(ni)<=0) { | |
stop("transform_columns: empty name") | |
} | |
ti <- terms[[i]] | |
eval_env <- new.env(parent = transform_columns_env) | |
assign(".d", transform_columns_data_frame, eval_env) # data prefix .d$ | |
assign(".e", transform_columns_env, eval_env) # environment prefix .e$ | |
transform_columns_data_frame[[ni]] <- eval(ti, | |
envir = transform_columns_data_frame, | |
enclos = eval_env) | |
} | |
transform_columns_data_frame | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment