Skip to content

Instantly share code, notes, and snippets.

@Vessy
Last active May 14, 2016 17:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Vessy/225797327570867ecd23894de126bee9 to your computer and use it in GitHub Desktop.
Save Vessy/225797327570867ecd23894de126bee9 to your computer and use it in GitHub Desktop.
Using eval to create a function that will be able to produce a data summary (using the plyr::ddply function) based on the user specified data frame, variables (that correspond to table column names) over which the data should be split and summarized (character vector), and a two-column table (data frame) which contains the specified name of the …
# Using eval to create a function that will be able to produce a data summary (using the plyr::ddply function)
# based on the user specified data frame, variables (that correspond to table column names) over which they
# want to split and summarize the data (character vector), and a two-column table (data frame) which contains
# the user specified name of the function used for summary (first column) and the function used for summary (second column)
#
# For more info, see http://www.vesnam.com/Rblog/one-function-to-run-them-all/
#
# Data, variable, and formulas used for data summary
ex1_df <- datasets::mtcars
ex1_vars <- "gear"
ex1_formulas <- data.frame(labs=c("cust_val", "total_hp", "avg_mpg"), ress=c("mpg+2*carb", "sum(hp)", "mean(mpg)"), stringsAsFactors = FALSE)
# This is the expected result
ex1_trueResult <- plyr::ddply(ex1_df, .variable=c("gear"), function(x)
data.frame(cust_val = x$mpg+2*x$carb,
total_hp = sum(x$hp),
avg_mpg = mean(x$mpg)))
# Testing version #!
# It passes an expression vector to eval and only the last element is evaluated.
ex1_res01 <- oneForMany_first(ex1_df, ex1_vars, ex1_formulas)
# Testing version #2
# Using sapply to overcome the eval expression vector issue
# Everything is calculated, but data is not in desired format (due to column names mismatches)
# and one would need to parse it
ex1_res02 <- oneForMany_secpmd(ex1_df, ex1_vars, ex1_formulas)
# Testing version #3
# Using a list instead of vector; everything is calculated, but data is not in desired format
# and one would need to parse it
ex1_res03 <- oneForMany_third(ex1_df, ex1_vars, ex1_formulas)
# Testing version #4
# Using eval to create a function that returns a data frame with desired columns (user specified functions)
ex1_res04 <- oneForMany_fourth(ex1_df, ex1_vars, ex1_formulas)
# We can test to confirm that the results are OK
testthat::expect_that(ex1_trueResult, testthat::equals(ex1_res04))
# Testing version #5
# Using do.call to call plyer
ex1_res05 <- oneForMany_fifth(ex1_df, ex1_vars, ex1_formulas)
# We can test to confirm that the results are OK
testthat::expect_that(ex1_trueResult, testthat::equals(ex1_res05))
# Different data set
ex2_df <- datasets::quakes
ex2_vars <- c("lat", "long")
ex2_formulas <- data.frame(what=c("l1", "l2", "l3", "l4"),
how=c("lat+2*long", "depth^2/log(mag+1)", "(lat+long)/(stations*0.5)", "mean(mag)"), stringsAsFactors = FALSE)
ex2_trueResult <- plyr::ddply(ex2_df, .variable=c("lat", "long"), function(x)
data.frame(l1 = x$lat+2*x$long,
l2 = x$depth^2/log(x$mag+1),
l3 = (x$lat+x$long)/(x$stations*0.5),
l4 = mean(x$mag)))
ex2_res04 <- oneForMany_fourth(ex2_df, ex2_vars, ex2_formulas)
testthat::expect_that(ex2_trueResult, testthat::equals(ex2_res04))
ex2_res05 <- oneForMany_fifth(ex2_df, ex2_vars, ex2_formulas)
testthat::expect_that(ex2_trueResult, testthat::equals(ex2_res05))
oneForMany_fifth <- function(df_to_use = NA, cols_fix = NA, cols_ops = NA){
if ((is.null(df_to_use)) | (is.null(cols_fix)) | (is.null(cols_ops)))
stop("You did not specify all arguments!")
if ((!is.data.frame(df_to_use)) | (!is.character(cols_fix)) | (!is.data.frame(cols_ops)))
stop("Wrong argument type(s)! First and third arguments need to be a data frame; second argument needs to be a vector of characters!")
if (ncol(cols_ops)>2)
stop("Data frame defined by third argument should have only two columns!")
func_list <- eval(parse(text=
paste("as.list(c(",
paste("parse(text=\"", as.character(cols_ops[,2]), sep="", collapse = "\"), "),
"\")))", sep="")))
func_list <- setNames(func_list, as.character(cols_ops[,1]))
result_mutate <- do.call(plyr::ddply, c(list(.data = df_to_use, .variables = cols_fix, .fun = plyr::mutate), func_list))
result_mutate[, c(cols_fix, as.character(cols_ops[,1]))]
}
oneForMany_first <- function(df_to_use = NA, cols_fix = NA, cols_ops = NA){
if ((is.null(df_to_use)) | (is.null(cols_fix)) | (is.null(cols_ops)))
stop("You did not specify all arguments!")
if ((!is.data.frame(df_to_use)) | (!is.character(cols_fix)) | (!is.data.frame(cols_ops)))
stop("Wrong argument type(s)! First and third arguments need to be a data frame; second argument needs to be a vector of characters!")
if (ncol(cols_ops)>2)
stop("Data frame defined by third argument should have only two columns!")
# Transform assignments into appropriate format
col_names.sorted <- sort(colnames(df_to_use))
col_names.length <- length(col_names.sorted)
# DF needs to be in stringsAsFactors=FALSE mode
for (j in 1:nrow(cols_ops)){
for (i in col_names.length:1){
cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), col_names.sorted[i], paste("hlp[",as.character(i), "]", sep=""))
}
for (i in 1:col_names.length){
cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), paste("hlp\\[",as.character(i), "\\]", sep=""), paste("x$", col_names.sorted[i], sep=""))
}
}
# Create a vector of functions
func_list <- parse(text = paste(as.character(cols_ops[,1]), " = ", cols_ops[,2], sep=""))
# Run plyer
plyr::ddply(df_to_use, .variable=cols_fix, function(x) data.frame(eval(func_list)))
}
oneForMany_fourth <- function(df_to_use = NA, cols_fix = NA, cols_ops = NA){
if ((is.null(df_to_use)) | (is.null(cols_fix)) | (is.null(cols_ops)))
stop("You did not specify all arguments!")
if ((!is.data.frame(df_to_use)) | (!is.character(cols_fix)) | (!is.data.frame(cols_ops)))
stop("Wrong argument type(s)! First and third arguments need to be a data frame; second argument needs to be a vector of characters!")
if (ncol(cols_ops)>2)
stop("Data frame defined by third argument should have only two columns!")
# Transform assignments into appropriate format
col_names.sorted <- sort(colnames(df_to_use))
col_names.length <- length(col_names.sorted)
# DF needs to be in stringsAsFactors=FALSE mode
for (j in 1:nrow(cols_ops)){
for (i in col_names.length:1){
cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), col_names.sorted[i], paste("hlp[",as.character(i), "]", sep=""))
}
for (i in 1:col_names.length){
cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), paste("hlp\\[",as.character(i), "\\]", sep=""), paste("x$", col_names.sorted[i], sep=""))
}
}
func_list <- function(x){
eval(parse(text = paste(as.character(cols_ops[,1]), " <- ", cols_ops[,2], sep="")))
eval(parse(text = paste("data.frame(", paste(as.character(cols_ops[,1]), sep="", collapse=", "), ")", sep="")))
}
plyr::ddply(df_to_use, .variable=cols_fix,function(x) func_list(x))
}
oneForMany_second <- function(df_to_use = NA, cols_fix = NA, cols_ops = NA){
if ((is.null(df_to_use)) | (is.null(cols_fix)) | (is.null(cols_ops)))
stop("You did not specify all arguments!")
if ((!is.data.frame(df_to_use)) | (!is.character(cols_fix)) | (!is.data.frame(cols_ops)))
stop("Wrong argument type(s)! First and third arguments need to be a data frame; second argument needs to be a vector of characters!")
if (ncol(cols_ops)>2)
stop("Data frame defined by third argument should have only two columns!")
# Transform assignments into appropriate format
col_names.sorted <- sort(colnames(df_to_use))
col_names.length <- length(col_names.sorted)
# DF needs to be in stringsAsFactors=FALSE mode
for (j in 1:nrow(cols_ops)){
for (i in col_names.length:1){
cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), col_names.sorted[i], paste("hlp[",as.character(i), "]", sep=""))
}
for (i in 1:col_names.length){
cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), paste("hlp\\[",as.character(i), "\\]", sep=""), paste("x$", col_names.sorted[i], sep=""))
}
}
# Create a vector of functions
func_list <- parse(text = paste(as.character(cols_ops[,1]), " = ", cols_ops[,2], sep=""))
# Run plyer
plyr::ddply(df_to_use, .variable=cols_fix, function(x) data.frame(sapply(func_list, function(y) eval(y))))
}
oneForMany_third <- function(df_to_use = NA, cols_fix = NA, cols_ops = NA){
if ((is.null(df_to_use)) | (is.null(cols_fix)) | (is.null(cols_ops)))
stop("You did not specify all arguments!")
if ((!is.data.frame(df_to_use)) | (!is.character(cols_fix)) | (!is.data.frame(cols_ops)))
stop("Wrong argument type(s)! First and third arguments need to be a data frame; second argument needs to be a vector of characters!")
if (ncol(cols_ops)>2)
stop("Data frame defined by third argument should have only two columns!")
# Transform assignments into appropriate format
col_names.sorted <- sort(colnames(df_to_use))
col_names.length <- length(col_names.sorted)
# DF needs to be in stringsAsFactors=FALSE mode
for (j in 1:nrow(cols_ops)){
for (i in col_names.length:1){
cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), col_names.sorted[i], paste("hlp[",as.character(i), "]", sep=""))
}
for (i in 1:col_names.length){
cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), paste("hlp\\[",as.character(i), "\\]", sep=""), paste("x$", col_names.sorted[i], sep=""))
}
}
# Create a vector of functions
func_list <- parse(text = paste("as.list(c(", paste(as.character(cols_ops[,1]), " = ", cols_ops[,2], sep="", collapse=", "), "))", sep=""))
# Run plyer
plyr::ddply(df_to_use, .variable=cols_fix, function(x) data.frame(eval(func_list)))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment