Vessy/oneForMany.R

## oneForMany.R
# Using eval to create a function that will be able to produce a data summary (using the plyr::ddply function)
# based on the user specified data frame, variables (that correspond to table column names) over which they
# want to split and summarize the data (character vector), and a two-column table (data frame) which contains
# the user specified name of the function used for summary (first column) and the function used for summary (second column)
#
# For more info, see http://www.vesnam.com/Rblog/one-function-to-run-them-all/
#

# Data, variable, and formulas used for data summary
ex1_df <- datasets::mtcars
ex1_vars <- "gear"
ex1_formulas <- data.frame(labs=c("cust_val", "total_hp", "avg_mpg"), ress=c("mpg+2*carb", "sum(hp)", "mean(mpg)"), stringsAsFactors = FALSE)

# This is the expected result
ex1_trueResult <- plyr::ddply(ex1_df, .variable=c("gear"), function(x)
  data.frame(cust_val = x$mpg+2*x$carb,
             total_hp = sum(x$hp),
             avg_mpg = mean(x$mpg)))

# Testing version #!
# It passes an expression vector to eval and only the last element is evaluated.
ex1_res01 <- oneForMany_first(ex1_df, ex1_vars, ex1_formulas)

# Testing version #2
# Using sapply to overcome the eval expression vector issue
# Everything is calculated, but data is not in desired format (due to column names mismatches)
# and one would need to parse it
ex1_res02 <- oneForMany_secpmd(ex1_df, ex1_vars, ex1_formulas)

# Testing version #3
# Using a list instead of vector; everything is calculated, but data is not in desired format
# and one would need to parse it
ex1_res03 <- oneForMany_third(ex1_df, ex1_vars, ex1_formulas)

# Testing version #4
# Using eval to create a function that returns a data frame with desired columns (user specified functions)
ex1_res04 <- oneForMany_fourth(ex1_df, ex1_vars, ex1_formulas)
# We can test to confirm that the results are OK
testthat::expect_that(ex1_trueResult, testthat::equals(ex1_res04))

# Testing version #5
# Using do.call to call plyer
ex1_res05 <- oneForMany_fifth(ex1_df, ex1_vars, ex1_formulas)
# We can test to confirm that the results are OK
testthat::expect_that(ex1_trueResult, testthat::equals(ex1_res05))

# Different data set
ex2_df <- datasets::quakes
ex2_vars <- c("lat", "long")
ex2_formulas <-  data.frame(what=c("l1", "l2", "l3", "l4"),
                      how=c("lat+2*long", "depth^2/log(mag+1)", "(lat+long)/(stations*0.5)", "mean(mag)"), stringsAsFactors = FALSE)

ex2_trueResult <- plyr::ddply(ex2_df, .variable=c("lat", "long"), function(x)
  data.frame(l1 = x$lat+2*x$long,
             l2 = x$depth^2/log(x$mag+1),
             l3 = (x$lat+x$long)/(x$stations*0.5),
             l4 = mean(x$mag)))

ex2_res04 <- oneForMany_fourth(ex2_df, ex2_vars, ex2_formulas)
testthat::expect_that(ex2_trueResult, testthat::equals(ex2_res04))

ex2_res05 <- oneForMany_fifth(ex2_df, ex2_vars, ex2_formulas)
testthat::expect_that(ex2_trueResult, testthat::equals(ex2_res05))

## oneForMany_fifth.R
oneForMany_fifth <- function(df_to_use = NA, cols_fix = NA, cols_ops = NA){

  if ((is.null(df_to_use)) | (is.null(cols_fix)) | (is.null(cols_ops)))
    stop("You did not specify all arguments!")

  if ((!is.data.frame(df_to_use)) | (!is.character(cols_fix)) | (!is.data.frame(cols_ops)))
    stop("Wrong argument type(s)! First and third arguments need to be a data frame; second argument needs to be a vector of characters!")

  if (ncol(cols_ops)>2)
    stop("Data frame defined by third argument should have only two columns!")

  func_list <- eval(parse(text=
                            paste("as.list(c(",
                                  paste("parse(text=\"", as.character(cols_ops[,2]), sep="", collapse = "\"), "),
                                  "\")))", sep="")))

  func_list <- setNames(func_list, as.character(cols_ops[,1]))

  result_mutate <- do.call(plyr::ddply, c(list(.data = df_to_use, .variables = cols_fix,  .fun = plyr::mutate), func_list))

  result_mutate[, c(cols_fix, as.character(cols_ops[,1]))]
}

## oneForMany_first.R
oneForMany_first <- function(df_to_use = NA, cols_fix = NA, cols_ops = NA){

  if ((is.null(df_to_use)) | (is.null(cols_fix)) | (is.null(cols_ops)))
    stop("You did not specify all arguments!")

  if ((!is.data.frame(df_to_use)) | (!is.character(cols_fix)) | (!is.data.frame(cols_ops)))
    stop("Wrong argument type(s)! First and third arguments need to be a data frame; second argument needs to be a vector of characters!")

  if (ncol(cols_ops)>2)
    stop("Data frame defined by third argument should have only two columns!")

  # Transform assignments into appropriate format
  col_names.sorted <- sort(colnames(df_to_use))
  col_names.length <- length(col_names.sorted)

  # DF needs to be in stringsAsFactors=FALSE mode
  for (j in 1:nrow(cols_ops)){

    for (i in col_names.length:1){
      cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), col_names.sorted[i], paste("hlp[",as.character(i), "]", sep=""))
    }

    for (i in 1:col_names.length){
      cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]),  paste("hlp\\[",as.character(i), "\\]", sep=""), paste("x$", col_names.sorted[i], sep=""))
    }
  }

  # Create a vector of functions
  func_list <- parse(text = paste(as.character(cols_ops[,1]), " = ", cols_ops[,2], sep=""))

  # Run plyer
  plyr::ddply(df_to_use, .variable=cols_fix, function(x) data.frame(eval(func_list)))
}

## oneForMany_fourth.R
oneForMany_fourth <- function(df_to_use = NA, cols_fix = NA, cols_ops = NA){

  if ((is.null(df_to_use)) | (is.null(cols_fix)) | (is.null(cols_ops)))
    stop("You did not specify all arguments!")

  if ((!is.data.frame(df_to_use)) | (!is.character(cols_fix)) | (!is.data.frame(cols_ops)))
    stop("Wrong argument type(s)! First and third arguments need to be a data frame; second argument needs to be a vector of characters!")

  if (ncol(cols_ops)>2)
    stop("Data frame defined by third argument should have only two columns!")

  # Transform assignments into appropriate format
  col_names.sorted <- sort(colnames(df_to_use))
  col_names.length <- length(col_names.sorted)

  # DF needs to be in stringsAsFactors=FALSE mode
  for (j in 1:nrow(cols_ops)){

    for (i in col_names.length:1){
      cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), col_names.sorted[i], paste("hlp[",as.character(i), "]", sep=""))
    }

    for (i in 1:col_names.length){
      cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]),  paste("hlp\\[",as.character(i), "\\]", sep=""), paste("x$", col_names.sorted[i], sep=""))
    }
  }

  func_list  <- function(x){
    eval(parse(text = paste(as.character(cols_ops[,1]), " <- ", cols_ops[,2], sep="")))
    eval(parse(text = paste("data.frame(", paste(as.character(cols_ops[,1]), sep="", collapse=", "), ")", sep="")))
  }


  plyr::ddply(df_to_use, .variable=cols_fix,function(x) func_list(x))
}

## oneForMany_second.R
oneForMany_second <- function(df_to_use = NA, cols_fix = NA, cols_ops = NA){

  if ((is.null(df_to_use)) | (is.null(cols_fix)) | (is.null(cols_ops)))
    stop("You did not specify all arguments!")

  if ((!is.data.frame(df_to_use)) | (!is.character(cols_fix)) | (!is.data.frame(cols_ops)))
    stop("Wrong argument type(s)! First and third arguments need to be a data frame; second argument needs to be a vector of characters!")

  if (ncol(cols_ops)>2)
    stop("Data frame defined by third argument should have only two columns!")

  # Transform assignments into appropriate format
  col_names.sorted <- sort(colnames(df_to_use))
  col_names.length <- length(col_names.sorted)

  # DF needs to be in stringsAsFactors=FALSE mode
  for (j in 1:nrow(cols_ops)){

    for (i in col_names.length:1){
      cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), col_names.sorted[i], paste("hlp[",as.character(i), "]", sep=""))
    }

    for (i in 1:col_names.length){
      cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]),  paste("hlp\\[",as.character(i), "\\]", sep=""), paste("x$", col_names.sorted[i], sep=""))
    }
  }

  # Create a vector of functions
  func_list <- parse(text = paste(as.character(cols_ops[,1]), " = ", cols_ops[,2], sep=""))

  # Run plyer
  plyr::ddply(df_to_use, .variable=cols_fix, function(x) data.frame(sapply(func_list, function(y) eval(y))))
}

## oneForMany_third.R
oneForMany_third <- function(df_to_use = NA, cols_fix = NA, cols_ops = NA){

  if ((is.null(df_to_use)) | (is.null(cols_fix)) | (is.null(cols_ops)))
    stop("You did not specify all arguments!")

  if ((!is.data.frame(df_to_use)) | (!is.character(cols_fix)) | (!is.data.frame(cols_ops)))
    stop("Wrong argument type(s)! First and third arguments need to be a data frame; second argument needs to be a vector of characters!")

  if (ncol(cols_ops)>2)
    stop("Data frame defined by third argument should have only two columns!")

  # Transform assignments into appropriate format
  col_names.sorted <- sort(colnames(df_to_use))
  col_names.length <- length(col_names.sorted)

  # DF needs to be in stringsAsFactors=FALSE mode
  for (j in 1:nrow(cols_ops)){

    for (i in col_names.length:1){
      cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), col_names.sorted[i], paste("hlp[",as.character(i), "]", sep=""))
    }

    for (i in 1:col_names.length){
      cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]),  paste("hlp\\[",as.character(i), "\\]", sep=""), paste("x$", col_names.sorted[i], sep=""))
    }
  }

  # Create a vector of functions
  func_list <- parse(text = paste("as.list(c(", paste(as.character(cols_ops[,1]), " = ", cols_ops[,2], sep="", collapse=", "), "))", sep=""))

  # Run plyer
  plyr::ddply(df_to_use, .variable=cols_fix, function(x) data.frame(eval(func_list)))
}
	# Using eval to create a function that will be able to produce a data summary (using the plyr::ddply function)
	# based on the user specified data frame, variables (that correspond to table column names) over which they
	# want to split and summarize the data (character vector), and a two-column table (data frame) which contains
	# the user specified name of the function used for summary (first column) and the function used for summary (second column)
	#
	# For more info, see http://www.vesnam.com/Rblog/one-function-to-run-them-all/
	#

	# Data, variable, and formulas used for data summary
	ex1_df <- datasets::mtcars
	ex1_vars <- "gear"
	ex1_formulas <- data.frame(labs=c("cust_val", "total_hp", "avg_mpg"), ress=c("mpg+2*carb", "sum(hp)", "mean(mpg)"), stringsAsFactors = FALSE)

	# This is the expected result
	ex1_trueResult <- plyr::ddply(ex1_df, .variable=c("gear"), function(x)
	data.frame(cust_val = x$mpg+2*x$carb,
	total_hp = sum(x$hp),
	avg_mpg = mean(x$mpg)))

	# Testing version #!
	# It passes an expression vector to eval and only the last element is evaluated.
	ex1_res01 <- oneForMany_first(ex1_df, ex1_vars, ex1_formulas)

	# Testing version #2
	# Using sapply to overcome the eval expression vector issue
	# Everything is calculated, but data is not in desired format (due to column names mismatches)
	# and one would need to parse it
	ex1_res02 <- oneForMany_secpmd(ex1_df, ex1_vars, ex1_formulas)

	# Testing version #3
	# Using a list instead of vector; everything is calculated, but data is not in desired format
	# and one would need to parse it
	ex1_res03 <- oneForMany_third(ex1_df, ex1_vars, ex1_formulas)

	# Testing version #4
	# Using eval to create a function that returns a data frame with desired columns (user specified functions)
	ex1_res04 <- oneForMany_fourth(ex1_df, ex1_vars, ex1_formulas)
	# We can test to confirm that the results are OK
	testthat::expect_that(ex1_trueResult, testthat::equals(ex1_res04))

	# Testing version #5
	# Using do.call to call plyer
	ex1_res05 <- oneForMany_fifth(ex1_df, ex1_vars, ex1_formulas)
	# We can test to confirm that the results are OK
	testthat::expect_that(ex1_trueResult, testthat::equals(ex1_res05))

	# Different data set
	ex2_df <- datasets::quakes
	ex2_vars <- c("lat", "long")
	ex2_formulas <- data.frame(what=c("l1", "l2", "l3", "l4"),
	how=c("lat+2long", "depth^2/log(mag+1)", "(lat+long)/(stations0.5)", "mean(mag)"), stringsAsFactors = FALSE)

	ex2_trueResult <- plyr::ddply(ex2_df, .variable=c("lat", "long"), function(x)
	data.frame(l1 = x$lat+2*x$long,
	l2 = x$depth^2/log(x$mag+1),
	l3 = (x$lat+x$long)/(x$stations*0.5),
	l4 = mean(x$mag)))

	ex2_res04 <- oneForMany_fourth(ex2_df, ex2_vars, ex2_formulas)
	testthat::expect_that(ex2_trueResult, testthat::equals(ex2_res04))

	ex2_res05 <- oneForMany_fifth(ex2_df, ex2_vars, ex2_formulas)
	testthat::expect_that(ex2_trueResult, testthat::equals(ex2_res05))
	oneForMany_fifth <- function(df_to_use = NA, cols_fix = NA, cols_ops = NA){

	if ((is.null(df_to_use)) \| (is.null(cols_fix)) \| (is.null(cols_ops)))
	stop("You did not specify all arguments!")

	if ((!is.data.frame(df_to_use)) \| (!is.character(cols_fix)) \| (!is.data.frame(cols_ops)))
	stop("Wrong argument type(s)! First and third arguments need to be a data frame; second argument needs to be a vector of characters!")

	if (ncol(cols_ops)>2)
	stop("Data frame defined by third argument should have only two columns!")

	func_list <- eval(parse(text=
	paste("as.list(c(",
	paste("parse(text=\"", as.character(cols_ops[,2]), sep="", collapse = "\"), "),
	"\")))", sep="")))

	func_list <- setNames(func_list, as.character(cols_ops[,1]))

	result_mutate <- do.call(plyr::ddply, c(list(.data = df_to_use, .variables = cols_fix, .fun = plyr::mutate), func_list))

	result_mutate[, c(cols_fix, as.character(cols_ops[,1]))]
	}
	oneForMany_first <- function(df_to_use = NA, cols_fix = NA, cols_ops = NA){

	if ((is.null(df_to_use)) \| (is.null(cols_fix)) \| (is.null(cols_ops)))
	stop("You did not specify all arguments!")

	if ((!is.data.frame(df_to_use)) \| (!is.character(cols_fix)) \| (!is.data.frame(cols_ops)))
	stop("Wrong argument type(s)! First and third arguments need to be a data frame; second argument needs to be a vector of characters!")

	if (ncol(cols_ops)>2)
	stop("Data frame defined by third argument should have only two columns!")

	# Transform assignments into appropriate format
	col_names.sorted <- sort(colnames(df_to_use))
	col_names.length <- length(col_names.sorted)

	# DF needs to be in stringsAsFactors=FALSE mode
	for (j in 1:nrow(cols_ops)){

	for (i in col_names.length:1){
	cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), col_names.sorted[i], paste("hlp[",as.character(i), "]", sep=""))
	}

	for (i in 1:col_names.length){
	cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), paste("hlp\\[",as.character(i), "\\]", sep=""), paste("x$", col_names.sorted[i], sep=""))
	}
	}

	# Create a vector of functions
	func_list <- parse(text = paste(as.character(cols_ops[,1]), " = ", cols_ops[,2], sep=""))

	# Run plyer
	plyr::ddply(df_to_use, .variable=cols_fix, function(x) data.frame(eval(func_list)))
	}
	oneForMany_fourth <- function(df_to_use = NA, cols_fix = NA, cols_ops = NA){

	if ((is.null(df_to_use)) \| (is.null(cols_fix)) \| (is.null(cols_ops)))
	stop("You did not specify all arguments!")

	if ((!is.data.frame(df_to_use)) \| (!is.character(cols_fix)) \| (!is.data.frame(cols_ops)))
	stop("Wrong argument type(s)! First and third arguments need to be a data frame; second argument needs to be a vector of characters!")

	if (ncol(cols_ops)>2)
	stop("Data frame defined by third argument should have only two columns!")

	# Transform assignments into appropriate format
	col_names.sorted <- sort(colnames(df_to_use))
	col_names.length <- length(col_names.sorted)

	# DF needs to be in stringsAsFactors=FALSE mode
	for (j in 1:nrow(cols_ops)){

	for (i in col_names.length:1){
	cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), col_names.sorted[i], paste("hlp[",as.character(i), "]", sep=""))
	}

	for (i in 1:col_names.length){
	cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), paste("hlp\\[",as.character(i), "\\]", sep=""), paste("x$", col_names.sorted[i], sep=""))
	}
	}

	func_list <- function(x){
	eval(parse(text = paste(as.character(cols_ops[,1]), " <- ", cols_ops[,2], sep="")))
	eval(parse(text = paste("data.frame(", paste(as.character(cols_ops[,1]), sep="", collapse=", "), ")", sep="")))
	}


	plyr::ddply(df_to_use, .variable=cols_fix,function(x) func_list(x))
	}
	oneForMany_second <- function(df_to_use = NA, cols_fix = NA, cols_ops = NA){

	if ((is.null(df_to_use)) \| (is.null(cols_fix)) \| (is.null(cols_ops)))
	stop("You did not specify all arguments!")

	if ((!is.data.frame(df_to_use)) \| (!is.character(cols_fix)) \| (!is.data.frame(cols_ops)))
	stop("Wrong argument type(s)! First and third arguments need to be a data frame; second argument needs to be a vector of characters!")

	if (ncol(cols_ops)>2)
	stop("Data frame defined by third argument should have only two columns!")

	# Transform assignments into appropriate format
	col_names.sorted <- sort(colnames(df_to_use))
	col_names.length <- length(col_names.sorted)

	# DF needs to be in stringsAsFactors=FALSE mode
	for (j in 1:nrow(cols_ops)){

	for (i in col_names.length:1){
	cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), col_names.sorted[i], paste("hlp[",as.character(i), "]", sep=""))
	}

	for (i in 1:col_names.length){
	cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), paste("hlp\\[",as.character(i), "\\]", sep=""), paste("x$", col_names.sorted[i], sep=""))
	}
	}

	# Create a vector of functions
	func_list <- parse(text = paste(as.character(cols_ops[,1]), " = ", cols_ops[,2], sep=""))

	# Run plyer
	plyr::ddply(df_to_use, .variable=cols_fix, function(x) data.frame(sapply(func_list, function(y) eval(y))))
	}
	oneForMany_third <- function(df_to_use = NA, cols_fix = NA, cols_ops = NA){

	if ((is.null(df_to_use)) \| (is.null(cols_fix)) \| (is.null(cols_ops)))
	stop("You did not specify all arguments!")

	if ((!is.data.frame(df_to_use)) \| (!is.character(cols_fix)) \| (!is.data.frame(cols_ops)))
	stop("Wrong argument type(s)! First and third arguments need to be a data frame; second argument needs to be a vector of characters!")

	if (ncol(cols_ops)>2)
	stop("Data frame defined by third argument should have only two columns!")

	# Transform assignments into appropriate format
	col_names.sorted <- sort(colnames(df_to_use))
	col_names.length <- length(col_names.sorted)

	# DF needs to be in stringsAsFactors=FALSE mode
	for (j in 1:nrow(cols_ops)){

	for (i in col_names.length:1){
	cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), col_names.sorted[i], paste("hlp[",as.character(i), "]", sep=""))
	}

	for (i in 1:col_names.length){
	cols_ops[j,2] <- stringr::str_replace_all(as.character(cols_ops[j,2]), paste("hlp\\[",as.character(i), "\\]", sep=""), paste("x$", col_names.sorted[i], sep=""))
	}
	}

	# Create a vector of functions
	func_list <- parse(text = paste("as.list(c(", paste(as.character(cols_ops[,1]), " = ", cols_ops[,2], sep="", collapse=", "), "))", sep=""))

	# Run plyer
	plyr::ddply(df_to_use, .variable=cols_fix, function(x) data.frame(eval(func_list)))
	}