Skip to content

Instantly share code, notes, and snippets.

@sachsmc
Created February 5, 2014 02:24
Show Gist options
  • Save sachsmc/8816428 to your computer and use it in GitHub Desktop.
Save sachsmc/8816428 to your computer and use it in GitHub Desktop.
ddplyWithTotals.R
##############################################################
## Author: Michael Sachs, michael.sachs@nih.gov
## Version 1.0
## Date Updated: 9/16/2013
# Description: An extension of the ddply function from the
# plyr package that additionally evaluates the user-supplied
# function on subsets defined by lower-order combinations of the
# variables. This is useful for calculating sub-totals and totals.
#
# Usage: See ?ddply.
# .data is a data.frame to be split and analyzed and combined
# .variables supplied as a forumla or vector of characters on which .data is split
# .fun is a function that takes a data frame as input and returns a data frame.
#
# Value: A data frame. Inserts "Total" as additional levels of the .variables
##############################################################
ddplyWithTotals <- function(.data, .variables, .fun = NULL,
nestedOnly = FALSE, grandTotal = FALSE){
require(plyr)
.variables <- as.quoted(.variables)
fullTab <- ddply(.data, .variables, .fun)
len <- length(.variables)
combos <- lapply(1:(len-1), function(i) combn(1:len, i))
if(grandTotal) combos[[len]] <- matrix(0)
if(nestedOnly){ combos <- lapply(1:(len-1), function(i) matrix((1:len)[1:i]) ) }
addTotalNames <- function(df){
tempout <- df
missingNames <- unique(unlist(lapply(names(fullTab),
function(nm){
gp <- grep(nm, names(tempout), value=FALSE)
if(length(gp) == 0) return(nm)}
)))
for(nm in missingNames){ tempout[[nm]] <- "zzTotal" }
tempout
}
tabOut <- fullTab
for(i in 1:length(combos)){
for(j in 1:dim(combos[[i]])[2]){
plytemp <- addTotalNames(ddply(.data, .variables[combos[[i]][,j]], .fun))
tabOut <- merge(tabOut, plytemp, all = TRUE)
}
}
for(var in .variables){
var <- paste(var)
if(is.factor(tabOut[[var]])){
levels(tabOut[[var]])[levels(tabOut[[var]])=="zzTotal"] <- "Total"
} else{
tabOut[[var]][tabOut[[var]]=="zzTotal"] <- "Total"
}
}
return(tabOut)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment