Created
February 5, 2014 02:24
-
-
Save sachsmc/8816428 to your computer and use it in GitHub Desktop.
ddplyWithTotals.R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############################################################## | |
## Author: Michael Sachs, michael.sachs@nih.gov | |
## Version 1.0 | |
## Date Updated: 9/16/2013 | |
# Description: An extension of the ddply function from the | |
# plyr package that additionally evaluates the user-supplied | |
# function on subsets defined by lower-order combinations of the | |
# variables. This is useful for calculating sub-totals and totals. | |
# | |
# Usage: See ?ddply. | |
# .data is a data.frame to be split and analyzed and combined | |
# .variables supplied as a forumla or vector of characters on which .data is split | |
# .fun is a function that takes a data frame as input and returns a data frame. | |
# | |
# Value: A data frame. Inserts "Total" as additional levels of the .variables | |
############################################################## | |
ddplyWithTotals <- function(.data, .variables, .fun = NULL, | |
nestedOnly = FALSE, grandTotal = FALSE){ | |
require(plyr) | |
.variables <- as.quoted(.variables) | |
fullTab <- ddply(.data, .variables, .fun) | |
len <- length(.variables) | |
combos <- lapply(1:(len-1), function(i) combn(1:len, i)) | |
if(grandTotal) combos[[len]] <- matrix(0) | |
if(nestedOnly){ combos <- lapply(1:(len-1), function(i) matrix((1:len)[1:i]) ) } | |
addTotalNames <- function(df){ | |
tempout <- df | |
missingNames <- unique(unlist(lapply(names(fullTab), | |
function(nm){ | |
gp <- grep(nm, names(tempout), value=FALSE) | |
if(length(gp) == 0) return(nm)} | |
))) | |
for(nm in missingNames){ tempout[[nm]] <- "zzTotal" } | |
tempout | |
} | |
tabOut <- fullTab | |
for(i in 1:length(combos)){ | |
for(j in 1:dim(combos[[i]])[2]){ | |
plytemp <- addTotalNames(ddply(.data, .variables[combos[[i]][,j]], .fun)) | |
tabOut <- merge(tabOut, plytemp, all = TRUE) | |
} | |
} | |
for(var in .variables){ | |
var <- paste(var) | |
if(is.factor(tabOut[[var]])){ | |
levels(tabOut[[var]])[levels(tabOut[[var]])=="zzTotal"] <- "Total" | |
} else{ | |
tabOut[[var]][tabOut[[var]]=="zzTotal"] <- "Total" | |
} | |
} | |
return(tabOut) | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment