Skip to content

Instantly share code, notes, and snippets.

@joshbode
Last active December 13, 2015 23:59
Show Gist options
  • Save joshbode/4995928 to your computer and use it in GitHub Desktop.
Save joshbode/4995928 to your computer and use it in GitHub Desktop.
Sub-totals for ddply
# poor-man's rollup
# rollups like in SQL: GROUP BY ROLLUP(...)
# example:
# d = data.frame(x=trunc(runif(25, 1, 10)), y=trunc(runif(25, 1, 5)), z=rnorm(25))
# result = pmr(d, .(x, y, j=x / 2), summarise, p=sum(z), .labels=list(x='-900', y='Total'))
require(plyr)
pmr = function(.data, .variables, ...,
.rollup=names(.variables), .labels=NULL) {
groups = list(NULL) # sentinel to signal running full set of vars
# generate grouping sets
vars = intersect(.rollup, names(.variables))
if (length(vars)) {
groups = append(
groups,
unlist(lapply(1:length(vars), function(i) {
alply(combn(vars, i), 2, identity)
}), recursive=FALSE)
)
}
# roll-up across the grouping sets
blocks = lapply(groups, function(group) {
# determine which columns to ignore in rollup
if (is.null(group)) {
delete = length(.variables) + 1
}
else {
delete = match(group, names(.variables))
}
# run ddply on amended variables
args = append(
list(.data, .variables[-delete]),
as.list(substitute(list(...)))[-1L]
)
result = do.call(ddply, args)
# cleanup and tag results
# grouping column gives a hexmode of the indices of the marginalised columns
if (is.null(group)) {
result$.grouping = 0
}
else {
result$.grouping = sum(2 ^ (delete - 1))
result[group] = NA # NA-out all columns in the group
result$.id = NULL # delete .id column (if present)
# add in rollup labels if provided
labs = data.frame(subset(.labels, names(.labels) %in% group))
result[names(labs)] = labs
}
return(result)
})
# combine the lot
result = do.call(rbind, blocks)
result$.grouping = as.hexmode(result$.grouping)
row.names(result) = 1:nrow(result)
return(result)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment