Last active
October 13, 2015 06:57
-
-
Save dholstius/4156908 to your computer and use it in GitHub Desktop.
Fast rollup of rows from a data.frame (uses data.table for speed)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Aggregate the value columns of a data.table by group
#'
#' Use to quickly aggregate rows from a data.frame that has been converted to
#' a data.table. Every column of `x` accepted by `is.value` is summarised with
#' `FUN` inside a single grouped data.table query.
#'
#' @param x data.table
#' @param by list of columns to use for grouping, written unquoted, e.g.
#'   `list(cut, color)`; the expression is captured with `substitute()` and
#'   passed on to the data.table query rather than evaluated up front
#' @param FUN summary function, applied to each value column
#' @param \dots further arguments to summary function
#' @param is.value predicate called once per column of `x`; it must return a
#'   single `TRUE`/`FALSE`, and determines which columns are aggregated
#'   (numeric, POSIXct, character, etc.)
#' @return the result of the grouped data.table query, with one summarised
#'   column per column of `x` selected by `is.value`
#' @author David Holstius \email{david.holstius@berkeley.edu}
#' @export
aggregate.data.table <- function(x, by, FUN = mean, ...,
                                 is.value = is.numeric) {
  # vapply() pins the predicate result to exactly one logical per column, so a
  # zero-column table yields logical(0) rather than the empty list that
  # sapply() returns (which which() rejects).
  is_value_column <- vapply(x, is.value, logical(1L))
  value_columns <- names(x)[which(is_value_column)]

  # The grouping argument is deliberately kept in its original
  # eval(substitute(by)) form: substitute() recovers the caller's unevaluated
  # expression so that the names in it can be looked up among the columns of
  # `x` by the data.table query.
  x[, lapply(.SD, FUN, ...), eval(substitute(by)), .SDcols = value_columns]
}
# Example 1: mean of every numeric column, per cut
library(data.table)
data(diamonds, package = "ggplot2")
dt <- data.table(diamonds)
aggregate(dt, by = list(cut)) # defaults to arithmetic mean

# Example 2: custom summary function, grouped by two columns;
# na.rm = TRUE is forwarded through `...` to GM() and on to mean()
GM <- function(x, ...) exp(mean(log(x), ...)) # geometric mean
aggregate(dt, by = list(cut, color), FUN = GM, na.rm = TRUE)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment