Created
September 28, 2014 05:06
-
-
Save mrdwab/6282a022b2eadb0fe1c6 to your computer and use it in GitHub Desktop.
Benchmarking options at http://stackoverflow.com/questions/26049028/condense-merge-cells-in-a-table-in-r
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Required packages | |
library(data.table) | |
library(dplyr) | |
library(reshape2) | |
library(microbenchmark) | |
## Small sample `data.frame` | |
dat <- structure(list(Pos = c("Position1", "Position2", "Position3", | |
"Position1", "Position3", "Position2"), M = c(34L, 45L, 89L, | |
56L, 54L, 56L), P = c(56L, 23L, 78L, 45L, 35L, 89L)), .Names = c("Pos", | |
"M", "P"), class = "data.frame", row.names = c(NA, -6L)) | |
## Small sample `matrix` | |
m1 <- structure(c(34, 45, 89, 56, 54, 56, 56, 23, 78, 45, 35, 89), .Dim = c(6L, | |
2L), .Dimnames = list(c("Position1", "Position2", "Position3", | |
"Position1", "Position3", "Position2"), c("M", "P"))) | |
## Built up to ~ 1M rows | |
dat <- do.call(rbind, replicate(16667, dat, FALSE)) | |
m1 <- do.call(rbind, replicate(16667, m1, FALSE)) | |
## rnso's double aggregate + merge | |
funRNSO <- function() { | |
a1 = aggregate(M ~ Pos, dat, sum) | |
a2 = aggregate(P ~ Pos, dat, sum) | |
merge(a1, a2) | |
} | |
## my suggestion in comments | |
funRNSOb <- function() aggregate(. ~ Pos, dat, sum) | |
## Richard Scriven | |
funRS <- function() t(sapply(split(dat[-1], dat$Pos), colSums)) | |
## Mine | |
funAM <- function() xtabs(Freq ~ Var1 + Var2, data.frame(as.table(m1))) | |
## Akrun - dplyr | |
funAdplyr <- function() { | |
dat %>% | |
group_by(Pos) %>% | |
summarise_each(funs(sum=sum(., na.rm=TRUE))) | |
} | |
## Akrun - data.table | |
funAdt <- function() { | |
as.data.table(dat)[, lapply(.SD, sum, na.rm=TRUE), by=Pos] | |
} | |
## Akrun - by | |
funAby <- function() { | |
do.call(rbind, by(m1, list(rownames(m1)), colSums, na.rm=TRUE)) | |
} | |
## Akrun - acast | |
funAacast <- function() { | |
acast(melt(m1), Var1~Var2, value.var="value", sum, na.rm=TRUE) | |
} | |
system.time(funRNSO()) | |
# user system elapsed | |
# 3.20 0.00 3.22 | |
system.time(funRNSOb()) | |
# user system elapsed | |
# 1.72 0.00 1.72 | |
## ^^ Imagine if we had to aggregate more than 2 columns! | |
microbenchmark(tapply = funRS(), | |
xtabs = funAM(), | |
dplyr = funAdplyr(), | |
datatable = funAdt(), | |
by = funAby(), | |
acast = funAacast(), | |
times = 20) | |
# Unit: milliseconds | |
# expr min lq median uq max neval | |
# tapply 155.003662 164.664556 175.49503 209.98393 275.68028 20 | |
# xtabs 463.060971 520.557225 580.92654 611.13589 720.41927 20 | |
# dplyr 17.555662 19.042972 21.12384 24.53657 29.02181 20 | |
# datatable 7.277477 8.253766 11.49202 18.26767 26.67410 20 | |
# by 232.103088 255.488470 288.51957 318.80311 378.95220 20 | |
# acast 159.206876 172.837925 213.22048 253.78435 331.10764 20 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment