Skip to content

Instantly share code, notes, and snippets.

Created March 21, 2011 02:17
Show Gist options
  • Save anonymous/878919 to your computer and use it in GitHub Desktop.
Save anonymous/878919 to your computer and use it in GitHub Desktop.
library(sqldf)
library(doBy)
library(plyr)
library(data.table)
# Simulated benchmark data: n rows with two standard-normal measures (x, y)
# and two grouping variables, each drawn with replacement from 1:750.
n <- 100000
grp1 <- sample(1:750, n, replace = TRUE)
grp2 <- sample(1:750, n, replace = TRUE)
# Only the four analysis columns belong in d. Any extra argument handed to
# data.frame() (such as `n` or `replace`) would become an additional constant
# column and leak into every "aggregate all columns" computation.
d <- data.frame(x = rnorm(n), y = rnorm(n), grp1 = grp1, grp2 = grp2)
# Benchmark: time the same grouped computation -- mean of x and mean of y
# for every (grp1, grp2) combination in d -- with five different tools.
# Each system.time() result is stored so it can be printed here and reused
# later in the script.

# sqldf
rsqldf <- system.time(
  sqldf("select grp1, grp2, avg(x), avg(y) from d
group by grp1, grp2")
)

# doBy
rdoby <- system.time(summaryBy(x + y ~ grp1 + grp2, data = d, FUN = mean))

# aggregate
# Aggregate only x and y so this method does the same work as the others;
# passing the whole data frame would also average the grouping columns and
# any other column present in d.
raggregate <- system.time(
  aggregate(
    d[c("x", "y")],
    by = list(grp1 = d$grp1, grp2 = d$grp2),
    FUN = mean
  )
)

# plyr
rplyr <- system.time(
  ddply(d, .(grp1, grp2), summarise, avx = mean(x), avy = mean(y))
)

# data.table
# The conversion to a data.table happens outside the timed expression, so
# only the grouped computation itself is measured.
DT <- data.table(d)
rdataT <- system.time(
  DT[, list(mean(x), mean(y)), by = list(grp1, grp2)]
)

# Print the timings (user, system, elapsed) for each method.
rsqldf
rdoby
raggregate
rplyr
rdataT
library(gplots)
# Collect the elapsed (wall-clock) time of each benchmark. The element is
# selected by name rather than by position in the system.time() result.
x <- c(
  rsqldf["elapsed"],
  rdataT["elapsed"],
  rdoby["elapsed"],
  raggregate["elapsed"],
  rplyr["elapsed"]
)

# Draw one balloon per method, labelled with the elapsed time rounded to one
# decimal place. The method labels below must stay in the same order as the
# timings collected in x above.
balloonplot(
  rep("time.elapsed", 5),
  c("sqldf", "data.table", "doBy", "aggregate", "plyr"),
  round(x, 1),
  ylab = "Method",
  xlab = "",
  sorted = FALSE,
  dotcolor = rev(heat.colors(5)),
  main = "time.elapsed for different methods of grouping"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment