Skip to content

Instantly share code, notes, and snippets.

@jimhester
Last active March 19, 2017 05:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save jimhester/e725e1ad50a5a62f3dee to your computer and use it in GitHub Desktop.
Save jimhester/e725e1ad50a5a62f3dee to your computer and use it in GitHub Desktop.
# benchmarks from http://www.win-vector.com/blog/2015/07/efficient-accumulation-in-r/#more-3295
mkRow <- function(nCol) {
x <- as.list(rnorm(nCol))
# make row mixed types by changing first column to string
x[[1]] <- ifelse(x[[1]]>0,'pos','neg')
names(x) <- paste('x',seq_len(nCol),sep='.')
x
}
mkFrameForLoop <- function(nRow,nCol) {
set.seed(0)
d <- c()
for(i in seq_len(nRow)) {
ri <- mkRow(nCol)
di <- data.frame(ri,
stringsAsFactors=FALSE)
d <- rbind(d,di)
}
d
}
mkFrameList <- function(nRow,nCol) {
set.seed(0)
d <- lapply(seq_len(nRow),function(i) {
ri <- mkRow(nCol)
data.frame(ri,
stringsAsFactors=FALSE)
})
do.call(rbind,d)
}
mkFrameBindRows <- function(nRow,nCol) {
set.seed(0)
d <- lapply(seq_len(nRow),function(i) {
ri <- mkRow(nCol)
data.frame(ri,
stringsAsFactors=FALSE)
})
dplyr::bind_rows(d)
}
mkFrameVector <- function(nRow,nCol) {
set.seed(0)
res <- c(list(character(nRow)), replicate(nCol - 1, numeric(nRow), simplify = FALSE))
for(i in seq_len(nRow)) {
ri <- mkRow(nCol)
for(j in seq_along(ri)) {
res[[j]][i] <- ri[[j]]
}
}
# fix the names
names(res) <- paste('x',
seq_len(nCol),sep='.')
data.frame(res, stringsAsFactors = FALSE)
}
mkRow2 <- function(nCol, fileloc) {
x <- as.list(rnorm(nCol))
# make row mixed types by changing first column to string
x[[1]] <- ifelse(x[[1]]>0,'pos','neg')
writeLines(paste(x, collapse=" "), con = fileloc)
}
avoidAggregate <- function(nRow,nCol) {
set.seed(0)
some.file <- tempfile()
file.create(some.file)
cf <- file(some.file, open="a")
lapply(seq_len(nRow),function(i) {mkRow2(nCol, cf)})
close(cf)
numnumer <- rep("numeric", nCol - 1)
res <- read.table(some.file, nrows=nRow,
colClasses= c("character", numnumer))
names(res) <- paste('x',
seq_len(nCol),sep='.')
res
}
mkRowC <- function(nCol) {
xN <- rnorm(nCol)
x <- as.character(xN)
x[[1]] <- ifelse(xN[[1]]>0,'pos','neg')
x
}
mkFrameMat <- function(nRow,nCol) {
set.seed(0)
d <- matrix(data="",
nrow=nRow,ncol=nCol)
for(i in seq_len(nRow)) {
ri <- mkRowC(nCol)
d[i,] <- ri
}
d <- data.frame(d,
stringsAsFactors=FALSE)
if(nCol>1) {
for(i in 2:nCol) {
d[[i]] <- as.numeric(d[[i]])
}
}
names(d) <- paste('x',
seq_len(nCol),sep='.')
d
}
library(microbenchmark)
library(compile)
rows = 10000
columns = 100
microbenchmark(a <- mkFrameForLoop(rows, columns),
b <- mkFrameList(rows, columns),
d <- mkFrameBindRows(rows, columns),
e <- avoidAggregate(rows, columns),
ec <- cmpfun(avoidAggregate)(rows, columns),
f <- mkFrameVector(rows, columns),
fc <- cmpfun(mkFrameVector)(rows, columns),
g <- mkFrameMat(rows, columns),
gc <- cmpfun(mkFrameMat)(rows, columns),
times = 1)
# Unit: milliseconds
# expr min lq mean median uq max neval
# a <- mkFrameForLoop(rows, columns) 145496.8312 145496.8312 145496.8312 145496.8312 145496.8312 145496.8312 1
# b <- mkFrameList(rows, columns) 63811.0009 63811.0009 63811.0009 63811.0009 63811.0009 63811.0009 1
# d <- mkFrameBindRows(rows, columns) 42721.5881 42721.5881 42721.5881 42721.5881 42721.5881 42721.5881 1
# e <- avoidAggregate(rows, columns) 3618.5261 3618.5261 3618.5261 3618.5261 3618.5261 3618.5261 1
# ec <- cmpfun(avoidAggregate)(rows, columns) 3666.2685 3666.2685 3666.2685 3666.2685 3666.2685 3666.2685 1
# f <- mkFrameVector(rows, columns) 2809.1274 2809.1274 2809.1274 2809.1274 2809.1274 2809.1274 1
# fc <- cmpfun(mkFrameVector)(rows, columns) 689.0486 689.0486 689.0486 689.0486 689.0486 689.0486 1
# g <- mkFrameMat(rows, columns) 1182.1522 1182.1522 1182.1522 1182.1522 1182.1522 1182.1522 1
# gc <- cmpfun(mkFrameMat)(rows, columns) 1932.6838 1932.6838 1932.6838 1932.6838 1932.6838 1932.6838 1
all.equal(a, b)
all.equal(a, d, check.attributes = FALSE) # dplyr adds a few class names to the data frame
all.equal(a, e)
all.equal(a, ec)
all.equal(a, f)
all.equal(a, fc)
all.equal(a, g)
all.equal(a, gc)
# run the fast ones a few times to get more accurate timings
# Unit: milliseconds
# expr min lq mean median uq max neval cld
# e <- avoidAggregate(rows, columns) 3460.6368 3492.7273 3572.8147 3556.015 3675.985 3678.709 5 d
# ec <- cmpfun(avoidAggregate)(rows, columns) 3424.0553 3492.4201 3548.3598 3570.829 3596.581 3657.914 5 d
# f <- mkFrameVector(rows, columns) 2115.1614 2400.8824 2813.8144 2710.366 3155.541 3687.122 5 c
# fc <- cmpfun(mkFrameVector)(rows, columns) 575.5463 631.7447 708.3951 635.345 665.834 1033.506 5 a
# g <- mkFrameMat(rows, columns) 1279.0403 1286.2203 1328.8007 1322.776 1343.263 1412.704 5 b
# gc <- cmpfun(mkFrameMat)(rows, columns) 1228.0720 1299.3356 1419.9762 1330.457 1423.105 1818.911 5 b
@russellpierce
Copy link

You may mean library(compiler) rather than library(compile) or is compile a non-CRAN package?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment