Skip to content

Instantly share code, notes, and snippets.

@bobthecat
Last active December 14, 2015 03:49
Show Gist options
  • Save bobthecat/5024316 to your computer and use it in GitHub Desktop.
Save bobthecat/5024316 to your computer and use it in GitHub Desktop.
# Matrix sizes to benchmark; each test matrix has 10 rows and R[i] columns.
R <- c(2000, 5000, 10000, 20000, 40000)
## The limit is hit at ~50000 columns: the ff function refuses to create
## the matrix and fails with the error below.
# Error in if (length < 0 || length > .Machine$integer.max) stop("length must be between 1 and .Machine$integer.max") :
# missing value where TRUE/FALSE needed
# http://www.bytemining.com/2010/05/hitting-the-big-data-ceiling-in-r/

# Elapsed (wall-clock) seconds taken by bigcor() for each size in R.
normal <- numeric(length = length(R))
for (i in seq_along(R)) {
  # Sizes above 20000 are processed with 20 blocks instead of 10.
  # A plain if/else is used because the condition is a scalar.
  n_blocks <- if (R[i] <= 20000) 10 else 20
  MAT <- matrix(rnorm(R[i] * 10), nrow = 10)
  # Pick the timing by name rather than by position in the proc_time vector.
  normal[i] <- system.time(
    res <- bigcor(MAT, nblocks = n_blocks, verbose = FALSE)
  )[["elapsed"]]
}
# Elapsed (wall-clock) seconds taken by bigcorPar() for each size in R.
parallel <- numeric(length = length(R))
for (i in seq_along(R)) {
  # Sizes above 20000 are processed with 20 blocks instead of 10.
  # A plain if/else is used because the condition is a scalar.
  n_blocks <- if (R[i] <= 20000) 10 else 20
  MAT <- matrix(rnorm(R[i] * 10), nrow = 10)
  # Pick the timing by name rather than by position in the proc_time vector.
  parallel[i] <- system.time(
    res <- bigcorPar(MAT, nblocks = n_blocks, verbose = FALSE)
  )[["elapsed"]]
}
# Long-format results: one row per (implementation, matrix size) timing.
d <- data.frame(
  time = c(normal, parallel),
  type = rep(c("normal", "parallel"), each = length(R)),
  size = rep(R, 2)
)
library(ggplot2)
pdf("bigcor_benchmark.pdf", height = 7, width = 7)
# Build the plot explicitly (qplot() is deprecated in current ggplot2) and
# print() it: ggplot objects are only drawn when printed, and top-level
# auto-printing does not happen when this script is run via source().
p <- ggplot(d, aes(x = size, y = time, group = type, colour = type)) +
  geom_point() +
  geom_path() +
  labs(
    x = "Matrix size",
    y = "Time in sec.",
    title = "Speed comparison bigcor / bigcorPar"
  )
print(p)
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment