Skip to content

Instantly share code, notes, and snippets.

@dmarcelinobr
Last active December 17, 2015 20:38
Show Gist options
  • Save dmarcelinobr/5668701 to your computer and use it in GitHub Desktop.
Save dmarcelinobr/5668701 to your computer and use it in GitHub Desktop.
# To build the final table, n was changed by hand for every trial (1e3, 1e4,
# 1e5, 1e6, 1e7) and the benchmark result object was renamed to match
# (res1 ... res5).
n <- 1e7
set.seed(51)
# Synthetic input: a grouping id drawn from 1..100 plus numeric columns that the
# benchmark averages per id. `w` is the scalar pexp(2, rate = 1/3), recycled
# into a constant column.
# NOTE(review): the original line was missing the comma before pexp(), so the
# intended column name is unknown; `w` is a placeholder -- confirm.
process <- data.frame(
  id = sample(100, n, replace = TRUE),
  x = rnorm(n),
  y = runif(n),
  z = rpois(n, 1),
  w = pexp(2, rate = 1 / 3)
)
# Number of cores handed to every parallel back-end below. parallel ships with
# R, so this no longer reaches into the private namespace of `multicore`.
# The variable keeps its original name (`all`) because later code reads it.
all <- parallel::detectCores(all.tests = TRUE)
# detectCores() returns NA when the count cannot be determined; fall back to a
# single worker rather than passing NA on.
if (is.na(all)) {
  all <- 1L
}
# Attach rbenchmark, installing it first when it is missing. The original only
# installed the package in that case and never attached it, so benchmark() was
# undefined on a first run.
if (!requireNamespace("rbenchmark", quietly = TRUE)) {
  install.packages("rbenchmark")
}
library(rbenchmark)
# Attach the parallel back-ends once, before timing starts. Doing this inside
# the timed expressions added package-loading cost to the measurements and, on
# a fresh machine, installed a package without ever attaching it.
# NOTE(review): `multicore` appears to be archived on CRAN; parallel::mclapply
# is its maintained successor -- confirm before switching, since that changes
# what the "multicore" row measures.
for (pkg in c("multicore", "snow", "snowfall")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}
rm(pkg)

# Time four ways of computing per-id column means of `process`: sequential
# lapply() and three parallel equivalents using `all` workers. Each expression
# also stores its result (lapply_, multicore_, snow_, snowfall_) so the outputs
# can be compared afterwards. Cluster start-up and shutdown stay inside the
# timed code for snow and snowfall, as in the original.
# The outer parentheses print the timing table once the run completes.
(res5 <- benchmark(
  replications = 100, order = "user.self",
  lapply = {
    cat("---> distributing lapply...\n")
    lapply_ <- data.frame(lapply(split(process[-1], process[1]), colMeans))
  },
  multicore = {
    cat("---> distributing mclapply...\n")
    multicore_ <- data.frame(
      mclapply(split(process[-1], process[1]), colMeans, mc.cores = all)
    )
  },
  snow = {
    cat("---> distributing parLapply...\n")
    cl <- makeCluster(all, type = "SOCK")
    snow_ <- data.frame(
      parLapply(cl, split(process[-1], process[1]), colMeans)
    )
    stopCluster(cl)
  },
  snowfall = {
    cat("---> distributing sfLapply...\n")
    # snowfall keeps its cluster in package state: sfInit() does not return a
    # cluster handle and sfStop() does not take one. The original passed
    # sfInit()'s return value to sfStop(), where it was read as `nostop`.
    sfInit(parallel = TRUE, cpus = all, type = "SOCK")
    snowfall_ <- data.frame(
      sfLapply(split(process[-1], process[1]), colMeans)
    )
    sfStop()
  }
))
# Attach `compare`, installing it first when it is missing (the original
# installed it but never attached it in that case).
# NOTE(review): nothing visible in this script calls into `compare`;
# all.equal() below is base R. The dependency is kept for compatibility.
if (!requireNamespace("compare", quietly = TRUE)) {
  install.packages("compare")
}
library(compare)

# Sanity check: each parallel back-end should reproduce the sequential lapply()
# result. all.equal() yields TRUE or a description of the differences; print()
# makes the outcome visible under source() as well as at the console.
print(all.equal(lapply_, multicore_))
print(all.equal(lapply_, snow_))
print(all.equal(lapply_, snowfall_))
# Final table ----
# res1 ... res4 come from earlier runs of the benchmark with smaller n; they are
# not created by this script as written. Tag each result with its trial size.
res1[["trial"]] <- factor(1e3)
res2[["trial"]] <- factor(1e4)
res3[["trial"]] <- factor(1e5)
res4[["trial"]] <- factor(1e6)
res5[["trial"]] <- factor(1e7)

# Tables of neighbouring trial sizes, one per small plot.
top1 <- do.call(rbind, list(res1, res2))
top2 <- do.call(rbind, list(res2, res3))
top3 <- do.call(rbind, list(res3, res4))
top4 <- do.call(rbind, list(res4, res5))

# Every trial stacked together, for the full plot.
top5 <- do.call(rbind, list(res1, res2, res3, res4, res5))
##### Plots
library(ggplot2)
library(gridExtra)
### Full plot ----
# The `user.self` timing of every method across all trial sizes.
# NOTE: a log scale on the y axis would read better than the linear one used.
p <- ggplot(top5, aes(x = trial, y = user.self, group = test, colour = test)) +
  geom_line(size = 3, alpha = I(0.7)) +
  xlab("Vector Size") +
  ylab("Time in seconds")
# theme_dms() is a custom theme that this script never defines. Apply it only
# when the session provides it, so the plot still renders without it.
if (exists("theme_dms", mode = "function")) {
  p <- p + theme_dms()
}
print(p)
# Builds one small panel: a line per benchmarked method over the two trial
# sizes held in `timings`, with `title` as the plot title.
plot_trial_pair <- function(timings, title) {
  ggplot(timings, aes(x = trial, y = user.self, group = test, colour = test)) +
    geom_line(size = 3, alpha = I(0.7)) +
    ggtitle(title) +
    xlab("Vector Size") +
    ylab("Time in seconds")
}

# The outer parentheses both assign and print each panel.
(p1 <- plot_trial_pair(top1, "From 1e3 to 1e4 rows"))
(p2 <- plot_trial_pair(top2, "From 1e4 to 1e5 rows"))
(p3 <- plot_trial_pair(top3, "From 1e5 to 1e6 rows"))
(p4 <- plot_trial_pair(top4, "From 1e6 to 1e7 rows"))
# Write the four pairwise panels to parallelfinal.jpeg as a 2 x 2 grid.
# jpeg() is opened exactly once: the original opened a second device on the
# same file after drawing, and dev.off() then closed only that blank device,
# leaving the one that held the plot open.
jpeg("parallelfinal.jpeg", width = 800, height = 600, quality = 100)
# unit() comes from grid; it is called with an explicit namespace because
# attaching gridExtra is not guaranteed to attach grid as well.
# tryCatch(finally = ) closes the device even if drawing fails.
invisible(tryCatch(
  grid.arrange(
    arrangeGrob(
      p1, p2, p3, p4,
      widths = grid::unit(c(0.5, 0.5), "npc"),
      heights = grid::unit(c(0.5, 0.5), "npc"),
      nrow = 2
    )
  ),
  finally = dev.off()
))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment