Skip to content

Instantly share code, notes, and snippets.

@henfiber
Forked from ronert/compare_compilers.R
Last active October 5, 2022 08:54
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save henfiber/e2af7e79f44781cde5ad to your computer and use it in GitHub Desktop.
Save henfiber/e2af7e79f44781cde5ad to your computer and use it in GitHub Desktop.
Updated combo benchmark (25.R,RevoR,loess,LAPACK benchmarks)
#####################
## Updated and combo benchmark
## Including :
## R-benchmark-25.R from Simon Urbanek
## Revolution Analytics benchmarks : http://www.revolutionanalytics.com/revolution-revor-enterprise-benchmark-details
## Extra benchmarks (loess and eigen)
## LAPACK benchmarks (QR,eigen,cholesky,svd,prcomp,balance,kappa,norm,solve)
## TODO:
## add extra functions like dist() and cluster() - also provided by gputools
## separate the gpu from cpu benchmarks, and the analysis from the benchmark code
## precompute the matrices used throughout the experiments
library(compiler)
library(methods)
set.seed(1)
##########################
# Helper functions
##########################
# return a string with found BLAS/LAPACK library filenames
get_numlib <- function(blas_fallback="libblas", lapack_fallback="liblapack") {
if(exists("getMKLthreads", mode="function")) {
numlib <- "MKL_MKL"
} else {
blas <- blas_fallback; lapack <- lapack_fallback
cmd <- paste0("lsof -p ", Sys.getpid(), " 2>/dev/null |
grep 'lib.*blas\\|lib.*lapack\\|lib.*atlas' |
awk -v FS='/' '{$0=substr($NF,1,index($NF,\".\")-1)} 1'")
p <- pipe(cmd)
libs <- readLines(p); close(p)
if(any(grepl("atlas", libs))) {
str <- libs[grep("atlas", libs)]
blas <- unlist(strsplit(str, split=".", fixed = T))[1]
lapack <- blas
}
if(any(grepl("openblas", libs))) {
str <- libs[grep("openblas", libs)]
blas <- unlist(strsplit(str, split="-"))[1]
}
if(length(blas) == 0 || blas=="") blas <- blas_fallback
if(length(lapack) == 0 || lapack=="") lapack <- lapack_fallback
numlib <- paste(blas, lapack, sep = "_")
# Compiling the C function to set OpenBLAS number of threads for OpenMP version
# needed to run the implicit parallelism test, otherwise it fails
if(grepl("openblaso", blas)) {
if("OpenBlasThreads" %in% rownames(installed.packages())) {
library(OpenBlasThreads)
} else {
message("Compiling C inline function openblas.set.num.threads()")
compile_openblas_num_threads()
}
}
}
numlib
}
# Get a unique hash for this host using Sys.info()
# The system host name can be found using this process:
# First, this function checks the system environment variables HOST, HOSTNAME, and COMPUTERNAME.
# Second, it checks Sys.info()["nodename"] for host name details.
# Finally, it tries to query the system command uname -n.
build_sysinfo_hash <- function(){
return(substr(digest::digest(Sys.info(), "md5"), 1, 5))
}
# build a benchmark name using r version, jit status and loaded math libraries
build_benchmark_name <- function(jit_level=0) {
numlib <- tryCatch(get_numlib(),
error = function(e) warning(paste0("While trying to detect the math libs : ", e$message)))
name <- paste( # R.version$os,
as.character(getRversion()),
paste0("j", jit_level),
numlib,
sep = "_")
if(interactive()) {
response <- readline(paste0('The automatic benchmark name is "', name, '". ',
"Press enter to accept. OR type a new custom name :\n"))
if(response != "") name <- response
}
name
}
# Compile inline C function to set OpenBLAS number of threads
# Use as: openblas.set.num.threads(4)
compile_openblas_num_threads <- function() {
require(inline)
openblas.set.num.threads <<- cfunction(signature(ipt="integer"),
body = 'openblas_set_num_threads(*ipt);', language = "C", convention = ".C",
otherdefs = c ('extern void openblas_set_num_threads(int);'),
libargs = c ('-L/usr/lib64/ -lblas'))
}
##########################
# Measurement functions
##########################
init_measurements <- function() {
if(!exists("measurements") || is.null(measurements)) {
measurements <<- data.frame(timestamp=format(Sys.time(), "%Y%m%dT%H%M%S"))
}
}
add_measurement <- function(name,value) {
measurements[1, name] <<- value
}
# Save measuremt in an .rds file
save_measurements <- function() {
if(is.null(measurements)) {
return(2)
}
if(!dir.exists("benchmarks_results")) {
stop("Cannot find the benchmarks results folder. chdir to the appropriate location")
}
if(is.null(benchmark_name)) {
if(interactive())
benchmark_name <<- readline("Benchmark name? (unique - will override identical benchmark files) : ")
else
benchmark_name <<- build_benchmark_name(0)
}
if(!exists("host_hash")) {
host_hash <- build_sysinfo_hash()
if(interactive()) {
response <- readline(paste0("Type a custom hostname if you want here. Otherwise the automatically generated hash ",
host_hash,
" will be used : "))
if(nchar(response)) host_hash <- response
}
}
saveRDS(measurements, file=paste0("benchmarks_results/bench_", benchmark_name, "_",
format(Sys.time(), "%Y%m%dT%H%M%S"), "_",
host_hash,
".rds"))
}
# Run and measure a benchmarking function/expression
run_and_measure <- function(name, expr, runs=1){
#expr <- substitute(expr) # Prevent early evaluation
init_measurements()
cat("###", name, "\n\n")
timing <- unclass(system.time(expr))[1:3]
add_measurement(paste0("cput_", name), timing[1] + timing[2])
add_measurement(paste0("clock_", name), timing[3])
# Print the timing
print(timing)
cat("\n")
}
# MAIN function - Run a whole experiment
run_experiment <- function(expr, jit_level=0) {
library(compiler)
enableJIT(jit_level)
# Use the same seed across runs
set.seed(1)
gc()
# Initialize measurements data frame
measurements <<- data.frame(timestamp=format(Sys.time(), "%Y%m%dT%H%M%S"))
# set the benchmark name
benchmark_name <<- build_benchmark_name(jit_level)
rownames(measurements)[1] <<- benchmark_name
# Run the experiment
cat("# EXPERIMENT:", benchmark_name, "\n\n")
expr
cat("\n")
}
#####################################
# Loading and ranking experiments
#####################################
# Load measurements saved in .rds files
load_measurements <- function(pattern='bench_.*', this_engine=TRUE){
if(!dir.exists("benchmarks_results")) {
stop("Cannot find the benchmarks results folder. chdir to the appropriate location")
}
if(this_engine)
pattern <- paste0(pattern, build_sysinfo_hash(), "[.]+rds")
else
pattern <- paste0(pattern, "[.]+rds")
found <- list.files(path = "benchmarks_results", pattern = pattern)
if(length(found) > 0) {
past_measurements <<- data.frame(timestamp=format(Sys.time(), "%Y%m%dT%H%M%S"), stringsAsFactors = F)
for (f in found) {
cur <- readRDS(paste0("benchmarks_results/", f))
name <- rownames(cur)
if(!this_engine) name <- paste0(name, "_", sub("(.*_)([A-Za-z0-9]+)[.]+rds", "\\2", f))
cur <- cbind(data.frame(name=name, stringsAsFactors = F), cur)
past_measurements <<- merge(past_measurements, cur, all=T)
}
past_measurements <<- past_measurements[complete.cases(past_measurements), ]
} else {
message("No past experiments have been found.")
}
}
load_memory_measurements <- function(pattern='*.csv') {
if(!dir.exists("benchmarks_results")) {
stop("Cannot find the benchmarks results folder. chdir to the appropriate location")
}
past_measurements <<- do.call("rbind", lapply(list.files(path = "benchmarks_results",
pattern = pattern), function(f) {
d <- read.csv(paste0("benchmarks_results/", f), stringsAsFactors = F)
values <- t(d$mem_kb)
colnames(values) <- t(d$func)
rownames(values) <- f
data.frame(values)
}))
}
# Annotate experiments extracting info from the experiment name
annotate_experiments <- function() {
if(!exists("past_measurements"))
load_measurements()
m <- past_measurements
single_threaded <- c("libblas", "liblapack", "libsatlas", "libopenblas")
openblas_versions <- c("libopenblas", "libopenblasp", "libopenblaso")
atlas_versions <- c("libtatlas", "libsatlas")
mkl_versions <- c("MKL")
# Parse information from experiment name
s <- strsplit(m$name, '_')
d <- do.call("rbind", lapply(s, function(l){
data.frame( r_version = l[1],
jit = substr(l[2], 2, 2),
blas = l[3],
lapack = if(length(l)>3) l[4] else l[3]
)
}))
d$blas_opt = as.factor(ifelse(d$blas == "libblas", "default", "optimized"))
d$lapack_opt = as.factor(ifelse(d$lapack == "liblapack", "default", "optimized"))
d$blas_threaded = as.factor(ifelse(d$blas %in% single_threaded, FALSE, TRUE))
d$lapack_threaded = as.factor(ifelse(d$lapack %in% single_threaded, FALSE, TRUE))
d$blas_family = as.factor(ifelse(d$blas %in% openblas_versions, "openblas",
ifelse(d$blas %in% atlas_versions, "atlas",
ifelse(d$blas %in% mkl_versions, "mkl", "default"))))
(m <- cbind(d, m))
}
# Rank experiments - according to different scores
# Overall/clock-time means, most wins, highest ratios etc.
rank_experiments <- function() {
if(!exists("past_measurements"))
load_measurements()
# Index some categories of columns
cols_numeric <- 3:ncol(past_measurements)
cols_clock <- which(!grepl("^cput_", colnames(past_measurements)))
cols_clock <- cols_clock[-c(1,2)]
cols_cput <- which(grepl("^cput_", colnames(past_measurements)))
cols_parallel <- which(grepl("_PARALLEL$", colnames(past_measurements)))
cols_aggregated <- which(grepl("_ALL$", colnames(past_measurements)))
# We define geometric mean and trimmed geommetric mean
gmean <- function(x) {exp(mean(log(x)))}
trgmean <- function(x) {sort(x); exp(mean(log(x[2:length(x)-1])))}
# Rank by arithmetic means
means <- rowMeans(past_measurements[, cols_numeric])
print("Rank by overall arithmetic means")
print(paste(past_measurements$name[order(means)], sort(means)))
# Rank by clock-time means
clock_means <- rowMeans(past_measurements[, cols_clock])
print("Rank by clock-time means")
print(paste(past_measurements$name[order(clock_means)], sort(clock_means)))
# Rank by Geometric means
geom_means <- apply(past_measurements[, cols_numeric], 1, gmean)
print("Rank by overall geometric means")
print(paste(past_measurements$name[order(geom_means)], sort(geom_means)))
# Rank by Geometric clock-time means
clock_geom_means <- apply(past_measurements[, cols_clock], 1, gmean)
print("Rank by clock-time geometric means")
print(paste(past_measurements$name[order(clock_geom_means)], sort(clock_geom_means)))
# Rank by most wins
wins <- apply(past_measurements[, cols_numeric], 2, function(x) which.min(x))
wins <- data.frame(idx=names(table(wins)), wins=as.numeric(table(wins)), stringsAsFactors = F)
# Add missing values, i.e. the experiments with no wins
wins <- merge(data.frame(idx=1:nrow(past_measurements)), wins, all = T)
wins$wins[is.na(wins$wins)] <- 0
# Print
print("Rank by most wins")
print(paste(past_measurements[order(wins$wins, decreasing=T), ]$name, sort(wins$wins, decr=T)))
# Rank by most clock-time wins
wins <- apply(past_measurements[, cols_clock], 2, function(x) which.min(x))
wins <- data.frame(idx=names(table(wins)), wins=as.numeric(table(wins)), stringsAsFactors = F)
# Add missing values, i.e. the experiments with no wins
wins <- merge(data.frame(idx=1:nrow(past_measurements)), wins, all = T)
wins$wins[is.na(wins$wins)] <- 0
# Print
print("Rank by most clock-time wins")
print(paste(past_measurements[order(wins$wins, decreasing=T), ]$name, sort(wins$wins, decr=T)))
# Rank by most impressive win!
best <- apply(past_measurements[, cols_clock], 2, function(x) which.min(x))
best <- data.frame(idx=names(best), name=past_measurements$name[best], stringsAsFactors = F)
best$ratio <- apply(past_measurements[, cols_clock], 2, function(x) max(x)/min(x))
best <- best[order(best$ratio, decreasing = T), ]
print("Rank by the highest speedup/ratio in a single score")
print(best)
# Rank by the most marginal winner! (the highest difference from the second)
best_diff <- do.call("rbind", apply(past_measurements[, cols_clock], 2, function(x) {
best <- which.min(x)
x <- sort(x)
data.frame(id=best, ratio=x[2]/x[1], stringsAsFactors = F)
}))
best_diff$name <- past_measurements$name[best_diff$id]
best_diff <- best_diff[order(best_diff$ratio, decreasing = T), ]
print("Rank by the highest marginal difference from the second in a single score")
print(best_diff)
# Rank by highest parallelization ratio - means_cput / means_clock
cols <- colnames(past_measurements)
cols_cput_paired <- setdiff(setdiff(cols_cput, cols_parallel), cols_aggregated)
cols_clock_paired <- which(cols %in% sub("cput_", "clock_", cols[cols_cput_paired]))
cput_clock_ratios <- rowMeans(past_measurements[, cols_cput_paired]) /
rowMeans(past_measurements[, cols_clock_paired])
print("Rank by highest clock-time/cpu-time ratio")
print(paste(past_measurements$name[order(cput_clock_ratios, decreasing = T)],
sort(cput_clock_ratios, decreasing = T)))
# Rank by overall scaled arithmetic means of CPU times
clock_scaled_means <- rowMeans(scale(past_measurements[, cols_cput], center = F))
print("Rank by overall scaled CPU times arithmetic means")
print(paste(past_measurements$name[order(clock_scaled_means)], sort(clock_scaled_means)))
# Rank by overall scaled arithmetic means of clock times
clock_scaled_means <- rowMeans(scale(past_measurements[, cols_clock], center = F))
print("Rank by overall scaled clock-times arithmetic means")
print(paste(past_measurements$name[order(clock_scaled_means)], sort(clock_scaled_means)))
# Rank by overall scaled geometric means (trimmed) of clock times
clock_scaled_gmeans <- apply(scale(past_measurements[, cols_clock], center = F),
1, trgmean)
print("Rank by overall scaled clock-times (trimmed) geometric means")
print(paste(past_measurements$name[order(clock_scaled_gmeans)], sort(clock_scaled_gmeans)))
}
top_experiments <- function(n_best=8) {
if(!exists("past_measurements"))
load_measurements()
cols_numeric <- 3:ncol(past_measurements)
cols_clock <- which(!grepl("^cput_", colnames(past_measurements)))
cols_clock <- cols_clock[-c(1,2)]
# Rank by most clock-time wins
wins <- apply(past_measurements[, cols_clock], 2, function(x) which.min(x))
wins <- data.frame(idx=names(table(wins)), wins=as.numeric(table(wins)), stringsAsFactors = F)
# Add missing values, i.e. the experiments with no wins
wins <- merge(data.frame(idx=1:nrow(past_measurements)), wins, all = T)
wins$wins[is.na(wins$wins)] <- 0
head(past_measurements[order(wins$wins, decreasing=T), ]$name, n_best)
}
plot_experiments <- function(){
library(ggplot2)
library(reshape2)
if(!exists("past_measurements"))
load_measurements()
# the first column is a timestamp, the second an id of the experiment
measure.vars <- colnames(past_measurements)[-c(1,2)]
# define the attribute columns
ids <- c("name", "blas", "lapack", "jit",
"r_version", "blas_opt", "lapack_opt",
"blas_threaded", "lapack_threaded")
# Annotate experiments with metadata about the math libraries, r version and jit level used
m <- annotate_experiments()
# columns with cpu-time measurements
cols_cput <- colnames(m)[which(grepl("^cput_", colnames(m)))]
# columns with geometric means (cloc-time) measurements
cols_gmean <- colnames(m)[which(grepl("^trimgmean_", colnames(m)))]
# columns with aggregated clock-time measurements
cols_total <- colnames(m)[which(grepl("^clock.*_ALL$|^clock_PARALLEL$", colnames(m)))]
# columns with clock-time measurements
cols_clock <- colnames(m)[which(grepl("^clock_|^trimgmean_", colnames(m)))]
cols_clock <- setdiff(cols_clock, cols_total)
# Convert measurements to long format
m_melt <- melt(m, id.vars = ids,
measure.vars = measure.vars)
m_melt$measurement <- as.character(m_melt$variable)
m_melt$value <- as.numeric(m_melt$value)
# Plot single columns
qplot(name, clock_PARALLEL, data=m) + coord_flip()
# Plot multiple measurements
measurements_total <- subset(m_melt, measurement %in% cols_total)
measurements_clock <- subset(m_melt, measurement %in% cols_clock)
measurements_clock_threaded <- subset(measurements_clock, blas_threaded == TRUE)
measurements_openblasp <- subset(measurements_clock, blas == "libopenblasp")
measurements_clock_top <- subset(measurements_clock, name %in% top_experiments())
# Line charts
g <- ggplot(measurements_total, aes(x=measurement, y=value, color=name, group=name)) +
scale_fill_brewer(type="qual", palette = 1) +
theme(axis.text.x = element_text(angle=90))
g + geom_line(data=measurements_clock_threaded, position = "jitter")
# Bar charts
g <- ggplot(measurements_total, aes(x=measurement, y=value, fill=name)) +
scale_fill_brewer(type="qual")
# Bar chart of cols_total
g + geom_bar(data=measurements_total, stat="identity", position="dodge")
# Bar chart of cols_clock
g + geom_bar(data=measurements_clock, stat="identity", position="dodge") + coord_flip()
# Bar chart of cols_clock for threaded libraries
g + geom_bar(data=measurements_clock_threaded, stat="identity", position="dodge") + coord_flip()
# Bar chart of cols_clock for topN experiments across other settings
g + geom_bar(data=measurements_clock_top, stat="identity", position="dodge") + coord_flip()
}
##########################################
## Benchmark functions
##########################################
## R Benchmark 2.5 ##
## adapted as a function
# http://r.research.att.com/benchmarks/R-benchmark-25.R
# R Benchmark 2.5 (06/2008) [Simon Urbanek]
# version 2.5: scaled to get roughly 1s per test, R 2.7.0 @ 2.6GHz Mac Pro
# R Benchmark 2.4 (06/2008) [Simon Urbanek]
# version 2.4 adapted to more recent Matrix package
# R Benchmark 2.3 (21 April 2004)
# Warning: changes are not carefully checked yet!
# version 2.3 adapted to R 1.9.0
# Many thanks to Douglas Bates (bates@stat.wisc.edu) for improvements!
# version 2.2 adapted to R 1.8.0
# version 2.1 adapted to R 1.7.0
# version 2, scaled to get 1 +/- 0.1 sec with R 1.6.2
# using the standard ATLAS library (Rblas.dll)
# on a Pentium IV 1.6 Ghz with 1 Gb Ram on Win XP pro
# revised and optimized for R v. 1.5.x, 8 June 2002
# Requires additionnal libraries: Matrix, SuppDists
# Author : Philippe Grosjean
# eMail : phgrosjean@sciviews.org
# Web : http://www.sciviews.org
# License: GPL 2 or above at your convenience (see: http://www.gnu.org)
#
# Several tests are adapted from the Splus Benchmark Test V. 2
# by Stephan Steinhaus (stst@informatik.uni-frankfurt.de)
# Reference for Escoufier's equivalents vectors (test III.5):
# Escoufier Y., 1970. Echantillonnage dans une population de variables
# aleatoires réelles. Publ. Inst. Statis. Univ. Paris 19 Fasc 4, 1-47.
####
rbenchmark <- function(runs=3) {
set.seed (1)
runs <- runs # Number of times the tests are executed
times <- rep(0, 15); dim(times) <- c(5,3)
require(Matrix) # Optimized matrix operations
# Runif <- rMWC1019 # The fast uniform number generator
Runif <- runif
# If you don't have SuppDists, you can use: Runif <- runif
# a <- rMWC1019(10, new.start=TRUE, seed=492166) # Init. the generator
# Rnorm <- rziggurat # The fast normal number generator
# If you don't have SuppDists, you can use: Rnorm <- rnorm
# b <- rziggurat(10, new.start=TRUE) # Init. the generator
Rnorm <- rnorm
# remove("a", "b")
options(object.size=100000000)
if (R.Version()$os == "Win32" || R.Version()$os == "mingw32") flush.console()
cat("## Synthetic benchmarks (2.5)\n\n")
cat(c("Number of times each test is run__________________________: ", runs))
cat("\n\n")
cat("### I. Matrix calculation\n\n")
# (1)
name = "MAT_NEW_T_DEF"
cumulate <- numeric(5); a <- 0; b <- 0
for (i in 1:runs) {
invisible(gc())
timing <- system.time({
a <- matrix(Rnorm(2500*2500)/10, ncol=2500, nrow=2500);
b <- t(a);
dim(b) <- c(1250, 5000);
a <- t(b)
})
cumulate <- cumulate + timing
}
remove("a", "b")
timing <- cumulate/runs
times[1, 1] <- timing[3]
# Add the measurement
add_measurement(paste0("cput_", name), timing[1] + timing[2])
add_measurement(paste0("clock_", name), timing[3])
cat(c("Creation, transp., deformation of a 2500x2500 matrix (sec): ", timing[3], "\n"))
if (R.Version()$os == "Win32" || R.Version()$os == "mingw32") flush.console()
# (2)
name = "MAT_POW_2500"
cumulate <- numeric(5); b <- 0
for (i in 1:runs) {
a <- abs(matrix(Rnorm(2500*2500)/2, ncol=2500, nrow=2500));
invisible(gc())
timing <- system.time({
b <- a^1000
})
cumulate <- cumulate + timing
}
remove("a", "b")
timing <- cumulate/runs
times[2, 1] <- timing[3]
# Add the measurement
add_measurement(paste0("cput_", name), timing[1] + timing[2])
add_measurement(paste0("clock_", name), timing[3])
cat(c("2500x2500 normal distributed random matrix ^1000____ (sec): ", timing[3], "\n"))
if (R.Version()$os == "Win32" || R.Version()$os == "mingw32") flush.console()
# (3)
name = "SORT_7M"
cumulate <- numeric(5); b <- 0
for (i in 1:runs) {
a <- Rnorm(7000000)
invisible(gc())
timing <- system.time({
b <- sort(a, method="quick") # Sort is modified in v. 1.5.x
})
cumulate <- cumulate + timing
}
remove("a", "b")
timing <- cumulate/runs
times[3, 1] <- timing[3]
# Add the measurement
add_measurement(paste0("cput_", name), timing[1] + timing[2])
add_measurement(paste0("clock_", name), timing[3])
cat(c("Sorting of 7,000,000 random values__________________ (sec): ", timing[3], "\n"))
if (R.Version()$os == "Win32" || R.Version()$os == "mingw32") flush.console()
# (4)
name = "CROSS_PROD_2800"
cumulate <- numeric(5); b <- 0
for (i in 1:runs) {
a <- Rnorm(2800*2800); dim(a) <- c(2800, 2800)
invisible(gc())
timing <- system.time({
b <- crossprod(a) # equivalent to: b <- t(a) %*% a
})
cumulate <- cumulate + timing
}
remove("a", "b")
timing <- cumulate/runs
times[4, 1] <- timing[3]
# Add the measurement
add_measurement(paste0("cput_", name), timing[1] + timing[2])
add_measurement(paste0("clock_", name), timing[3])
cat(c("2800x2800 cross-product matrix (b = a' * a)_________ (sec): ", timing[3], "\n"))
if (R.Version()$os == "Win32" || R.Version()$os == "mingw32") flush.console()
# (5)
name = "LM_CUSTOM"
cumulate <- numeric(5); c <- 0; qra <-0
for (i in 1:runs) {
a <- new("dgeMatrix", x = Rnorm(2000*2000), Dim = as.integer(c(2000,2000)))
b <- as.double(1:2000)
invisible(gc())
timing <- system.time({
c <- solve(crossprod(a), crossprod(a,b))
})
cumulate <- cumulate + timing
# This is the old method
# a <- Rnorm(600*600); dim(a) <- c(600,600)
# b <- 1:600
# invisible(gc())
# timing <- system.time({
# qra <- qr(a, tol = 1e-7);
# c <- qr.coef(qra, b)
# #Rem: a little faster than c <- lsfit(a, b, inter=F)$coefficients
# })[3]
# cumulate <- cumulate + timing
}
remove("a", "b", "c", "qra")
timing <- cumulate/runs
times[5, 1] <- timing[3]
# Add the measurement
add_measurement(paste0("cput_", name), timing[1] + timing[2])
add_measurement(paste0("clock_", name), timing[3])
cat(c("Linear regr. over a 3000x3000 matrix (c = a \\ b')___ (sec): ", timing[3], "\n"))
if (R.Version()$os == "Win32" || R.Version()$os == "mingw32") flush.console()
# Summary of Matrix calculation tests
times[ , 1] <- sort(times[ , 1])
cat(" --------------------------------------------\n")
cat(c(" Trimmed geom. mean (2 extremes eliminated): ", exp(mean(log(times[2:4, 1]))), "\n\n"))
# Add the overall measurement
add_measurement(paste0("trimgmean_", "MATRIX_CALC"), exp(mean(log(times[2:4, 1]))))
cat("### II. Matrix functions\n\n")
if (R.Version()$os == "Win32") flush.console()
# (1)
name = "FFT"
cumulate <- numeric(5); b <- 0
for (i in 1:runs) {
a <- Rnorm(2400000)
invisible(gc())
timing <- system.time({
b <- fft(a)
})
cumulate <- cumulate + timing
}
remove("a", "b")
timing <- cumulate/runs
times[1, 2] <- timing[3]
# Add the measurement
add_measurement(paste0("cput_", name), timing[1] + timing[2])
add_measurement(paste0("clock_", name), timing[3])
cat(c("FFT over 2,400,000 random values____________________ (sec): ", timing[3], "\n"))
if (R.Version()$os == "Win32" || R.Version()$os == "mingw32") flush.console()
# (2)
name = "EIGEN_600"
cumulate <- numeric(5); b <- 0
for (i in 1:runs) {
a <- array(Rnorm(600*600), dim = c(600, 600))
# Only needed if using eigen.Matrix(): Matrix.class(a)
invisible(gc())
timing <- system.time({
b <- eigen(a, symmetric=FALSE, only.values=TRUE)$Value
# Rem: on my machine, it is faster than:
# b <- La.eigen(a, symmetric=F, only.values=T, method="dsyevr")$Value
# b <- La.eigen(a, symmetric=F, only.values=T, method="dsyev")$Value
# b <- eigen.Matrix(a, vectors = F)$Value
})
cumulate <- cumulate + timing
}
remove("a", "b")
timing <- cumulate/runs
times[2, 2] <- timing[3]
# Add the measurement
add_measurement(paste0("cput_", name), timing[1] + timing[2])
add_measurement(paste0("clock_", name), timing[3])
cat(c("Eigenvalues of a 640x640 random matrix______________ (sec): ", timing[3], "\n"))
if (R.Version()$os == "Win32" || R.Version()$os == "mingw32") flush.console()
# (3)
name = "DET_2500"
cumulate <- numeric(5); b <- 0
for (i in 1:runs) {
a <- Rnorm(2500*2500); dim(a) <- c(2500, 2500)
#Matrix.class(a)
invisible(gc())
timing <- system.time({
#b <- determinant(a, logarithm=F)
# Rem: the following is slower on my computer!
# b <- det.default(a)
b <- det(a)
})
cumulate <- cumulate + timing
}
remove("a", "b")
timing <- cumulate/runs
times[3, 2] <- timing[3]
# Add the measurement
add_measurement(paste0("cput_", name), timing[1] + timing[2])
add_measurement(paste0("clock_", name), timing[3])
cat(c("Determinant of a 2500x2500 random matrix____________ (sec): ", timing[3], "\n"))
if (R.Version()$os == "Win32" || R.Version()$os == "mingw32") flush.console()
# (4)
name = "CHOL_3K"
cumulate <- numeric(5); b <- 0
for (i in 1:runs) {
a <- crossprod(new("dgeMatrix", x = Rnorm(3000*3000),
Dim = as.integer(c(3000, 3000))))
invisible(gc())
#a <- Rnorm(900*900); dim(a) <- c(900, 900)
#a <- crossprod(a, a)
timing <- system.time({
b <- chol(a)
})
cumulate <- cumulate + timing
}
remove("a", "b")
timing <- cumulate/runs
times[4, 2] <- timing[3]
# Add the measurement
add_measurement(paste0("cput_", name), timing[1] + timing[2])
add_measurement(paste0("clock_", name), timing[3])
cat(c("Cholesky decomposition of a 3000x3000 matrix________ (sec): ", timing[3], "\n"))
if (R.Version()$os == "Win32" || R.Version()$os == "mingw32") flush.console()
# (5)
name = "INVERSE_1600"
cumulate <- numeric(5); b <- 0
for (i in 1:runs) {
a <- new("dgeMatrix", x = Rnorm(1600*1600), Dim = as.integer(c(1600, 1600)))
invisible(gc())
# a <- Rnorm(400*400); dim(a) <- c(400, 400)
timing <- system.time({
# b <- qr.solve(a)
# Rem: a little faster than
b <- solve(a)
})
cumulate <- cumulate + timing
}
remove("a", "b")
timing <- cumulate/runs
times[5, 2] <- timing[3]
# Add the measurement
add_measurement(paste0("cput_", name), timing[1] + timing[2])
add_measurement(paste0("clock_", name), timing[3])
cat(c("Inverse of a 1600x1600 random matrix________________ (sec): ", timing[3], "\n"))
if (R.Version()$os == "Win32" || R.Version()$os == "mingw32") flush.console()
# Total time for Matrix functions
times[ , 2] <- sort(times[ , 2])
cat(" --------------------------------------------\n")
cat(c(" Trimmed geom. mean (2 extremes eliminated): ", exp(mean(log(times[2:4, 2]))), "\n\n"))
# Add the overall measurement
add_measurement(paste0("trimgmean_", "MATRIX_FUNC"), exp(mean(log(times[2:4, 2]))))
cat("### III. Programmation\n\n")
if (R.Version()$os == "Win32" || R.Version()$os == "mingw32") flush.console()
# (1)
name = "FIBO_3.5M"
cumulate <- numeric(5); a <- 0; b <- 0; phi <- 1.6180339887498949
for (i in 1:runs) {
a <- floor(Runif(3500000)*1000)
invisible(gc())
timing <- system.time({
b <- (phi^a - (-phi)^(-a))/sqrt(5)
})
cumulate <- cumulate + timing
}
remove("a", "b", "phi")
timing <- cumulate/runs
times[1, 3] <- timing[3]
# Add the measurement
add_measurement(paste0("cput_", name), timing[1] + timing[2])
add_measurement(paste0("clock_", name), timing[3])
cat(c("3,500,000 Fibonacci numbers calculation (vector calc)(sec): ", timing[3], "\n"))
if (R.Version()$os == "Win32" || R.Version()$os == "mingw32") flush.console()
# (2)
name = "HILBERT_3K"
cumulate <- numeric(5); a <- 3000; b <- 0
for (i in 1:runs) {
invisible(gc())
timing <- system.time({
b <- rep(1:a, a); dim(b) <- c(a, a);
b <- 1 / (t(b) + 0:(a-1))
# Rem: this is twice as fast as the following code proposed by R programmers
# a <- 1:a; b <- 1 / outer(a - 1, a, "+")
})
cumulate <- cumulate + timing
}
remove("a", "b")
timing <- cumulate/runs
times[2, 3] <- timing[3]
# Add the measurement
add_measurement(paste0("cput_", name), timing[1] + timing[2])
add_measurement(paste0("clock_", name), timing[3])
cat(c("Creation of a 3000x3000 Hilbert matrix (matrix calc) (sec): ", timing[3], "\n"))
if (R.Version()$os == "Win32" || R.Version()$os == "mingw32") flush.console()
# (3)
name = "GCD_400K"
cumulate <- numeric(5); c <- 0
# Define the gcd function
gcd2 <- function(x, y) {if (sum(y > 1.0E-4) == 0) x else {y[y == 0] <- x[y == 0]; Recall(y, x %% y)}}
for (i in 1:runs) {
a <- ceiling(Runif(400000)*1000)
b <- ceiling(Runif(400000)*1000)
invisible(gc())
timing <- system.time({
c <- gcd2(a, b) # gcd2 is a recursive function
})
cumulate <- cumulate + timing
}
remove("a", "b", "c", "gcd2")
timing <- cumulate/runs
times[3, 3] <- timing[3]
# Add the measurement
add_measurement(paste0("cput_", name), timing[1] + timing[2])
add_measurement(paste0("clock_", name), timing[3])
cat(c("Grand common divisors of 400,000 pairs (recursion)__ (sec): ", timing[3], "\n"))
if (R.Version()$os == "Win32" || R.Version()$os == "mingw32") flush.console()
# (4)
name = "TOEPLITZ_500"
cumulate <- numeric(5); b <- 0
for (i in 1:runs) {
b <- rep(0, 500*500); dim(b) <- c(500, 500)
invisible(gc())
timing <- system.time({
# Rem: there are faster ways to do this
# but here we want to time loops (220*220 'for' loops)!
for (j in 1:500) {
for (k in 1:500) {
b[k,j] <- abs(j - k) + 1
}
}
})
cumulate <- cumulate + timing
}
remove("b", "j", "k")
timing <- cumulate/runs
times[4, 3] <- timing[3]
# Add the measurement
add_measurement(paste0("cput_", name), timing[1] + timing[2])
add_measurement(paste0("clock_", name), timing[3])
cat(c("Creation of a 500x500 Toeplitz matrix (loops)_______ (sec): ", timing[3], "\n"))
if (R.Version()$os == "Win32" || R.Version()$os == "mingw32") flush.console()
# (5)
name = "ESCOUFIER_45"
cumulate <- numeric(5); p <- 0; vt <- 0; vr <- 0; vrt <- 0; rvt <- 0; RV <- 0; j <- 0; k <- 0;
x2 <- 0; R <- 0; Rxx <- 0; Ryy <- 0; Rxy <- 0; Ryx <- 0; Rvmax <- 0
# Calculate the trace of a matrix (sum of its diagonal elements)
Trace <- function(y) {sum(c(y)[1 + 0:(min(dim(y)) - 1) * (dim(y)[1] + 1)], na.rm=FALSE)}
for (i in 1:runs) {
x <- abs(Rnorm(45*45)); dim(x) <- c(45, 45)
invisible(gc())
timing <- system.time({
# Calculation of Escoufier's equivalent vectors
p <- ncol(x)
vt <- 1:p # Variables to test
vr <- NULL # Result: ordered variables
RV <- 1:p # Result: correlations
vrt <- NULL
for (j in 1:p) { # loop on the variable number
Rvmax <- 0
for (k in 1:(p-j+1)) { # loop on the variables
x2 <- cbind(x, x[,vr], x[,vt[k]])
R <- cor(x2) # Correlations table
Ryy <- R[1:p, 1:p]
Rxx <- R[(p+1):(p+j), (p+1):(p+j)]
Rxy <- R[(p+1):(p+j), 1:p]
Ryx <- t(Rxy)
rvt <- Trace(Ryx %*% Rxy) / sqrt(Trace(Ryy %*% Ryy) * Trace(Rxx %*% Rxx)) # RV calculation
if (rvt > Rvmax) {
Rvmax <- rvt # test of RV
vrt <- vt[k] # temporary held variable
}
}
vr[j] <- vrt # Result: variable
RV[j] <- Rvmax # Result: correlation
vt <- vt[vt!=vr[j]] # reidentify variables to test
}
})
cumulate <- cumulate + timing
}
remove("x", "p", "vt", "vr", "vrt", "rvt", "RV", "j", "k")
remove("x2", "R", "Rxx", "Ryy", "Rxy", "Ryx", "Rvmax", "Trace")
timing <- cumulate/runs
times[5, 3] <- timing[3]
# Add the measurement
add_measurement(paste0("cput_", name), timing[1] + timing[2])
add_measurement(paste0("clock_", name), timing[3])
cat(c("Escoufier's method on a 45x45 matrix (mixed)________ (sec): ", timing[3], "\n"))
if (R.Version()$os == "Win32" || R.Version()$os == "mingw32") flush.console()
## Total time for Programmation tests
times[ , 3] <- sort(times[ , 3])
cat(" --------------------------------------------\n")
cat(c(" Trimmed geom. mean (2 extremes eliminated): ", exp(mean(log(times[2:4, 3]))), "\n\n\n"))
cat("\n\n")
# Add the overall measurement
add_measurement(paste0("trimgmean_", "PROGRAMMATION"), exp(mean(log(times[2:4, 3]))))
#### TOTAL TIME
cat("### Total time for synthetic tests\n\n")
cat(c("Total time for all 15 tests_________________________ (sec): ", sum(times), "\n"))
cat(c("Overall mean (sum of I, II and III trimmed means/3)_ (sec): ", exp(mean(log(times[2:4, ]))), "\n"))
cat("\n\n")
# Add the net time taken for all synthetic tests as a measurement
add_measurement(paste0("net_sum_", "SYNTHETIC"), sum(times))
}
# run r benchmark 2.5
run_synthetic <- function() {
run_and_measure("SYNTHETIC_ALL", rbenchmark(runs=8))
}
###############################################
## bench.R
## http://r.research.att.com/benchmarks/bench.R
###############################################
run_extras <- function() {
set.seed (1)
cat("\n")
cat("## Extra benchmarks\n\n")
# hilbert
hilbert <- function(n) 1/(outer(seq(n),seq(n),"+")-1)
X <- hilbert(1000)
run_and_measure("HILBERT_1000", eigen(X))
# loess
loess.me <- function(n) {
x<-rnorm(10^n); y<-rnorm(10^n); z<-rnorm(10^n)
invisible(loess(z~x+y))
}
run_and_measure("LOESS_4", loess.me(4))
}
#####################################
## Revolution Analytics Benchmarks
##
#####################################
# Matrix multiplication
run_matmult <- function(m=5000, n=5000){
cat("\n")
A <- matrix (runif (m*n),m,n)
run_and_measure("MATMUL_custom", sum(A%*%t(A)))
run_and_measure("MATMUL_tcross", tcrossprod(A))
}
# Linear Discriminant Analysis
run_lda <- function(m=10000, n=1000, g=5) {
cat("\n")
require ('MASS')
set.seed (1)
A <- matrix (runif (m*n),m,n)
A <- data.frame (A, fac=sample(LETTERS[1:g], m, replace=TRUE))
k <- round (m/2)
train <- sample(1:m, k)
run_and_measure("LDA", L <- lda(fac ~., data=A, prior=rep(1,g)/g, subset=train))
}
# Cholesky Factorization
# Compute the Choleski factorization of a real symmetric positive-definite square matrix.
# we calculate the average of chol and chol2inv in a large matrix
# uses LAPACK routines DPOTRF and DPSTRF
run_cholesky_large <- function(n=7000) {
cat("\n")
A <- matrix (runif (n*n), n, n)
B <- crossprod(A)
run_and_measure("CHOL_7K", C <- chol(B, pivot = T))
run_and_measure("CHOL2INV_7K", chol2inv(C))
}
# Singular Value Decomposition (SVD)
# uses LAPACK routines DGESDD and ZGESDD
run_svd <- function(m=10000, n=2000) {
cat("\n")
A <- matrix (runif (m*n),m,n)
run_and_measure("SVD", S <- svd(A,nu=0,nv=0))
}
# Principal Components Analysis (prcomp)
# prcomp uses svd, so we expect that LAPACK is used
# princomp uses eigen, so we also expect that LAPACK is used
run_pca <- function(m=4000, n=2000) {
A <- matrix (runif (m*n),m,n)
run_and_measure("PRCOMP", P <- prcomp(A))
run_and_measure("PRINCOMP", P <- princomp(A))
}
# QR decomposition
# using LAPACK routines DGEQP3 and ZGEQP3
run_qr <- function(m=7000, n=2000) {
X <- matrix(runif(m*n), m, n)
run_and_measure("QR_la", Y <- qr(X, LAPACK = T))
}
# Eigen
# Uses LAPACK routines DSYEVR, DGEEV, ZHEEV and ZGEEV
run_eigen <- function(m=3000) {
hilbert <- function(n) 1/(outer(seq(n),seq(n),"+")-1)
X <- hilbert(m)
run_and_measure("EIGEN", eigen(X))
}
# Kappa
# Uses LAPACK routines DTRCON and ZTRCON
run_kappa <- function(m=2000) {
X <- matrix(runif(m*m), m, m)
run_and_measure("KAPPA", kappa(X, norm = "O", LINPACK=F))
}
# SOLVE
# Uses LAPACK routines DGESV and ZGESV
run_solve <- function(m=3000) {
hilbert <- function(n) { i <- 1:n; 1 / outer(i - 1, i, "+") }
X <- hilbert(m) + matrix(rnorm(m*m, 1), m)
run_and_measure("SOLVE", solve(X))
}
# NORM
# Uses LAPACK routine DLANGE
run_norm <- function(m=7000) {
hilbert <- function(n) { i <- 1:n; 1 / outer(i - 1, i, "+") }
X <- hilbert(m)
#nTyp <- eval(formals(base::norm)$type)
nTyp <- c('M','1','O','I','F', 'E')
run_and_measure("NORM", sapply(nTyp, norm, x = X))
}
# BALANCE matrix
# Uses LAPACK DGEBAL
run_balance <- function(m=10000) {
X <- matrix(runif(m*m), m, m)
run_and_measure("BALANCE", expm::balance(X))
}
##########################
## Implicit parallelism ##
##########################
# Load the parallel library and fix conflicts with MKL/OpenMP multithreaded libs
load_parallel <- function() {
library(parallel)
# Fix conflicts for the MKL and OpenBlas OpenMP multithreaded libraries
if(exists("setMKLthreads", mode="function"))
setMKLthreads(1)
if(exists("openblas.set.num.threads", mode="function"))
openblas.set.num.threads(1)
}
# Produce a matrix with "times" permutations of the "tests" vector
permute_tests <- function(tests, times=4) {
if(times > 1) {
set.seed(NULL)
tests <- rbind(tests, rev(tests)) # adding reverse second row
for (i in seq(length.out = times-2))
tests <- rbind(tests, sample(tests[1, ], ncol(tests), replace = F))
}
tests
}
# Run the default benchmark in parallel
run_parallel <- function(cores=4) {
load_parallel()
set.seed (1)
runs <- rep(2, cores)
run_and_measure("PARALLEL", mclapply(runs, rbenchmark, mc.cores=cores))
}
# Run "cores" parallel threads for the same code (in the same order)
run_parallel_ident <- function(cores=4) {
load_parallel()
set.seed (1)
runs <- rep(1, cores)
bench <- function(runs=1) {
funcs <- c("run_matmult", "run_qr", "run_norm", "run_extras") # "run_solve"
sapply(funcs, function(x) eval(get(x)()))
}
run_and_measure("PARALLEL_IDENTICAL", mclapply(runs, bench, mc.cores=cores))
}
# Run "cores" parallel threads in a permutted order
run_parallel_permuted <- function(cores=4) {
load_parallel()
set.seed (1)
index <- 1:cores
# reset output file
system(": > /tmp/parbench")
funcs <- c("run_matmult", "run_qr", "run_norm", "run_extras") # "run_solve"
funcs <- permute_tests(funcs, cores)
bench <- function(index=1) {
set.seed(NULL)
cmd <- paste0("echo ", paste(funcs[index, ], collapse = " "), " >>/tmp/parbench")
system(cmd)
sapply(funcs[index,], function(x) eval(get(x)()))
}
run_and_measure("PARALLEL_PERMUTTED", mclapply(index, bench, mc.cores=cores))
}
##########################
## GPU tests ##
##########################
# Matrix multiplication on GPU
# gpuMatMult is merely a wrapper for the CUBLAS cublasDgemm function.
run_matmult_gpu <- function(m=5000, n=5000){
library("gputools")
cat("\n")
A <- matrix(runif (m*n),m,n)
run_and_measure("MATMUL_GPU", gpuMatMult(A, t(A)))
run_and_measure("MATMUL_tcross_GPU", gpuTcrossprod(A))
# Another one to compare with CROSS_PROD_2800 (CPU)
A <- matrix (runif(2800*2800),2800,2800)
run_and_measure("CROSS_PROD_2800_GPU", gpuCrossprod(A))
}
# QR decomposition on GPU
# gpuQR estimates the QR decomposition for a matrix using column pivoting and householder matrices.
run_qr_gpu <- function(m=7000, n=2000) {
library("gputools")
X <- matrix(runif(m*n), m, n)
run_and_measure("QR_GPU", gpuQr(X))
}
# Inverse of a Matrix using GPU
run_inverse_gpu <- function(m=1600, n=1600) {
library("gputools")
A <- matrix (runif (m*n),m,n)
run_and_measure("INVERSE_1600_GPU", b <- gpuSolve(A))
}
# Custom linear model impl. using gpu functions
run_custom_lm_gpu <- function(m=2000, n=2000) {
library("gputools")
A <- matrix (runif (m*n),m,n)
b <- as.double(1:m)
run_and_measure("LM_CUSTOM_GPU", c <- gpuSolve(gpuCrossprod(A), gpuCrossprod(A,b)))
}
# Run a sort task in GPU using the Rth:rthsort() function
run_sort_gpu <- function(m=7000000) {
library("Rth")
a <- runif(m)
run_and_measure("SORT_7M_GPU", b <- rthsort(a))
}
# Parallel running of matrix multiplication in the GPU
run_parallel_gpu <- function(reps=2) {
library(parallel)
set.seed (1)
cl <- makeForkCluster(2)
run_and_measure("GPU_PARALLEL", clusterCall(cl, function() run_matmult_gpu(2000,2000)))
stopCluster(cl)
}
# Run shipped demos
run_demos <- function() {
# demos <- unclass(demo(package="stats"))$results[,3]
demos <- c("glm.vr", "lm.glm", "nlm", "smooth")
run_and_measure("DEMOS", sapply(demos, function(x) demo(x, package="stats",
character.only = T, ask = FALSE)))
}
######################################
## Batch running tests
# LAPACK-using methods tests
# functions:
# eigen, qr, svd, chol, chol2inv, solve, det (uses QR)
# kappa, rcond (similar to kappa), norm, expm::balance
run_lapack <- function(m=10000, n=5000) {
set.seed(1)
run_and_measure("LAPACK_ALL", {
run_qr()
run_eigen()
run_norm()
run_cholesky_large()
run_svd()
run_balance()
run_pca()
run_kappa()
run_solve()
})
}
# We exclude the implicit parallelism test because some threaded BLAS configuration produce segfaults
run_safe <- function(jit_level=0) {
run_and_measure("TESTS_SAFE_ALL", {
run_synthetic()
run_extras()
run_matmult()
run_lda()
run_lapack()
})
}
run_gpu <- function() {
run_matmult_gpu()
run_qr_gpu
run_inverse_gpu()
run_custom_lm_gpu()
}
run_all <- function() {
run_and_measure("TESTS_ALL", {
run_safe()
run_parallel()
})
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment