-
-
Save N0rbert/cfda101b8f0aa326df1edb6beee0076d to your computer and use it in GitHub Desktop.
#!/bin/sh | |
# This test script is a result of a discussion on AskUbuntu (https://askubuntu.com/q/1265756/66509 ) about using Intel MKL in Ubuntu | |
# | |
# Modern Ubuntu versions include Intel MKL libraries since Ubuntu 19.10 (such as https://packages.ubuntu.com/focal/libmkl-full-dev ) | |
# and it is expected that this library may be used by some scientific applicaitons like Octave, Scilab and others. | |
# | |
# Test method: | |
# 1. Execute this script with default mathematical libraries, save the results | |
# 2. Install the Intel MKL library with `sudo apt-get install libmkl-full-dev` confirming its usage as default math libraries alternative | |
# 3. Execute this script again after Intel MKL installation to compare the results | |
# Matrix size for all tests | |
size=500 | |
# Known maximal eigenvalue for size 500 | |
eigvalue_500=16.914886 | |
# Octave test program is written by Archisman Panigrahi, adapted by Norbert | |
cat << EOF > /tmp/test_octave.m | |
% Octave test program is written by Archisman Panigrahi, adapted by Norbert | |
M = $size; | |
i = 1:M; | |
c = sin(i' + i.^2); | |
tic; | |
g = eig(c); | |
toc | |
m = max(real(g)) | |
if (M == 500) | |
assert (m, $eigvalue_500, 1e-6) | |
%Correct result is ans = 16.915 | |
%With MKL in Ubuntu 20.04, I get random numbers of order 10^5 - 10^6, which changes every time | |
endif | |
EOF | |
# Scilab | |
cat << EOF > /tmp/test_scilab.sce | |
mode(0); | |
M = $size; | |
i = 1:M; | |
c = sin( repmat(i', 1, length(i)) + repmat(i, length(i), 1).^2 ); | |
tic(); | |
g = spec(c); | |
t = toc() | |
m = max(real(g)) | |
if M==500 then | |
assert_checkalmostequal(m, $eigvalue_500, 1e-6); | |
end | |
EOF | |
# Julia | |
cat << EOF > /tmp/test_julia.jl | |
using LinearAlgebra | |
M = $size; | |
c = [ sin(a + b.^2) for a in 1:M, b in 1:M] | |
t0 = time(); | |
g = LinearAlgebra.eigvals(c); | |
t = time() - t0 | |
println("t=", t) | |
m = maximum(real(g)) | |
println("m=", m) | |
if M == 500 | |
@assert isapprox(m, $eigvalue_500, rtol=1e-6) | |
end | |
EOF | |
# Python 3 with Numpy | |
cat << EOF > /tmp/test_numpy.py | |
import numpy | |
from time import time | |
M = $size; | |
c = numpy.empty((M, M)); | |
for i in range(1,M+1): | |
for j in range(1,M+1): | |
c[i-1][j-1] = numpy.sin(i + numpy.power(j, 2.)) | |
t0 = time(); | |
g = numpy.linalg.eigvals(c); | |
t = time() - t0; | |
print("t = ", t) | |
m = numpy.max(numpy.real(g)); | |
print("m = ", m) | |
if M == 500: | |
numpy.testing.assert_almost_equal(m, $eigvalue_500, 6) | |
EOF | |
# Python 3 with Torch and CUDA | |
cat << EOF > /tmp/test_torch.py | |
import torch | |
import numpy | |
from time import time | |
M = 500; | |
c = numpy.empty((M, M)); | |
for i in range(1,M+1): | |
for j in range(1,M+1): | |
c[i-1][j-1] = numpy.sin(i + numpy.power(j, 2.)) | |
t0 = time(); | |
c_gpu = torch.tensor(c) | |
g = torch.linalg.eigvals(c_gpu) | |
t = time() - t0; | |
print("t = ", t) | |
m = torch.max(torch.real(g)); | |
print("m = ", m) | |
if M == 500: | |
numpy.testing.assert_almost_equal(m, $eigvalue_500, 6) | |
EOF | |
# R | |
cat << EOF > /tmp/test_r.R | |
# needs r-cran-assertthat deb-package | |
M = $size; | |
c = matrix(nrow=M, ncol=M) | |
for (i in 1:M) { | |
for (j in 1:M) { | |
c[i, j] = sin(i + j**2); | |
} | |
} | |
t0 = Sys.time(); | |
g = eigen(c); | |
t = Sys.time() - t0; | |
cat(sprintf("t = %f\n", t)); | |
m = max(Re(g[["values"]])); | |
cat(sprintf("m = %f\n", m)); | |
if (M == 500) { | |
assertthat::validate_that(abs(m - $eigvalue_500) < 1e-6); | |
} | |
EOF | |
###### Call test programs | |
echo | |
echo "> Scilab" | |
scilab-cli -nb -noatomsautoload -f /tmp/test_scilab.sce -quit | |
echo | |
echo "> Julia" | |
julia /tmp/test_julia.jl | |
echo | |
echo "> Python 3 with NumPy" | |
python3 /tmp/test_numpy.py | |
echo | |
echo "> Python 3 with Torch and CUDA" | |
python3 /tmp/test_torch.py | |
echo | |
echo "> R (will produce error for MKL)" | |
Rscript /tmp/test_r.R | |
echo | |
echo "> Octave (will produce error for MKL)" | |
octave-cli /tmp/test_octave.m | |
echo | |
echo "> Octave (with MKL error fixed)" | |
# need to define MKL_THREADING_LAYER environment variable | |
dpkg -l | grep --silent mkl && export MKL_THREADING_LAYER=gnu | |
octave-cli /tmp/test_octave.m | |
echo | |
echo "> R (with MKL error fixed)" | |
Rscript /tmp/test_r.R |
At least for Octave using OpenBlas,
lowering the number of threads speeds up this benchmark enormously:
OMP_NUM_THREADS=4 octave test_octave.m
Elapsed time is 0.100403 seconds.
While by default (OMP_NUM_THREADS=8 )
octave test_octave.m
Elapsed time is 0.226401 seconds.
This is on a quad-core i7-8650.
Would be interesting to know if MKL can also be tweaked that way.
With MKL the times are:
MKL_THREADING_LAYER=gnu OMP_NUM_THREADS=4 octave test_octave.m
Elapsed time is 0.0853021 seconds.
MKL_THREADING_LAYER=gnu OMP_NUM_THREADS=8 octave test_octave.m
Elapsed time is 0.109377 seconds.
In conclusion, MKL is fastest, but the difference with OpenBlas is small if you tune the number of threads.
This test is likely memory rather than CPU bound.
Turns out that Julia has MKL. One just need to do using MKL
after installing the MKL package. I will run the script in my workstation once again
@apandada1
It seems that I can't stop running this benchmark and test :)
So the current 16th revision adds support of R language. It also needs the
MKL_THREADING_LAYER=gnu
to be set. Not sure about overall stability.The table now looks as below:
MKL_THREADING_LAYER=gnu
to be setMKL_THREADING_LAYER=gnu
to be setBut overall stability of MKL should really be tested.
I saw segfaults of Octave's
__run_test_suite__
, Scilab'stest_run('linear_algebra')
have more test fails with MKL enabled. Not sure how to run testsuites for other languages.