-
-
Save N0rbert/cfda101b8f0aa326df1edb6beee0076d to your computer and use it in GitHub Desktop.
#!/bin/sh | |
# This test script is a result of a discussion on AskUbuntu (https://askubuntu.com/q/1265756/66509 ) about using Intel MKL in Ubuntu | |
# | |
# Modern Ubuntu versions include Intel MKL libraries since Ubuntu 19.10 (such as https://packages.ubuntu.com/focal/libmkl-full-dev ) | |
# and it is expected that this library may be used by some scientific applicaitons like Octave, Scilab and others. | |
# | |
# Test method: | |
# 1. Execute this script with default mathematical libraries, save the results | |
# 2. Install the Intel MKL library with `sudo apt-get install libmkl-full-dev` confirming its usage as default math libraries alternative | |
# 3. Execute this script again after Intel MKL installation to compare the results | |
# Matrix size for all tests | |
size=500 | |
# Known maximal eigenvalue for size 500 | |
eigvalue_500=16.914886 | |
# Octave test program is written by Archisman Panigrahi, adapted by Norbert | |
cat << EOF > /tmp/test_octave.m | |
% Octave test program is written by Archisman Panigrahi, adapted by Norbert | |
M = $size; | |
i = 1:M; | |
c = sin(i' + i.^2); | |
tic; | |
g = eig(c); | |
toc | |
m = max(real(g)) | |
if (M == 500) | |
assert (m, $eigvalue_500, 1e-6) | |
%Correct result is ans = 16.915 | |
%With MKL in Ubuntu 20.04, I get random numbers of order 10^5 - 10^6, which changes every time | |
endif | |
EOF | |
# Scilab | |
cat << EOF > /tmp/test_scilab.sce | |
mode(0); | |
M = $size; | |
i = 1:M; | |
c = sin( repmat(i', 1, length(i)) + repmat(i, length(i), 1).^2 ); | |
tic(); | |
g = spec(c); | |
t = toc() | |
m = max(real(g)) | |
if M==500 then | |
assert_checkalmostequal(m, $eigvalue_500, 1e-6); | |
end | |
EOF | |
# Julia | |
cat << EOF > /tmp/test_julia.jl | |
using LinearAlgebra | |
M = $size; | |
c = [ sin(a + b.^2) for a in 1:M, b in 1:M] | |
t0 = time(); | |
g = LinearAlgebra.eigvals(c); | |
t = time() - t0 | |
println("t=", t) | |
m = maximum(real(g)) | |
println("m=", m) | |
if M == 500 | |
@assert isapprox(m, $eigvalue_500, rtol=1e-6) | |
end | |
EOF | |
# Python 3 with Numpy | |
cat << EOF > /tmp/test_numpy.py | |
import numpy | |
from time import time | |
M = $size; | |
c = numpy.empty((M, M)); | |
for i in range(1,M+1): | |
for j in range(1,M+1): | |
c[i-1][j-1] = numpy.sin(i + numpy.power(j, 2.)) | |
t0 = time(); | |
g = numpy.linalg.eigvals(c); | |
t = time() - t0; | |
print("t = ", t) | |
m = numpy.max(numpy.real(g)); | |
print("m = ", m) | |
if M == 500: | |
numpy.testing.assert_almost_equal(m, $eigvalue_500, 6) | |
EOF | |
# Python 3 with Torch and CUDA | |
cat << EOF > /tmp/test_torch.py | |
import torch | |
import numpy | |
from time import time | |
M = 500; | |
c = numpy.empty((M, M)); | |
for i in range(1,M+1): | |
for j in range(1,M+1): | |
c[i-1][j-1] = numpy.sin(i + numpy.power(j, 2.)) | |
t0 = time(); | |
c_gpu = torch.tensor(c) | |
g = torch.linalg.eigvals(c_gpu) | |
t = time() - t0; | |
print("t = ", t) | |
m = torch.max(torch.real(g)); | |
print("m = ", m) | |
if M == 500: | |
numpy.testing.assert_almost_equal(m, $eigvalue_500, 6) | |
EOF | |
# R | |
cat << EOF > /tmp/test_r.R | |
# needs r-cran-assertthat deb-package | |
M = $size; | |
c = matrix(nrow=M, ncol=M) | |
for (i in 1:M) { | |
for (j in 1:M) { | |
c[i, j] = sin(i + j**2); | |
} | |
} | |
t0 = Sys.time(); | |
g = eigen(c); | |
t = Sys.time() - t0; | |
cat(sprintf("t = %f\n", t)); | |
m = max(Re(g[["values"]])); | |
cat(sprintf("m = %f\n", m)); | |
if (M == 500) { | |
assertthat::validate_that(abs(m - $eigvalue_500) < 1e-6); | |
} | |
EOF | |
###### Call test programs | |
echo | |
echo "> Scilab" | |
scilab-cli -nb -noatomsautoload -f /tmp/test_scilab.sce -quit | |
echo | |
echo "> Julia" | |
julia /tmp/test_julia.jl | |
echo | |
echo "> Python 3 with NumPy" | |
python3 /tmp/test_numpy.py | |
echo | |
echo "> Python 3 with Torch and CUDA" | |
python3 /tmp/test_torch.py | |
echo | |
echo "> R (will produce error for MKL)" | |
Rscript /tmp/test_r.R | |
echo | |
echo "> Octave (will produce error for MKL)" | |
octave-cli /tmp/test_octave.m | |
echo | |
echo "> Octave (with MKL error fixed)" | |
# need to define MKL_THREADING_LAYER environment variable | |
dpkg -l | grep --silent mkl && export MKL_THREADING_LAYER=gnu | |
octave-cli /tmp/test_octave.m | |
echo | |
echo "> R (with MKL error fixed)" | |
Rscript /tmp/test_r.R |
@N0rbert Could you add another line (before line 101) to evaluate it with Octave without exporting MKL_THREADING_LAYER=gnu? I originally wrote the script to come up with an example to show that Octave gives erroneous results with MKL, so that I could open the bug report. Maybe, the Octave with MKL should be moved to the last, so that setting MKL_THREADING_LAYER=gnu does not affect the execution of the other programs.
@apandada1
Thanks, I call Octave twice (in line 100 and in line 106).
And also I need to thank you for the whole idea of MKL usage in the Ubuntu 20.04 LTS :) It is significantly improve the performance.
Thanks, I call Octave twice (in line 100 and in line 106).
Moved Octave to the end as you recommended.
@N0rbert Should not it be M==500
in line 50? Right now it is m==500
.
Should not it be M==500 in line 50? Right now it is m==500.
Typo fixed, thanks.
@apandada1
It seems that I can't stop running this benchmark and test :)
So the current 16th revision adds support of R language. It also needs the MKL_THREADING_LAYER=gnu
to be set. Not sure about overall stability.
The table now looks as below:
Language | Default, time (s) | MKL, time (s) | Note |
---|---|---|---|
Scilab | 0.45 | 0.15 | |
Julia | 0.67 | 0.68 | does not use MKL |
Python 3 | 0.46 | 0.15 | |
Octave | 0.46 | 0.16 | needs MKL_THREADING_LAYER=gnu to be set |
R | 0.93 | 0.21 | needs MKL_THREADING_LAYER=gnu to be set |
But overall stability of MKL should really be tested.
I saw segfaults of Octave's __run_test_suite__
, Scilab's test_run('linear_algebra')
have more test fails with MKL enabled. Not sure how to run testsuites for other languages.
At least for Octave using OpenBlas,
lowering the number of threads speeds up this benchmark enormously:
OMP_NUM_THREADS=4 octave test_octave.m
Elapsed time is 0.100403 seconds.
While by default (OMP_NUM_THREADS=8 )
octave test_octave.m
Elapsed time is 0.226401 seconds.
This is on a quad-core i7-8650.
Would be interesting to know if MKL can also be tweaked that way.
With MKL the times are:
MKL_THREADING_LAYER=gnu OMP_NUM_THREADS=4 octave test_octave.m
Elapsed time is 0.0853021 seconds.
MKL_THREADING_LAYER=gnu OMP_NUM_THREADS=8 octave test_octave.m
Elapsed time is 0.109377 seconds.
In conclusion, MKL is fastest, but the difference with OpenBlas is small if you tune the number of threads.
This test is likely memory rather than CPU bound.
Turns out that Julia has MKL. One just need to do using MKL
after installing the MKL package. I will run the script in my workstation once again
Test results for the 8th revision of the script on Ubuntu MATE 20.04 LTS with Intel i7-3537U: