Created
January 27, 2016 18:51
-
-
Save jfpuget/6f8c82b729677b0173d0 to your computer and use it in GitHub Desktop.
A Speed Comparison Of C, Julia, Python, Numba, and Cython on LU Factorization
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// lu.c | |
/*
 * Flat offset of element (row, col) in a matrix stored row by row.
 *
 * `rows` is used as the stride between consecutive rows, i.e. the number of
 * elements per row; det_by_lu passes N for its N x N matrix.
 *
 * Declared `static inline`: a plain C99 `inline` definition does not emit an
 * external definition, so any call the compiler chooses not to inline (for
 * example at -O0) would fail to link.
 */
static inline int _(int row, int col, int rows){
    return row*rows + col;
}
/*
 * Determinant of an N x N matrix by in-place LU elimination.
 *
 *   y  out: receives the product of the pivots (1.0 when N <= 0).
 *   x  in/out: N*N doubles addressed through _(row, col, N); overwritten with
 *      the elimination result (multipliers below the diagonal, reduced rows
 *      on and above it).
 *   N  matrix order.
 *
 * No pivoting is performed, so a zero pivot propagates inf/NaN into the
 * remaining entries and into *y.
 */
void det_by_lu(double *y, double *x, int N){
    int i,j,k;
    *y = 1.;
    for(k = 0; k < N; ++k){
        /* The pivot is not modified during step k, so read it once. */
        const double pivot = x[_(k,k,N)];
        *y *= pivot;
        /* Scale the column below the pivot to obtain the multipliers. */
        for(i = k+1; i < N; ++i){
            x[_(i,k,N)] /= pivot;
        }
        /* Subtract multiplier * pivot row from every remaining row. */
        for(i = k+1; i < N; ++i){
            #pragma omp simd
            for(j = k+1; j < N; ++j){
                x[_(i,j,N)] -= x[_(i,k,N)] * x[_(k,j,N)];
            }
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
    det_by_lu(y, x, N)

Store in `y[1]` the determinant of the leading `N`×`N` block of `x`, computed by LU
elimination without pivoting.

`x` is overwritten in place: after the call it holds the multipliers below the diagonal
and the reduced rows on and above it. Because no pivoting is done, a zero pivot
propagates `Inf`/`NaN` into the result. Returns `nothing`.
"""
function det_by_lu(y, x, N)
    y[1] = 1.0
    for k in 1:N
        # The pivot is not modified during step k, so read it once.
        pivot = x[k, k]
        y[1] *= pivot
        # Scale the column below the pivot to obtain the multipliers.
        for i in (k + 1):N
            x[i, k] /= pivot
        end
        # Column-major update: the innermost loop runs over the first index.
        for j in (k + 1):N
            for i in (k + 1):N
                x[i, j] -= x[i, k] * x[k, j]
            end
        end
    end
    return nothing
end
"""
    run_julia(y, A, B, N)

Repeat the LU determinant of `A` enough times to perform roughly `10_000_000 / N^2`
factorizations (at least one), printing the repetition count first.

`A` is left untouched: each repetition copies it into the scratch matrix `B`, which
`det_by_lu` then overwrites. The determinant of the last repetition ends up in `y[1]`.
"""
function run_julia(y, A, B, N)
    # `÷` is integer division; `//` would build a `Rational`, making the loop iterate
    # over a rational range and `print` show e.g. `400000//1`.
    loops = max(10_000_000 ÷ (N * N), 1)
    print(loops)
    for _ in 1:loops
        B[:, :] = A
        det_by_lu(y, B, N)
    end
    return nothing
end
# Benchmark driver: time `run_julia` on random N×N matrices of increasing size.
y = [0.0]

# N = 5 is listed twice, as in the original sequence of runs; presumably the first run
# absorbs JIT compilation so that the second one reflects steady-state speed.
for N in (5, 5, 10, 30, 100, 200, 300, 400, 600, 1000)
    A = rand(N, N)
    B = zeros(N, N)
    @time run_julia(y, A, B, N)
end
A perfect exercise for beginners
But why do you compile Cython code without optimizations?
You should try the fast version of Julia, i.e.:
using LoopVectorization
# LU determinant of the leading N×N block of `x` (overwritten in place), with the
# result stored in `y[1]`.
#
# `@turbo` is applied only to the inner loops. The elimination steps over `k` must run
# in order (step k reads entries written by step k-1), whereas `@turbo` expects loops
# whose iterations do not rely on a specific execution order and that are non-empty.
function det_by_lu(y, x, N)
    y[1] = 1.0
    for k in 1:N
        pivot = x[k, k]
        y[1] *= pivot
        # The last step has nothing left to eliminate; skipping it also keeps empty
        # ranges away from `@turbo`.
        k < N || continue
        @turbo for i in (k + 1):N
            x[i, k] /= pivot
        end
        @turbo for j in (k + 1):N
            for i in (k + 1):N
                x[i, j] -= x[i, k] * x[k, j]
            end
        end
    end
    return nothing
end
This is quite a bit faster.
Why not also compare with Julia's LAPACK-backed `lu`?
using LinearAlgebra
A = rand(1000, 1000)
# Warm-up call: the first invocation also pays for JIT compilation, which would
# otherwise be included in the measured time.
lu(A)
@elapsed lu(A)
By the way, the pure-Julia RecursiveFactorization.jl (https://github.com/YingboMa/RecursiveFactorization.jl) is faster than LAPACK; it would be nice to add it to the benchmarks.
And here are some improvements to your Julia code. The result of the last expression is a vector of elapsed times.
You can also add @fastmath and @simd after @inbounds in det_by_lu!; it gives a small speedup.
"""
    det_by_lu!(x::AbstractMatrix, N = size(x, 2))

Return the determinant of the leading `N`×`N` block of `x`, computed by LU elimination
without pivoting.

`x` is overwritten in place (multipliers below the diagonal, reduced rows on and above
it). Because no pivoting is done, a zero pivot propagates `Inf`/`NaN` into the result.

# Throws
- `ArgumentError` if `x` does not use 1-based indexing or if `N` exceeds either
  dimension of `x`.
"""
function det_by_lu!(x::AbstractMatrix{T}, N = size(x, 2)) where T <: Number
    # The loops below index 1:N under `@inbounds`, so both assumptions must be checked
    # up front; otherwise a bad `N` or an offset array reads and writes out of bounds.
    Base.require_one_based_indexing(x)
    if N > min(size(x, 1), size(x, 2))
        throw(ArgumentError("N = $N exceeds the matrix dimensions $(size(x))"))
    end
    y = one(T)
    @inbounds for k in 1:N
        # The pivot is not modified during step k, so read it once.
        pivot = x[k, k]
        y *= pivot
        @views x[k+1:N, k] ./= pivot
        for j in k+1:N, i in k+1:N
            x[i, j] -= x[i, k] * x[k, j]
        end
    end
    return y
end
# Average wall-clock time of one `det_by_lu!` call on a copy of `A`.
#
# The number of repetitions shrinks with the matrix size (about 10_000_000 / N^2, never
# fewer than one). `A` itself is never modified: every repetition refreshes a scratch
# matrix from it before timing the factorization alone.
function run_julia(A, N = size(A, 2))
    reps = max(div(10_000_000, N^2), 1)
    scratch = similar(A)
    total = 0.0
    rep = 0
    while rep < reps
        copy!(scratch, A)
        total += @elapsed det_by_lu!(scratch, N)
        rep += 1
    end
    return total / reps
end
# Mean time per factorization for each matrix size, as a vector in the same order.
[run_julia(rand(N, N)) for N in [5, 10, 30, 100, 200, 300, 400, 600, 1000]]
UPD: I forgot to say that you may also want to take a look at BenchmarkTools.jl, which provides the @benchmark and @btime macros.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
It would be cool to see how PyPy performs.