Instantly share code, notes, and snippets.

# jfpuget/LU_decomposition.ipynb

Created Jan 27, 2016
A Speed Comparison Of C, Julia, Python, Numba, and Cython on LU Factorization
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
 // lu.c inline int _(int row, int col, int rows){ return row*rows + col; } void det_by_lu(double *y, double *x, int N){ int i,j,k; *y = 1.; for(k = 0; k < N; ++k){ *y *= x[_(k,k,N)]; for(i = k+1; i < N; ++i){ x[_(i,k,N)] /= x[_(k,k,N)]; } for(i = k+1; i < N; ++i){ #pragma omp simd for(j = k+1; j < N; ++j){ x[_(i,j,N)] -= x[_(i,k,N)] * x[_(k,j,N)]; } } } }
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
 function det_by_lu(y, x, N) y = 1. for k = 1:N y *= x[k,k] for i = k+1:N x[i,k] /= x[k,k] end for j = k+1:N for i = k+1:N x[i,j] -= x[i,k] * x[k,j] end end end end function run_julia(y,A,B,N) loops = max(10000000 // (N*N), 1) print(loops) for l in 1:loops B[:,:] = A det_by_lu(y, B, N) end end y = [0.0] N=5 A = rand(N,N) B = zeros(N,N) @time run_julia(y,A,B,N) N=5 A = rand(N,N) B = zeros(N,N) @time run_julia(y,A,B,N) N=10 A = rand(N,N) B = zeros(N,N) @time run_julia(y,A,B,N) N=30 A = rand(N,N) B = zeros(N,N) @time run_julia(y,A,B,N) N=100 A = rand(N,N) B = zeros(N,N) @time run_julia(y,A,B,N) N=200 A = rand(N,N) B = zeros(N,N) @time run_julia(y,A,B,N) N=300 A = rand(N,N) B = zeros(N,N) @time run_julia(y,A,B,N) N=400 A = rand(N,N) B = zeros(N,N) @time run_julia(y,A,B,N) N=600 A = rand(N,N) B = zeros(N,N) @time run_julia(y,A,B,N) N=1000 A = rand(N,N) B = zeros(N,N) @time run_julia(y,A,B,N)
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

### ChrisRackauckas commented Sep 22, 2021

You should try the fast version of Julia, i.e.:

```using LoopVectorization

function det_by_lu(y, x, N)
y = 1.

@turbo for k = 1:N
y *= x[k,k]
for i = k+1:N
x[i,k] /= x[k,k]
end
for j = k+1:N
for i = k+1:N
x[i,j] -= x[i,k] * x[k,j]
end
end
end
end```

This is quite a bit faster.

### pddshk commented Nov 20, 2021

Why don't also compare with Julia's lapack?

```using LinearAlgebra

A = rand(1000,1000)

@elapsed lu(A)```

### ChrisRackauckas commented Nov 20, 2021

The pure Julia RecursiveFactorization.jl is faster than LAPACK BTW. https://github.com/YingboMa/RecursiveFactorization.jl it would be nice to add it to the benchmarks.

### pddshk commented Nov 20, 2021 • edited

And here's some improvements to your Julia code. The result of the last expression is vector of elapsed times.

You can also add `@fastmath @simd` in `det_by_lu!` after `@inbounds` it gives small speedup

```function det_by_lu!(x::AbstractMatrix{T}, N = size(x, 2)) where T <: Number
y = one(T)
@inbounds for k in 1:N
y *= x[k,k]
@views x[k+1:N, k] ./= x[k,k]
for j in k+1:N, i in k+1:N
x[i,j] -= x[i,k] * x[k,j]
end
end
y
end

function run_julia(A, N = size(A, 2))
loops = max(10_000_000 ÷ N^2, 1)
B = similar(A)
elapsed = 0.
for _ in 1:loops
copy!(B, A)
elapsed += @elapsed det_by_lu!(B, N)
end
elapsed / loops
end

map([5, 10, 30, 100, 200, 300, 400, 600, 1000]) do N
run_julia(rand(N,N))
end```

UPD. Forgot to say that you may also take a look at BenchmarkTools.jl, that provides `@benchmark` and `@btime` macro