Skip to content

Instantly share code, notes, and snippets.

@jwscook
Last active March 11, 2024 18:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jwscook/0dff1b8da843102206350c566c66a4b3 to your computer and use it in GitHub Desktop.
Save jwscook/0dff1b8da843102206350c566c66a4b3 to your computer and use it in GitHub Desktop.
Understanding non-allocating factorization methods in scalapack
using LinearAlgebra, Test, Random
Random.seed!(0)
@testset "understand raw lapack calls" begin
@show Threads.nthreads()
@show BLAS.get_num_threads()
m = 5
n = 4
A = rand(m, n);
Acopy = deepcopy(A);
b = rand(m)
b1 = deepcopy(b)
x = Acopy \ b
function qrnonalloc!(A, b)
m, n = size(A)
k = min(m, n)
tau = zeros(eltype(A), min(m, n))
LAPACK.geqrf!(A, tau) # factor into R and householder reflectors
LAPACK.ormqr!('L', 'T', A, tau, b) # multiply Q' * b, R x = Q' b
LAPACK.trtrs!('U', 'N', 'N', (@view A[1:n, :]), (@view b[1:n]))
return (@view b[1:n])
end
@test qrnonalloc!(deepcopy(Acopy), deepcopy(b)) ≈ x
#gels! allocated a minimum of 1.8x
_, x2 = LAPACK.gels!('N', deepcopy(Acopy), deepcopy(b))
@test x2 ≈ x
A = rand(4, 4)
Acopy = deepcopy(A)
b = rand(4)
bcopy = deepcopy(b)
L, U = lu(A)
function lunonalloc!(A, b)
m, n = size(A)
factors, ipiv, info = LAPACK.getrf!(A)
LAPACK.getrs!('N', factors, ipiv, b)
return (@view b[1:n])
end
@test lunonalloc!(A, b) ≈ (lu(Acopy) \ bcopy)
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment