@matthieubulte
Created March 26, 2021 22:03
Compiling sparse matrix multiplication
using LinearAlgebra
using BenchmarkTools
using SparseArrays
# To exploit the sparsity of the matrix as much as possible, pre-compile the multiplication operation
# to skip the matrix entry look-ups. Since the matrix is sparse, this might generate only a few operations,
# which are all inlined.
# Note that this only works with small matrices, since otherwise the generated function would simply be too
# large.
# Note that this limitation could also be worked around, but that is another topic.
function compile_lmul!(A::SparseArrays.AbstractSparseMatrixCSC)
    nzv = nonzeros(A)
    rv = rowvals(A)
    statements = []
    for k in 1:size(A, 1)
        for col in 1:size(A, 2)
            for j in nzrange(A, col)
                push!(statements,
                    :(@inbounds C[$(rv[j]), $(k)] += $(nzv[j])*B[$(col), $(k)])
                )
            end
        end
    end
    ex = Expr(:block)
    ex.args = statements
    eval(:(
        function (C, B)
            $(ex)
            C
        end
    ))
end
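# For illustration (a small sketch, not part of the original benchmark; the names Atiny, Btiny,
# Ctiny and mul_tiny! are only used here): compile_lmul! unrolls one `+=` statement per stored
# entry of A and per column of B.
Atiny = sparse([1.0 0.0;
                0.0 2.0])
mul_tiny! = compile_lmul!(Atiny)    # generates C[1,1] += 1.0*B[1,1]; C[2,1] += 2.0*B[2,1]; ... for k = 1, 2
Ctiny = zeros(2, 2)
Btiny = ones(2, 2)
mul_tiny!(Ctiny, Btiny)             # Ctiny now holds Atiny * Btiny, i.e. [1.0 1.0; 2.0 2.0]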
# Source of the model: https://www.ronanarraes.com/2019/04/my-julia-workflow-readability-vs-performance/
const O3x3 = zeros(3,3)
const O3x12 = zeros(3,12)
const O12x18 = zeros(12,18)
I6 = Matrix{Float64}(I,6,6)
I18 = Matrix{Float64}(I,18,18)
λ = 1/100
wx = Float64[ 0 -1 0;
              1  0 0;
              0  0 0;]
Ak_1 = [ wx     -I    O3x12;
         O3x3   λ*I   O3x12;
         O12x18            ;]
Fk_1 = exp(Ak_1);
sFk_1 = sparse(Fk_1);
mul_sFk_1! = compile_lmul!(sFk_1);
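# Sanity check (not in the original gist): the generated function contains one `+=` statement
# per stored entry of sFk_1 and per column of B, i.e. nnz(sFk_1) * 18 statements in total.
nnz(sFk_1) * 18    # number of unrolled statements in mul_sFk_1!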
aux1 = zeros(18,18);
Pu = Matrix{Float64}(I,18,18)
@btime mul!(aux1, Fk_1, Pu);
# 619.859 ns (0 allocations: 0 bytes)
@btime mul!(aux1, sFk_1, Pu, 1, 1);
# 692.493 ns (0 allocations: 0 bytes)
@btime mul_sFk_1!(aux1, Pu);
# 157.352 ns (0 allocations: 0 bytes)
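# Quick correctness check (not part of the original gist): the compiled kernel accumulates into C,
# so reset the output buffer before comparing against the dense product.
fill!(aux1, 0.0);
mul_sFk_1!(aux1, Pu);
aux1 ≈ Fk_1 * Pu    # expected to be true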