Skip to content

Instantly share code, notes, and snippets.

@georgemarrows
Last active April 11, 2017 14:03
Show Gist options
  • Save georgemarrows/678876779e7292a954a344aa3addeaf1 to your computer and use it in GitHub Desktop.
Save georgemarrows/678876779e7292a954a344aa3addeaf1 to your computer and use it in GitHub Desktop.
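Performance improvements for Julia `Diagonal * Vector`, `Diagonal * Matrix`, and the corresponding adjoint/transpose and in-place (`A_mul_B!`, `Ac_mul_B!`, `At_mul_B!`) products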

Performance tests for JuliaLang/julia#21302

begin
    # Time Diagonal-times-dense-matrix products, both the allocating operator
    # forms (*, '*, .'*) and the in-place A*_mul_B! forms, for three element
    # types: real, complex, and 2x2-matrix-valued entries.
    n = 1000
    mats = [
        # real entries
        (Diagonal(randn(n)), randn(n, n)),
        # complex entries
        (Diagonal(randn(n) + im * randn(n)), randn(n, n) + im*randn(n, n)),
        # entries that are themselves 2x2 matrices
        (Diagonal(map(x -> [(x+1) x; x x], 1:100)),
         map(x -> [(x + 1) x; x x], randn(100,100)))
    ]

    for (dmat, dense) in mats
        # destination buffer reused by every in-place call below
        dst = similar(dense)

        # warmup: call each timed expression once before measuring it
        A_mul_B!(dst, dmat, dense)
        Ac_mul_B!(dst, dmat, dense)
        At_mul_B!(dst, dmat, dense)
        dmat*dense
        dmat'*dense
        dmat.'*dense

        # header line identifying the operand types for this group of timings
        println()
        println(typeof(dmat), " ", typeof(dense))

        # allocating operator forms; a collection is forced before each timing
        print("*\t\t")
        gc()
        @time dmat*dense

        print("'*\t\t")
        gc()
        @time dmat'*dense

        print(".'*\t\t")
        gc()
        @time dmat.'*dense

        println()

        # in-place forms writing into dst
        print("A_mul_B!\t")
        gc()
        @time A_mul_B!(dst, dmat, dense)

        print("Ac_mul_B!\t")
        gc()
        @time Ac_mul_B!(dst, dmat, dense)

        print("At_mul_B!\t")
        gc()
        @time At_mul_B!(dst, dmat, dense)
    end

    # make the block evaluate to nothing rather than the loop's value
    nothing
end

begin
    # Time Diagonal-times-vector products, both the allocating operator forms
    # (*, '*, .'*) and the in-place A*_mul_B! forms, for three element types:
    # real, complex, and 2x2-matrix-valued entries.
    mats = [
        Diagonal(randn(10000)),
        Diagonal(randn(10000) + im * randn(10000)),
        Diagonal(map(x -> [(x+1) x; x x], 1:1000))
    ]

    for dmat in mats
        # the right-hand side is a copy of the diagonal itself, so its element
        # type and length always match dmat
        src = copy(dmat.diag)
        # destination buffer reused by every in-place call below
        dst = similar(dmat.diag)

        # warmup: call each timed expression once before measuring it
        A_mul_B!(dst, dmat, src)
        Ac_mul_B!(dst, dmat, src)
        At_mul_B!(dst, dmat, src)
        dmat*src
        dmat'*src
        dmat.'*src

        # header line identifying the operand types for this group of timings
        println()
        println(typeof(dmat), " ", typeof(src))

        # allocating operator forms; a collection is forced before each timing
        print("*\t\t")
        gc()
        @time dmat*src

        print("'*\t\t")
        gc()
        @time dmat'*src

        print(".'*\t\t")
        gc()
        @time dmat.'*src

        println()

        # in-place forms writing into dst
        print("A_mul_B!\t")
        gc()
        @time A_mul_B!(dst, dmat, src)

        print("Ac_mul_B!\t")
        gc()
        @time Ac_mul_B!(dst, dmat, src)

        print("At_mul_B!\t")
        gc()
        @time At_mul_B!(dst, dmat, src)
    end

    # make the block evaluate to nothing rather than the loop's value
    nothing
end
Before: current master (ed1049a56e)


julia> include("test2.jl")

Diagonal{Float64} Array{Float64,2}
*		  0.001198 seconds (2 allocations: 7.629 MiB)
'*		  1.656632 seconds (2 allocations: 7.629 MiB)
.'*		  1.661888 seconds (2 allocations: 7.629 MiB)

A_mul_B!	  1.480572 seconds (6 allocations: 336 bytes)
Ac_mul_B!	  1.657474 seconds
At_mul_B!	  1.636803 seconds

Diagonal{Complex{Float64}} Array{Complex{Float64},2}
*		  0.004676 seconds (2 allocations: 15.259 MiB)
'*		  2.178191 seconds (2 allocations: 15.259 MiB)
.'*		  2.008998 seconds (2 allocations: 15.259 MiB)

A_mul_B!	  1.502208 seconds (6 allocations: 336 bytes)
Ac_mul_B!	  2.163784 seconds
At_mul_B!	  2.051391 seconds

Diagonal{Array{Int64,2}} Array{Array{Float64,2},2}
*		  0.000611 seconds (10.00 k allocations: 1.144 MiB)
'*		  0.230182 seconds (3.05 M allocations: 325.830 MiB, 17.70% gc time)
.'*		  0.241858 seconds (3.05 M allocations: 325.830 MiB, 17.68% gc time)

A_mul_B!	  0.238180 seconds (3.05 M allocations: 325.754 MiB, 17.32% gc time)
Ac_mul_B!	  0.231788 seconds (3.05 M allocations: 325.754 MiB, 18.28% gc time)
At_mul_B!	  0.237086 seconds (3.05 M allocations: 325.754 MiB, 17.51% gc time)

Diagonal{Float64} Array{Float64,1}
*		  0.000026 seconds (2 allocations: 78.203 KiB)
'*		  1.164684 seconds (2 allocations: 78.203 KiB)
.'*		  1.168408 seconds (2 allocations: 78.203 KiB)

A_mul_B!	  1.704508 seconds
Ac_mul_B!	  1.165695 seconds
At_mul_B!	  1.168382 seconds

Diagonal{Complex{Float64}} Array{Complex{Float64},1}
*		  0.000042 seconds (2 allocations: 156.328 KiB)
'*		  1.286453 seconds (2 allocations: 156.328 KiB)
.'*		  1.321514 seconds (2 allocations: 156.328 KiB)

A_mul_B!	  1.767199 seconds
Ac_mul_B!	  1.313114 seconds
At_mul_B!	  1.321353 seconds

Diagonal{Array{Int64,2}} Array{Array{Int64,2},1}
*		  0.000089 seconds (1.00 k allocations: 117.313 KiB)
'*		  0.276157 seconds (3.00 M allocations: 320.976 MiB, 9.45% gc time)
.'*		  0.271997 seconds (3.00 M allocations: 320.976 MiB, 9.57% gc time)

A_mul_B!	  0.230046 seconds (3.00 M allocations: 320.968 MiB, 9.47% gc time)
Ac_mul_B!	  0.259102 seconds (3.00 M allocations: 320.968 MiB, 6.08% gc time)
At_mul_B!	  0.279454 seconds (3.00 M allocations: 320.968 MiB, 9.33% gc time)



After: this PR

julia> include("test2.jl")

Diagonal{Float64} Array{Float64,2}
*		  0.002358 seconds (2 allocations: 7.629 MiB)
'*		  0.002867 seconds (2 allocations: 7.629 MiB)
.'*		  0.002473 seconds (2 allocations: 7.629 MiB)

A_mul_B!	  0.001265 seconds
Ac_mul_B!	  0.001444 seconds
At_mul_B!	  0.001404 seconds

Diagonal{Complex{Float64}} Array{Complex{Float64},2}
*		  0.005271 seconds (2 allocations: 15.259 MiB)
'*		  0.005098 seconds (2 allocations: 15.259 MiB)
.'*		  0.004995 seconds (2 allocations: 15.259 MiB)

A_mul_B!	  0.002655 seconds
Ac_mul_B!	  0.002684 seconds
At_mul_B!	  0.002586 seconds

Diagonal{Array{Int64,2}} Array{Array{Float64,2},2}
*		  0.000843 seconds (10.00 k allocations: 1.144 MiB)
'*		  0.001460 seconds (20.00 k allocations: 2.213 MiB)
.'*		  0.001441 seconds (20.00 k allocations: 2.213 MiB)

A_mul_B!	  0.000703 seconds (10.00 k allocations: 1.068 MiB)
Ac_mul_B!	  0.001697 seconds (20.00 k allocations: 2.136 MiB)
At_mul_B!	  0.001417 seconds (20.00 k allocations: 2.136 MiB)

Diagonal{Float64} Array{Float64,1}
*		  0.000025 seconds (2 allocations: 78.203 KiB)
'*		  0.000038 seconds (2 allocations: 78.203 KiB)
.'*		  0.000026 seconds (2 allocations: 78.203 KiB)

A_mul_B!	  0.000022 seconds
Ac_mul_B!	  0.000020 seconds
At_mul_B!	  0.000021 seconds

Diagonal{Complex{Float64}} Array{Complex{Float64},1}
*		  0.000047 seconds (2 allocations: 156.328 KiB)
'*		  0.000045 seconds (2 allocations: 156.328 KiB)
.'*		  0.000070 seconds (2 allocations: 156.328 KiB)

A_mul_B!	  0.000052 seconds
Ac_mul_B!	  0.000042 seconds
At_mul_B!	  0.000121 seconds

Diagonal{Array{Int64,2}} Array{Array{Int64,2},1}
*		  0.000078 seconds (1.00 k allocations: 117.313 KiB)
'*		  0.000187 seconds (2.00 k allocations: 226.688 KiB)
.'*		  0.000143 seconds (2.00 k allocations: 226.688 KiB)

A_mul_B!	  0.000075 seconds (1000 allocations: 109.375 KiB)
Ac_mul_B!	  0.000114 seconds (2.00 k allocations: 218.750 KiB)
At_mul_B!	  0.000175 seconds (2.00 k allocations: 218.750 KiB)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment