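Performance tests for JuliaLang/julia#21302 (the script below is the `test2.jl` that is `include`d in the "Before" and "After" runs further down)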
# Benchmark 1: multiplying a `Diagonal` matrix D by a dense matrix B.
#
# Written for pre-1.0 Julia: `A_mul_B!`, `Ac_mul_B!`, `At_mul_B!`, `gc()` and the
# `.'` transpose operator are the old spellings and are used here on purpose.
#
# For each (D, B) pair the script prints the two types, then one labelled
# `@time` line per operation:
#   *          D*B        (allocating product)
#   '*         D'*B       (conjugate-transposed D times B)
#   .'*        D.'*B      (transposed D times B)
#   A_mul_B!   in-place D*B   written into BB
#   Ac_mul_B!  in-place D'*B  written into BB
#   At_mul_B!  in-place D.'*B written into BB
begin
# Size used for the real and complex cases: D is n-by-n diagonal, B is n-by-n dense.
n = 1000
# Test cases, one (D, B) tuple each:
#   1. real Float64 elements
#   2. complex Float64 elements
#   3. matrix-valued elements: D holds 100 2x2 integer blocks on its diagonal,
#      B is a 100x100 array whose entries are 2x2 Float64 matrices
mats = [ (Diagonal(randn(n)),
randn(n, n)),
(Diagonal(randn(n) + im * randn(n)),
randn(n, n) + im*randn(n, n)),
(Diagonal(map(x -> [(x+1) x; x x], 1:100)),
map(x -> [(x + 1) x; x x], randn(100,100)))
]
for (D, B) in mats
# Destination array for the in-place variants; same shape and element type as B.
BB = similar(B)
# Warm-up: call every operation once before timing it, since the first call of a
# method includes compilation and would distort the `@time` figures.
A_mul_B!(BB, D, B)
Ac_mul_B!(BB, D, B)
At_mul_B!(BB, D, B)
D*B
D'*B
D.'*B
# Header for this case: blank line, then the concrete types being multiplied.
println()
println(typeof(D), " ", typeof(B))
# Allocating operators. Each line prints its label, runs `gc()` (presumably so
# garbage left by earlier steps is not collected inside the timed call), then
# times a single evaluation.
print("*\t\t"); gc(); @time D*B
print("'*\t\t"); gc(); @time D'*B
print(".'*\t\t"); gc(); @time D.'*B
println()
# In-place variants writing into the preallocated BB, timed the same way.
print("A_mul_B!\t"); gc(); @time A_mul_B!(BB, D, B)
print("Ac_mul_B!\t"); gc(); @time Ac_mul_B!(BB, D, B)
print("At_mul_B!\t"); gc(); @time At_mul_B!(BB, D, B)
end
# Make the whole block evaluate to `nothing`.
nothing
end
# Benchmark 2: multiplying a `Diagonal` matrix D by a vector v.
#
# Written for pre-1.0 Julia: `A_mul_B!`, `Ac_mul_B!`, `At_mul_B!`, `gc()` and the
# `.'` transpose operator are the old spellings and are used here on purpose.
#
# For each D the script prints the types of D and v, then one labelled `@time`
# line for each of: D*v, D'*v, D.'*v, and the in-place `A_mul_B!`, `Ac_mul_B!`
# and `At_mul_B!` calls that write into vv.
begin
# Test matrices:
#   1. 10000 real Float64 diagonal entries
#   2. 10000 complex Float64 diagonal entries
#   3. 1000 diagonal entries that are themselves 2x2 integer matrices
mats = [Diagonal(randn(10000)),
Diagonal(randn(10000) + im * randn(10000)),
Diagonal(map(x -> [(x+1) x; x x], 1:1000))]
for D in mats
# Right-hand-side vector: a copy of D's own diagonal, so its length and element
# type always match D.
v = copy(D.diag)
# Destination vector for the in-place variants.
vv = similar(D.diag)
# Warm-up: call every operation once before timing it, since the first call of a
# method includes compilation and would distort the `@time` figures.
A_mul_B!(vv, D, v)
Ac_mul_B!(vv, D, v)
At_mul_B!(vv, D, v)
D*v
D'*v
D.'*v
# Header for this case: blank line, then the concrete types being multiplied.
println()
println(typeof(D), " ", typeof(v))
# Allocating operators. Each line prints its label, runs `gc()` (presumably so
# garbage left by earlier steps is not collected inside the timed call), then
# times a single evaluation.
print("*\t\t"); gc(); @time D*v
print("'*\t\t"); gc(); @time D'*v
print(".'*\t\t"); gc(); @time D.'*v
println()
# In-place variants writing into the preallocated vv, timed the same way.
print("A_mul_B!\t"); gc(); @time A_mul_B!(vv, D, v)
print("Ac_mul_B!\t"); gc(); @time Ac_mul_B!(vv, D, v)
print("At_mul_B!\t"); gc(); @time At_mul_B!(vv, D, v)
end
# Make the whole block evaluate to `nothing`.
nothing
end
Before: current master (ed1049a56e)
julia> include("test2.jl")
Diagonal{Float64} Array{Float64,2}
* 0.001198 seconds (2 allocations: 7.629 MiB)
'* 1.656632 seconds (2 allocations: 7.629 MiB)
.'* 1.661888 seconds (2 allocations: 7.629 MiB)
A_mul_B! 1.480572 seconds (6 allocations: 336 bytes)
Ac_mul_B! 1.657474 seconds
At_mul_B! 1.636803 seconds
Diagonal{Complex{Float64}} Array{Complex{Float64},2}
* 0.004676 seconds (2 allocations: 15.259 MiB)
'* 2.178191 seconds (2 allocations: 15.259 MiB)
.'* 2.008998 seconds (2 allocations: 15.259 MiB)
A_mul_B! 1.502208 seconds (6 allocations: 336 bytes)
Ac_mul_B! 2.163784 seconds
At_mul_B! 2.051391 seconds
Diagonal{Array{Int64,2}} Array{Array{Float64,2},2}
* 0.000611 seconds (10.00 k allocations: 1.144 MiB)
'* 0.230182 seconds (3.05 M allocations: 325.830 MiB, 17.70% gc time)
.'* 0.241858 seconds (3.05 M allocations: 325.830 MiB, 17.68% gc time)
A_mul_B! 0.238180 seconds (3.05 M allocations: 325.754 MiB, 17.32% gc time)
Ac_mul_B! 0.231788 seconds (3.05 M allocations: 325.754 MiB, 18.28% gc time)
At_mul_B! 0.237086 seconds (3.05 M allocations: 325.754 MiB, 17.51% gc time)
Diagonal{Float64} Array{Float64,1}
* 0.000026 seconds (2 allocations: 78.203 KiB)
'* 1.164684 seconds (2 allocations: 78.203 KiB)
.'* 1.168408 seconds (2 allocations: 78.203 KiB)
A_mul_B! 1.704508 seconds
Ac_mul_B! 1.165695 seconds
At_mul_B! 1.168382 seconds
Diagonal{Complex{Float64}} Array{Complex{Float64},1}
* 0.000042 seconds (2 allocations: 156.328 KiB)
'* 1.286453 seconds (2 allocations: 156.328 KiB)
.'* 1.321514 seconds (2 allocations: 156.328 KiB)
A_mul_B! 1.767199 seconds
Ac_mul_B! 1.313114 seconds
At_mul_B! 1.321353 seconds
Diagonal{Array{Int64,2}} Array{Array{Int64,2},1}
* 0.000089 seconds (1.00 k allocations: 117.313 KiB)
'* 0.276157 seconds (3.00 M allocations: 320.976 MiB, 9.45% gc time)
.'* 0.271997 seconds (3.00 M allocations: 320.976 MiB, 9.57% gc time)
A_mul_B! 0.230046 seconds (3.00 M allocations: 320.968 MiB, 9.47% gc time)
Ac_mul_B! 0.259102 seconds (3.00 M allocations: 320.968 MiB, 6.08% gc time)
At_mul_B! 0.279454 seconds (3.00 M allocations: 320.968 MiB, 9.33% gc time)
After: this PR
julia> include("test2.jl")
Diagonal{Float64} Array{Float64,2}
* 0.002358 seconds (2 allocations: 7.629 MiB)
'* 0.002867 seconds (2 allocations: 7.629 MiB)
.'* 0.002473 seconds (2 allocations: 7.629 MiB)
A_mul_B! 0.001265 seconds
Ac_mul_B! 0.001444 seconds
At_mul_B! 0.001404 seconds
Diagonal{Complex{Float64}} Array{Complex{Float64},2}
* 0.005271 seconds (2 allocations: 15.259 MiB)
'* 0.005098 seconds (2 allocations: 15.259 MiB)
.'* 0.004995 seconds (2 allocations: 15.259 MiB)
A_mul_B! 0.002655 seconds
Ac_mul_B! 0.002684 seconds
At_mul_B! 0.002586 seconds
Diagonal{Array{Int64,2}} Array{Array{Float64,2},2}
* 0.000843 seconds (10.00 k allocations: 1.144 MiB)
'* 0.001460 seconds (20.00 k allocations: 2.213 MiB)
.'* 0.001441 seconds (20.00 k allocations: 2.213 MiB)
A_mul_B! 0.000703 seconds (10.00 k allocations: 1.068 MiB)
Ac_mul_B! 0.001697 seconds (20.00 k allocations: 2.136 MiB)
At_mul_B! 0.001417 seconds (20.00 k allocations: 2.136 MiB)
Diagonal{Float64} Array{Float64,1}
* 0.000025 seconds (2 allocations: 78.203 KiB)
'* 0.000038 seconds (2 allocations: 78.203 KiB)
.'* 0.000026 seconds (2 allocations: 78.203 KiB)
A_mul_B! 0.000022 seconds
Ac_mul_B! 0.000020 seconds
At_mul_B! 0.000021 seconds
Diagonal{Complex{Float64}} Array{Complex{Float64},1}
* 0.000047 seconds (2 allocations: 156.328 KiB)
'* 0.000045 seconds (2 allocations: 156.328 KiB)
.'* 0.000070 seconds (2 allocations: 156.328 KiB)
A_mul_B! 0.000052 seconds
Ac_mul_B! 0.000042 seconds
At_mul_B! 0.000121 seconds
Diagonal{Array{Int64,2}} Array{Array{Int64,2},1}
* 0.000078 seconds (1.00 k allocations: 117.313 KiB)
'* 0.000187 seconds (2.00 k allocations: 226.688 KiB)
.'* 0.000143 seconds (2.00 k allocations: 226.688 KiB)
A_mul_B! 0.000075 seconds (1000 allocations: 109.375 KiB)
Ac_mul_B! 0.000114 seconds (2.00 k allocations: 218.750 KiB)
At_mul_B! 0.000175 seconds (2.00 k allocations: 218.750 KiB)