Last active
September 3, 2020 19:31
-
-
Save jlperla/36870816a090e7e6ee11eb472ce761a5 to your computer and use it in GitHub Desktop.
Quadratic form computational kernel benchmarking
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Raw Julia 2-tensor for N = 10 | |
164.902 ns (1 allocation: 160 bytes) | |
629.167 ns (6 allocations: 1.66 KiB) | |
Tullio with Tensor Julia 2-tensor for N = 10 | |
3.500 μs (8 allocations: 1.09 KiB) | |
20.200 μs (44 allocations: 3.67 KiB) | |
Raw Julia 3-tensor for N = 10 | |
3.237 μs (21 allocations: 10.47 KiB) | |
88.900 μs (464 allocations: 191.86 KiB) | |
TensorCast Julia 3-tensor for N = 10 | |
525.392 ns (6 allocations: 1.22 KiB) | |
15.200 μs (183 allocations: 17.25 KiB) | |
AVX Julia 3-tensor for N = 10 | |
292.509 ns (1 allocation: 160 bytes) | |
30.400 μs (195 allocations: 15.75 KiB) | |
Tullio with Tensor Julia 3-tensor for N = 10 | |
16.300 μs (61 allocations: 3.44 KiB) | |
161.601 μs (485 allocations: 32.52 KiB) | |
Tullio Base Julia 3-tensor for N = 10 | |
1.590 μs (1 allocation: 160 bytes) | |
35.300 μs (195 allocations: 15.75 KiB) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Raw Julia 2-tensor for N = 100 | |
84.999 μs (1 allocation: 896 bytes) | |
216.500 μs (7 allocations: 82.58 KiB) | |
Tullio with Tensor Julia 2-tensor for N = 100 | |
3.186 μs (7 allocations: 1.09 KiB) | |
38.499 μs (45 allocations: 81.72 KiB) | |
Raw Julia 3-tensor for N = 100 | |
15.756 ms (301 allocations: 7.72 MiB) | |
823.394 ms (3653 allocations: 1.50 GiB) | |
TensorCast Julia 3-tensor for N = 100 | |
240.400 μs (7 allocations: 79.27 KiB) | |
3.012 ms (906 allocations: 7.82 MiB) | |
AVX Julia 3-tensor for N = 100 | |
103.899 μs (50 allocations: 4.44 KiB) | |
4.398 ms (966 allocations: 7.67 MiB) | |
Tullio with Tensor Julia 3-tensor for N = 100 | |
929.800 μs (187 allocations: 16.84 KiB) | |
5.434 ms (1715 allocations: 7.73 MiB) | |
Tullio Base Julia 3-tensor for N = 100 | |
494.101 μs (50 allocations: 4.44 KiB) | |
7.715 ms (966 allocations: 7.67 MiB) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using LinearAlgebra, Tullio, Test, ForwardDiff, Random, Zygote, BenchmarkTools, LoopVectorization, TensorOperations, TensorCast | |
# 2-tensors | |
"""
    quad_form_raw(x, A::AbstractMatrix)

Return the quadratic form `x' * A * x` using plain array multiplication.
"""
function quad_form_raw(x, A::AbstractMatrix)
    # Kept as one n-ary product so the evaluation order matches `x' * A * x`.
    return adjoint(x) * A * x
end
"""
    quad_form_ten(x, A::AbstractArray{<:Number,2})

Return the quadratic form of `x` with the matrix `A`, written as a `@tullio`
index expression: `j` and `k` appear only on the right-hand side, so both are
summed over and the result is a scalar.
"""
function quad_form_ten(x, A::AbstractArray{<:Number,2})
    @tullio c := x[j] * A[j,k] * x[k]
end
# 3-tensors | |
"""
    quad_form_raw(x, A::AbstractArray{<:Number,3})

Return a vector with one entry per index `i` along the first dimension of `A`,
each entry being the quadratic form `x' * A[i, :, :] * x`.

Each slice is taken with `view`, so no copy of the slice is allocated per
iteration, and the first dimension is traversed with `axes(A, 1)` so arrays
whose indices do not start at 1 are handled correctly.
"""
function quad_form_raw(x, A::AbstractArray{<:Number,3})
    return [x' * view(A, i, :, :) * x for i in axes(A, 1)]
end
"""
    quad_form_mul(x, A::AbstractArray{<:Number,3})

Return the vector `c[l]` of quadratic forms of `x` with each slice `A[l, :, :]`,
written as two nested `@matmul` contractions: the inner one sums over `k`, the
outer one sums over `j`.
"""
function quad_form_mul(x, A::AbstractArray{<:Number,3})
    @matmul c[l] := sum(j) (@matmul [l,j] := sum(k) A[l,j,k] * x[k]) * x[j]
end
"""
    quad_form_avx(x, A::AbstractArray{<:Number,3})

Return the vector `c[l]` of quadratic forms of `x` with each slice `A[l, :, :]`,
using `@tullio` with the `tensor=false` option. This is the variant reported
under the "AVX" label by `print_benchmarks`.

NOTE(review): presumably the vectorised path depends on LoopVectorization being
loaded alongside Tullio; confirm against the Tullio documentation.
"""
function quad_form_avx(x, A::AbstractArray{<:Number,3})
    @tullio c[l] := x[j] * A[l,j,k] * x[k] tensor=false
end
"""
    quad_form_ten(x, A::AbstractArray{<:Number,3})

Return the vector `c[l]` of quadratic forms of `x` with each slice `A[l, :, :]`,
using `@tullio` with its default options. This is the variant reported under
the "Tullio with Tensor" label by `print_benchmarks`.
"""
function quad_form_ten(x, A::AbstractArray{<:Number,3})
    @tullio c[l] := x[j] * A[l,j,k] * x[k]
end
"""
    quad_form_base(x, A::AbstractArray{<:Number,3})

Return the vector `c[l]` of quadratic forms of `x` with each slice `A[l, :, :]`,
using `@tullio` with both `tensor=false` and `avx=false`. This is the variant
reported under the "Tullio Base" label by `print_benchmarks`.
"""
function quad_form_base(x, A::AbstractArray{<:Number,3})
    @tullio c[l] := x[j] * A[l,j,k] * x[k] tensor=false avx=false
end
# Benchmark harness: scalar objective and timing driver
"""
    scalar_objective(f, x, A)

Apply the kernel `f` to `(x, A)` and reduce its result to a single number with
`norm`, so that scalar- and vector-valued kernels can be timed and
differentiated through the same entry point.
"""
function scalar_objective(f, x, A)
    kernel_value = f(x, A)
    return norm(kernel_value)
end
"""
    print_benchmarks(N)

Time every quadratic-form kernel on random inputs of size `N`.

For each kernel a red heading is printed, followed by the `@btime` report for
evaluating `scalar_objective` and the `@btime` report for its `gradient`.
The value of the last timed gradient call is returned.
"""
function print_benchmarks(N)
    # Inputs are drawn in the order matrix, 3-tensor, vector.
    matrix = rand(N, N)
    cube = rand(N, N, N)
    x = rand(N)

    # (heading, kernel, tensor argument), in reporting order.
    cases = (
        # 2-tensors
        ("Raw Julia 2-tensor for N = $N\n", quad_form_raw, matrix),
        ("Tullio with Tensor Julia 2-tensor for N = $N\n", quad_form_ten, matrix),
        # 3-tensors
        ("Raw Julia 3-tensor for N = $N\n", quad_form_raw, cube),
        ("TensorCast Julia 3-tensor for N = $N\n", quad_form_mul, cube),
        ("AVX Julia 3-tensor for N = $N\n", quad_form_avx, cube),
        ("Tullio with Tensor Julia 3-tensor for N = $N\n", quad_form_ten, cube),
        ("Tullio Base Julia 3-tensor for N = $N\n", quad_form_base, cube),
    )

    last_gradient = nothing
    for (heading, kernel, A) in cases
        printstyled(heading, color=:red)
        @btime scalar_objective($kernel, $x, $A)
        last_gradient = @btime gradient(scalar_objective, $kernel, $x, $A)
    end
    return last_gradient
end
# Run the benchmark suite for a single problem size.
# Other sizes to try: N = 10 and N = 250.
print_benchmarks(100)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment