Skip to content

Instantly share code, notes, and snippets.

@jlperla
Last active September 3, 2020 19:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jlperla/36870816a090e7e6ee11eb472ce761a5 to your computer and use it in GitHub Desktop.
Save jlperla/36870816a090e7e6ee11eb472ce761a5 to your computer and use it in GitHub Desktop.
Quadratic form computational kernel benchmarking
Raw Julia 2-tensor for N = 10
164.902 ns (1 allocation: 160 bytes)
629.167 ns (6 allocations: 1.66 KiB)
Tullio with Tensor Julia 2-tensor for N = 10
3.500 μs (8 allocations: 1.09 KiB)
20.200 μs (44 allocations: 3.67 KiB)
Raw Julia 3-tensor for N = 10
3.237 μs (21 allocations: 10.47 KiB)
88.900 μs (464 allocations: 191.86 KiB)
TensorCast Julia 3-tensor for N = 10
525.392 ns (6 allocations: 1.22 KiB)
15.200 μs (183 allocations: 17.25 KiB)
AVX Julia 3-tensor for N = 10
292.509 ns (1 allocation: 160 bytes)
30.400 μs (195 allocations: 15.75 KiB)
Tullio with Tensor Julia 3-tensor for N = 10
16.300 μs (61 allocations: 3.44 KiB)
161.601 μs (485 allocations: 32.52 KiB)
Tullio Base Julia 3-tensor for N = 10
1.590 μs (1 allocation: 160 bytes)
35.300 μs (195 allocations: 15.75 KiB)
Raw Julia 2-tensor for N = 100
84.999 μs (1 allocation: 896 bytes)
216.500 μs (7 allocations: 82.58 KiB)
Tullio with Tensor Julia 2-tensor for N = 100
3.186 μs (7 allocations: 1.09 KiB)
38.499 μs (45 allocations: 81.72 KiB)
Raw Julia 3-tensor for N = 100
15.756 ms (301 allocations: 7.72 MiB)
823.394 ms (3653 allocations: 1.50 GiB)
TensorCast Julia 3-tensor for N = 100
240.400 μs (7 allocations: 79.27 KiB)
3.012 ms (906 allocations: 7.82 MiB)
AVX Julia 3-tensor for N = 100
103.899 μs (50 allocations: 4.44 KiB)
4.398 ms (966 allocations: 7.67 MiB)
Tullio with Tensor Julia 3-tensor for N = 100
929.800 μs (187 allocations: 16.84 KiB)
5.434 ms (1715 allocations: 7.73 MiB)
Tullio Base Julia 3-tensor for N = 100
494.101 μs (50 allocations: 4.44 KiB)
7.715 ms (966 allocations: 7.67 MiB)
using LinearAlgebra, Tullio, Test, ForwardDiff, Random, Zygote, BenchmarkTools, LoopVectorization, TensorOperations, TensorCast
# 2-tensors
"""
    quad_form_raw(x, A::AbstractMatrix)

Evaluate the quadratic form `x' * A * x` using ordinary matrix/vector products.
"""
function quad_form_raw(x, A::AbstractMatrix)
    return adjoint(x) * A * x
end
"""
    quad_form_ten(x, A::AbstractArray{<:Number,2})

Evaluate the scalar quadratic form `x[j] * A[j,k] * x[k]`, summed over `j` and `k`, with
the `@tullio` macro and no extra macro options.
"""
function quad_form_ten(x, A::AbstractArray{<:Number,2})
    return @tullio q := x[j] * A[j, k] * x[k]
end
# 3-tensors
"""
    quad_form_raw(x, A::AbstractArray{<:Number,3})

Return a vector with one quadratic form per slice of `A` along its first dimension: the
entry for index `i` is `x' * A[i, :, :] * x`.

Each slice `A[i, :, :]` is copied into a fresh matrix before it is multiplied.
"""
function quad_form_raw(x, A::AbstractArray{<:Number,3})
    # Iterate over `axes(A, 1)` rather than `1:size(A, 1)` so that arrays whose first
    # axis does not start at 1 are indexed correctly.
    return [x' * A[i, :, :] * x for i in axes(A, 1)]
end
# 3-tensor quadratic form written as two nested `@matmul` calls: the inner call sums
# `A[l,j,k] * x[k]` over `k`, and the outer call sums that result against `x[j]` over `j`
# to produce `c[l]`.
# NOTE(review): the inner macro result is used without explicit `[l,j]` indices; this
# relies on how `@matmul` parses the expression, so re-verify after any TensorCast upgrade.
quad_form_mul(x, A::AbstractArray{<:Number,3}) = @matmul c[l] := sum(j) (@matmul [l,j] := sum(k) A[l,j,k] * x[k]) * x[j]
"""
    quad_form_avx(x, A::AbstractArray{<:Number,3})

Compute `c[l]` as `x[j] * A[l,j,k] * x[k]` summed over `j` and `k`, using `@tullio` with
the option `tensor=false`.
"""
function quad_form_avx(x, A::AbstractArray{<:Number,3})
    return @tullio out[l] := x[j] * A[l, j, k] * x[k] tensor=false
end
"""
    quad_form_ten(x, A::AbstractArray{<:Number,3})

Compute `c[l]` as `x[j] * A[l,j,k] * x[k]` summed over `j` and `k`, using `@tullio` with
no extra macro options.
"""
function quad_form_ten(x, A::AbstractArray{<:Number,3})
    return @tullio out[l] := x[j] * A[l, j, k] * x[k]
end
"""
    quad_form_base(x, A::AbstractArray{<:Number,3})

Compute `c[l]` as `x[j] * A[l,j,k] * x[k]` summed over `j` and `k`, using `@tullio` with
the options `tensor=false` and `avx=false`.
"""
function quad_form_base(x, A::AbstractArray{<:Number,3})
    return @tullio out[l] := x[j] * A[l, j, k] * x[k] tensor=false avx=false
end
# benchmarking helpers
"""
    scalar_objective(f, x, A)

Reduce the output of the kernel call `f(x, A)` to a single number by taking its `norm`.
"""
function scalar_objective(f, x, A)
    value = f(x, A)
    return norm(value)
end
"""
    print_benchmarks(N)

Benchmark every quadratic-form kernel on random inputs of size `N`.

For each kernel a red heading is printed, followed by two `@btime` measurements: the
forward evaluation of `scalar_objective` and a `gradient` call on it. A random `N×N`
matrix feeds the 2-tensor kernels, a random `N×N×N` array feeds the 3-tensor kernels, and
the same random length-`N` vector is used throughout.
"""
function print_benchmarks(N)
    # Inputs are drawn in the order matrix, 3-tensor, vector.
    mat = rand(N, N)
    cube = rand(N, N, N)
    v = rand(N)

    # Every section heading is printed in red.
    heading(text) = printstyled(text, color=:red)

    # 2-tensors
    heading("Raw Julia 2-tensor for N = $N\n")
    @btime scalar_objective(quad_form_raw, $v, $mat)
    @btime gradient(scalar_objective, quad_form_raw, $v, $mat)

    heading("Tullio with Tensor Julia 2-tensor for N = $N\n")
    @btime scalar_objective(quad_form_ten, $v, $mat)
    @btime gradient(scalar_objective, quad_form_ten, $v, $mat)

    # 3-tensors
    heading("Raw Julia 3-tensor for N = $N\n")
    @btime scalar_objective(quad_form_raw, $v, $cube)
    @btime gradient(scalar_objective, quad_form_raw, $v, $cube)

    heading("TensorCast Julia 3-tensor for N = $N\n")
    @btime scalar_objective(quad_form_mul, $v, $cube)
    @btime gradient(scalar_objective, quad_form_mul, $v, $cube)

    heading("AVX Julia 3-tensor for N = $N\n")
    @btime scalar_objective(quad_form_avx, $v, $cube)
    @btime gradient(scalar_objective, quad_form_avx, $v, $cube)

    heading("Tullio with Tensor Julia 3-tensor for N = $N\n")
    @btime scalar_objective(quad_form_ten, $v, $cube)
    @btime gradient(scalar_objective, quad_form_ten, $v, $cube)

    heading("Tullio Base Julia 3-tensor for N = $N\n")
    @btime scalar_objective(quad_form_base, $v, $cube)
    @btime gradient(scalar_objective, quad_form_base, $v, $cube)
end
# Run the benchmark suite. Only one problem size N is active at a time; move the comment
# marker to benchmark a different size.
#print_benchmarks(10)
print_benchmarks(100)
#print_benchmarks(250)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment