jlperla/results_N_10.txt

## results_N_10.txt
Raw Julia 2-tensor for N = 10
  164.902 ns (1 allocation: 160 bytes)
  629.167 ns (6 allocations: 1.66 KiB)
Tullio with Tensor Julia 2-tensor for N = 10
  3.500 μs (8 allocations: 1.09 KiB)
  20.200 μs (44 allocations: 3.67 KiB)
Raw Julia 3-tensor for N = 10
  3.237 μs (21 allocations: 10.47 KiB)
  88.900 μs (464 allocations: 191.86 KiB)
TensorCast Julia 3-tensor for N = 10
  525.392 ns (6 allocations: 1.22 KiB)
  15.200 μs (183 allocations: 17.25 KiB)
AVX Julia 3-tensor for N = 10
  292.509 ns (1 allocation: 160 bytes)
  30.400 μs (195 allocations: 15.75 KiB)
Tullio with Tensor Julia 3-tensor for N = 10
  16.300 μs (61 allocations: 3.44 KiB)
  161.601 μs (485 allocations: 32.52 KiB)
Tullio Base Julia 3-tensor for N = 10
  1.590 μs (1 allocation: 160 bytes)
  35.300 μs (195 allocations: 15.75 KiB)

## results_N_100.txt
Raw Julia 2-tensor for N = 100
  84.999 μs (1 allocation: 896 bytes)
  216.500 μs (7 allocations: 82.58 KiB)
Tullio with Tensor Julia 2-tensor for N = 100
  3.186 μs (7 allocations: 1.09 KiB)
  38.499 μs (45 allocations: 81.72 KiB)
Raw Julia 3-tensor for N = 100
  15.756 ms (301 allocations: 7.72 MiB)
  823.394 ms (3653 allocations: 1.50 GiB)
TensorCast Julia 3-tensor for N = 100
  240.400 μs (7 allocations: 79.27 KiB)
  3.012 ms (906 allocations: 7.82 MiB)
AVX Julia 3-tensor for N = 100
  103.899 μs (50 allocations: 4.44 KiB)
  4.398 ms (966 allocations: 7.67 MiB)
Tullio with Tensor Julia 3-tensor for N = 100
  929.800 μs (187 allocations: 16.84 KiB)
  5.434 ms (1715 allocations: 7.73 MiB)
Tullio Base Julia 3-tensor for N = 100
  494.101 μs (50 allocations: 4.44 KiB)
  7.715 ms (966 allocations: 7.67 MiB)

## tensor_benchmarks.jl
using LinearAlgebra, Tullio, Test, ForwardDiff, Random, Zygote, BenchmarkTools, LoopVectorization, TensorOperations, TensorCast

# 2-tensors
"""
    quad_form_raw(x, A::AbstractMatrix)

Evaluate the quadratic form `x' * A * x` using ordinary matrix/vector products.
"""
function quad_form_raw(x, A::AbstractMatrix)
    return x' * A * x
end
"""
    quad_form_ten(x, A::AbstractArray{<:Number,2})

Evaluate the scalar `x[j] * A[j,k] * x[k]` summed over `j` and `k`, written as a single
`@tullio` reduction with the macro's default options.
"""
function quad_form_ten(x, A::AbstractArray{<:Number,2})
    return @tullio c := x[j] * A[j,k] * x[k]
end

# 3-tensors
"""
    quad_form_raw(x, A::AbstractArray{<:Number,3})

Return the vector whose `i`-th entry is the quadratic form `x' * A[i, :, :] * x`, one entry
per index along the first dimension of `A`.
"""
function quad_form_raw(x, A::AbstractArray{<:Number,3})
    # Iterate `axes(A, 1)` instead of `1:size(A, 1)` so the indices are valid for any
    # array index style, not only 1-based arrays.
    # `A[i, :, :]` copies each slice; left unchanged so this stays the naive baseline.
    return [x' * A[i, :, :] * x for i in axes(A, 1)]
end
"""
    quad_form_mul(x, A::AbstractArray{<:Number,3})

Compute `c[l]` as the sum over `j` and `k` of `x[j] * A[l,j,k] * x[k]` using two nested
`@matmul` reductions: the inner one sums over `k`, the outer one sums over `j`.
"""
function quad_form_mul(x, A::AbstractArray{<:Number,3})
    return @matmul c[l] := sum(j) (@matmul [l,j] := sum(k) A[l,j,k] * x[k]) * x[j]
end
"""
    quad_form_avx(x, A::AbstractArray{<:Number,3})

Compute `c[l]` as the sum over `j` and `k` of `x[j] * A[l,j,k] * x[k]` with `@tullio`,
passing `tensor=false` and leaving the macro's other options at their defaults.
"""
function quad_form_avx(x, A::AbstractArray{<:Number,3})
    return @tullio c[l] := x[j] * A[l,j,k] * x[k] tensor=false
end
"""
    quad_form_ten(x, A::AbstractArray{<:Number,3})

Compute `c[l]` as the sum over `j` and `k` of `x[j] * A[l,j,k] * x[k]` with `@tullio`,
using the macro's default options.
"""
function quad_form_ten(x, A::AbstractArray{<:Number,3})
    return @tullio c[l] := x[j] * A[l,j,k] * x[k]
end
"""
    quad_form_base(x, A::AbstractArray{<:Number,3})

Compute `c[l]` as the sum over `j` and `k` of `x[j] * A[l,j,k] * x[k]` with `@tullio`,
passing both `tensor=false` and `avx=false`.
"""
function quad_form_base(x, A::AbstractArray{<:Number,3})
    return @tullio c[l] := x[j] * A[l,j,k] * x[k] tensor=false avx=false
end

# scalar objective: reduces the output of a quadratic-form implementation to one number
"""
    scalar_objective(f, x, A)

Apply `f` to `(x, A)` and return the `norm` of the result, giving a scalar that can be
timed and differentiated regardless of whether `f` returns a number or a vector.
"""
function scalar_objective(f, x, A)
    value = f(x, A)
    return norm(value)
end

# Print `title` in red, then time `scalar_objective(f, x, A)` and its `gradient` with
# `@btime`. Returns the value produced by the gradient call.
function _benchmark_case(title, f, x, A)
    printstyled(title, color=:red)
    @btime scalar_objective($f, $x, $A)
    return @btime gradient(scalar_objective, $f, $x, $A)
end

"""
    print_benchmarks(N)

Benchmark every quadratic-form implementation on random inputs of size `N`.

Draws `A_2 = rand(N, N)`, `A_3 = rand(N, N, N)` and `x = rand(N)`, then for each
implementation prints a red heading followed by the `@btime` report for the objective and
for its `gradient`. Returns the result of the last gradient call.
"""
function print_benchmarks(N)
    # Same draw order as before: A_2, A_3, then x.
    A_2 = rand(N, N)
    A_3 = rand(N, N, N)
    x = rand(N)

    # 2-tensors
    _benchmark_case("Raw Julia 2-tensor for N = $N\n", quad_form_raw, x, A_2)
    _benchmark_case("Tullio with Tensor Julia 2-tensor for N = $N\n", quad_form_ten, x, A_2)

    # 3-tensors
    _benchmark_case("Raw Julia 3-tensor for N = $N\n", quad_form_raw, x, A_3)
    _benchmark_case("TensorCast Julia 3-tensor for N = $N\n", quad_form_mul, x, A_3)
    _benchmark_case("AVX Julia 3-tensor for N = $N\n", quad_form_avx, x, A_3)
    _benchmark_case("Tullio with Tensor Julia 3-tensor for N = $N\n", quad_form_ten, x, A_3)
    return _benchmark_case(
        "Tullio Base Julia 3-tensor for N = $N\n", quad_form_base, x, A_3
    )
end

# Run the benchmark suite. Only N = 100 is active; uncomment the other calls to
# benchmark those problem sizes as well.
#print_benchmarks(10)
print_benchmarks(100)
#print_benchmarks(250)
	Raw Julia 2-tensor for N = 10
	164.902 ns (1 allocation: 160 bytes)
	629.167 ns (6 allocations: 1.66 KiB)
	Tullio with Tensor Julia 2-tensor for N = 10
	3.500 μs (8 allocations: 1.09 KiB)
	20.200 μs (44 allocations: 3.67 KiB)
	Raw Julia 3-tensor for N = 10
	3.237 μs (21 allocations: 10.47 KiB)
	88.900 μs (464 allocations: 191.86 KiB)
	TensorCast Julia 3-tensor for N = 10
	525.392 ns (6 allocations: 1.22 KiB)
	15.200 μs (183 allocations: 17.25 KiB)
	AVX Julia 3-tensor for N = 10
	292.509 ns (1 allocation: 160 bytes)
	30.400 μs (195 allocations: 15.75 KiB)
	Tullio with Tensor Julia 3-tensor for N = 10
	16.300 μs (61 allocations: 3.44 KiB)
	161.601 μs (485 allocations: 32.52 KiB)
	Tullio Base Julia 3-tensor for N = 10
	1.590 μs (1 allocation: 160 bytes)
	35.300 μs (195 allocations: 15.75 KiB)
	Raw Julia 2-tensor for N = 100
	84.999 μs (1 allocation: 896 bytes)
	216.500 μs (7 allocations: 82.58 KiB)
	Tullio with Tensor Julia 2-tensor for N = 100
	3.186 μs (7 allocations: 1.09 KiB)
	38.499 μs (45 allocations: 81.72 KiB)
	Raw Julia 3-tensor for N = 100
	15.756 ms (301 allocations: 7.72 MiB)
	823.394 ms (3653 allocations: 1.50 GiB)
	TensorCast Julia 3-tensor for N = 100
	240.400 μs (7 allocations: 79.27 KiB)
	3.012 ms (906 allocations: 7.82 MiB)
	AVX Julia 3-tensor for N = 100
	103.899 μs (50 allocations: 4.44 KiB)
	4.398 ms (966 allocations: 7.67 MiB)
	Tullio with Tensor Julia 3-tensor for N = 100
	929.800 μs (187 allocations: 16.84 KiB)
	5.434 ms (1715 allocations: 7.73 MiB)
	Tullio Base Julia 3-tensor for N = 100
	494.101 μs (50 allocations: 4.44 KiB)
	7.715 ms (966 allocations: 7.67 MiB)
	using LinearAlgebra, Tullio, Test, ForwardDiff, Random, Zygote, BenchmarkTools, LoopVectorization, TensorOperations, TensorCast

	# 2-tensors
	"""
	    quad_form_raw(x, A::AbstractMatrix)

	Evaluate the quadratic form `x' * A * x` using ordinary matrix/vector products.
	"""
	function quad_form_raw(x, A::AbstractMatrix)
	    return x' * A * x
	end
	"""
	    quad_form_ten(x, A::AbstractArray{<:Number,2})

	Evaluate the scalar `x[j] * A[j,k] * x[k]` summed over `j` and `k`, written as a single
	`@tullio` reduction with the macro's default options.
	"""
	function quad_form_ten(x, A::AbstractArray{<:Number,2})
	    return @tullio c := x[j] * A[j,k] * x[k]
	end

	# 3-tensors
	"""
	    quad_form_raw(x, A::AbstractArray{<:Number,3})

	Return the vector whose `i`-th entry is the quadratic form `x' * A[i, :, :] * x`, one
	entry per index along the first dimension of `A`.
	"""
	function quad_form_raw(x, A::AbstractArray{<:Number,3})
	    # Iterate `axes(A, 1)` instead of `1:size(A, 1)` so the indices are valid for any
	    # array index style, not only 1-based arrays.
	    # `A[i, :, :]` copies each slice; left unchanged so this stays the naive baseline.
	    return [x' * A[i, :, :] * x for i in axes(A, 1)]
	end
	"""
	    quad_form_mul(x, A::AbstractArray{<:Number,3})

	Compute `c[l]` as the sum over `j` and `k` of `x[j] * A[l,j,k] * x[k]` using two nested
	`@matmul` reductions: the inner one sums over `k`, the outer one sums over `j`.
	"""
	function quad_form_mul(x, A::AbstractArray{<:Number,3})
	    return @matmul c[l] := sum(j) (@matmul [l,j] := sum(k) A[l,j,k] * x[k]) * x[j]
	end
	"""
	    quad_form_avx(x, A::AbstractArray{<:Number,3})

	Compute `c[l]` as the sum over `j` and `k` of `x[j] * A[l,j,k] * x[k]` with `@tullio`,
	passing `tensor=false` and leaving the macro's other options at their defaults.
	"""
	function quad_form_avx(x, A::AbstractArray{<:Number,3})
	    return @tullio c[l] := x[j] * A[l,j,k] * x[k] tensor=false
	end
	"""
	    quad_form_ten(x, A::AbstractArray{<:Number,3})

	Compute `c[l]` as the sum over `j` and `k` of `x[j] * A[l,j,k] * x[k]` with `@tullio`,
	using the macro's default options.
	"""
	function quad_form_ten(x, A::AbstractArray{<:Number,3})
	    return @tullio c[l] := x[j] * A[l,j,k] * x[k]
	end
	"""
	    quad_form_base(x, A::AbstractArray{<:Number,3})

	Compute `c[l]` as the sum over `j` and `k` of `x[j] * A[l,j,k] * x[k]` with `@tullio`,
	passing both `tensor=false` and `avx=false`.
	"""
	function quad_form_base(x, A::AbstractArray{<:Number,3})
	    return @tullio c[l] := x[j] * A[l,j,k] * x[k] tensor=false avx=false
	end

	# scalar objective: reduces the output of a quadratic-form implementation to one number
	"""
	    scalar_objective(f, x, A)

	Apply `f` to `(x, A)` and return the `norm` of the result, giving a scalar that can be
	timed and differentiated regardless of whether `f` returns a number or a vector.
	"""
	function scalar_objective(f, x, A)
	    value = f(x, A)
	    return norm(value)
	end

	# Print `title` in red, then time `scalar_objective(f, x, A)` and its `gradient` with
	# `@btime`. Returns the value produced by the gradient call.
	function _benchmark_case(title, f, x, A)
	    printstyled(title, color=:red)
	    @btime scalar_objective($f, $x, $A)
	    return @btime gradient(scalar_objective, $f, $x, $A)
	end

	"""
	    print_benchmarks(N)

	Benchmark every quadratic-form implementation on random inputs of size `N`.

	Draws `A_2 = rand(N, N)`, `A_3 = rand(N, N, N)` and `x = rand(N)`, then for each
	implementation prints a red heading followed by the `@btime` report for the objective
	and for its `gradient`. Returns the result of the last gradient call.
	"""
	function print_benchmarks(N)
	    # Same draw order as before: A_2, A_3, then x.
	    A_2 = rand(N, N)
	    A_3 = rand(N, N, N)
	    x = rand(N)

	    # 2-tensors
	    _benchmark_case("Raw Julia 2-tensor for N = $N\n", quad_form_raw, x, A_2)
	    _benchmark_case("Tullio with Tensor Julia 2-tensor for N = $N\n", quad_form_ten, x, A_2)

	    # 3-tensors
	    _benchmark_case("Raw Julia 3-tensor for N = $N\n", quad_form_raw, x, A_3)
	    _benchmark_case("TensorCast Julia 3-tensor for N = $N\n", quad_form_mul, x, A_3)
	    _benchmark_case("AVX Julia 3-tensor for N = $N\n", quad_form_avx, x, A_3)
	    _benchmark_case("Tullio with Tensor Julia 3-tensor for N = $N\n", quad_form_ten, x, A_3)
	    return _benchmark_case(
	        "Tullio Base Julia 3-tensor for N = $N\n", quad_form_base, x, A_3
	    )
	end

	# Run the benchmark suite. Only N = 100 is active; uncomment the other calls to
	# benchmark those problem sizes as well.
	#print_benchmarks(10)
	print_benchmarks(100)
	#print_benchmarks(250)