# phipsgabler/quadratic_forms_benchmark.jl

## quadratic_forms_benchmark.jl
using BenchmarkTools
using Einsum
using TensorOperations

# Problem sizes: `x` has N rows, `μ` has K columns, and D is the dimension they share
# (columns of `x`, rows of `μ`, and the two trailing dimensions of `W`).
const N = 9851
const K = 35
const D = 16

# Random inputs in the layout indexed by `test_original` and `test_einsum`:
# x[n, i], μ[i, k], W[k, i, j].
const x = rand(N, D)
const μ = rand(D, K)
const W = rand(K, D, D)

# Re-laid-out inputs for the variants that index x[i, n] and W[i, j, k].
# NOTE(review): `μ′` is not passed to any benchmark call recorded in this file.
const x′ = x'
const μ′ = μ'
const W′ = permutedims(W, [2, 3, 1])


"""
    test_original(x, μ, W)

Reference implementation: for every row `n` of `x` and every column `k` of `μ`, compute
the quadratic form `d' * W[k, :, :] * d` with `d = x[n, :] - μ[:, k]`.

# Arguments
- `x`: matrix indexed as `x[n, :]`.
- `μ`: matrix indexed as `μ[:, k]`.
- `W`: 3-d array indexed as `W[k, :, :]`.

# Returns
A `size(x, 1) × size(μ, 2)` `Float64` matrix holding one quadratic form per `(n, k)`.
"""
function test_original(x, μ, W)
    # Take the extents from the arguments instead of the file-level `N`/`K` constants,
    # so the function is correct for any consistently shaped inputs.
    nx = size(x, 1)
    nμ = size(μ, 2)
    res = zeros(nx, nμ)

    for n in 1:nx
        for k in 1:nμ
            # Slicing copies here; that allocation is part of what this baseline measures.
            d = x[n, :] - μ[:, k]
            res[n, k] = d' * W[k, :, :] * d
        end
    end

    return res
end

"""
    test_einsum(x, μ, W)

Compute the same quadratic forms as `test_original` with a single `@einsum` expression.
The inputs are indexed as `x[n, ·]`, `μ[·, k]` and `W[k, ·, ·]`; the output is indexed
`[n, k]`, and the indices that appear only on the right-hand side are contracted.
"""
function test_einsum(x, μ, W)
    @einsum quad[n, k] := (x[n, p] - μ[p, k]) * W[k, p, q] * (x[n, q] - μ[q, k])
    return quad
end

"""
    test_einsum_t(x, μ, W)

`@einsum` variant for the re-laid-out inputs: `x` is indexed as `x[·, n]` and `W` as
`W[·, ·, k]`, while `μ` keeps the `μ[·, k]` layout. The output is indexed `[n, k]`.
"""
function test_einsum_t(x, μ, W)
    @einsum quad[n, k] := (x[p, n] - μ[p, k]) * W[p, q, k] * (x[q, n] - μ[q, k])
    return quad
end

"""
    test_optimized!(res, x, μ, W)

Overwrite `res[n, k]` with the sum over `i, j` of
`(x[i, n] - μ[i, k]) * W[i, j, k] * (x[j, n] - μ[j, k])`, using explicit loops.

# Arguments
- `res`: output matrix, written at every `[n, k]` with `n in 1:size(x, 2)` and
  `k in 1:size(μ, 2)`.
- `x`: matrix indexed as `x[i, n]`.
- `μ`: matrix indexed as `μ[i, k]`.
- `W`: 3-d array indexed as `W[i, j, k]`.

# Returns
`res`, after it has been filled.
"""
function test_optimized!(res, x, μ, W)
    z = zero(eltype(x))

    # `n` and `k` are the *second* indices of `x` and `μ`, so the loops must run over the
    # second dimensions. Using `size(·, 1)` here only visits a corner of `res`.
    for k in 1:size(μ, 2)
        for n in 1:size(x, 2)
            res[n, k] = z

            for i in 1:size(W, 1)
                for j in 1:size(W, 2)
                    # `@inbounds` relies on the arguments having mutually consistent shapes.
                    @inbounds res[n, k] +=
                        (x[i, n] - μ[i, k]) * W[i, j, k] * (x[j, n] - μ[j, k])
                end
            end
        end
    end

    return res
end

"""
    test_optimized(x, μ, W)

Allocate a `size(x, 2) × size(μ, 2)` `Float64` result, fill it with `test_optimized!`,
and return it. `x` and `W` are expected in the layout `test_optimized!` indexes
(`x[i, n]`, `μ[i, k]`, `W[i, j, k]`).
"""
function test_optimized(x, μ, W)
    # Size the output from the arguments rather than the file-level `N`/`K` constants.
    res = zeros(size(x, 2), size(μ, 2))
    test_optimized!(res, x, μ, W)
    return res
end

"""
    test_tensorops(x, μ, W)

Compute the quadratic forms one `(n, k)` pair at a time, contracting
`d[i] * w[i, j] * d[j]` with `@tensor`, where `d = x[:, n] - μ[:, k]` and
`w = W[:, :, k]`.

# Arguments
- `x`: matrix indexed as `x[:, n]`.
- `μ`: matrix indexed as `μ[:, k]`.
- `W`: 3-d array indexed as `W[:, :, k]`.

# Returns
A `size(x, 2) × size(μ, 2)` `Float64` matrix of quadratic forms.
"""
function test_tensorops(x, μ, W)
    # `n` and `k` index the second dimensions of `x` and `μ`; size the result and the
    # loops from those rather than from `size(·, 1)` or the file-level `N`/`K`.
    nx = size(x, 2)
    nμ = size(μ, 2)
    res = zeros(nx, nμ)

    for k in 1:nμ
        # The weight slice depends only on `k`, so take the view once per `k`.
        @views w = W[:, :, k]
        for n in 1:nx
            @views d = x[:, n] - μ[:, k]
            @tensor r[] := d[i] * w[i, j] * d[j]
            res[n, k] = r[]
        end
    end

    # Without an explicit return the function yields the value of the `for` loop, i.e.
    # `nothing`.
    return res
end


# julia> @benchmark test_original($x, $μ, $W)
# BenchmarkTools.Trial:
#   memory estimate:  1017.73 MiB
#   allocs estimate:  3429967
#   --------------
#   minimum time:     347.353 ms (16.02% GC)
#   median time:      361.982 ms (15.94% GC)
#   mean time:        367.613 ms (17.20% GC)
#   maximum time:     436.865 ms (30.41% GC)
#   --------------
#   samples:          14
#   evals/sample:     1

# julia> @benchmark test_einsum($x, $μ, $W)
# BenchmarkTools.Trial:
#   memory estimate:  2.64 MiB
#   allocs estimate:  76
#   --------------
#   minimum time:     125.719 ms (0.00% GC)
#   median time:      127.536 ms (0.00% GC)
#   mean time:        127.761 ms (0.06% GC)
#   maximum time:     130.773 ms (0.55% GC)
#   --------------
#   samples:          40
#   evals/sample:     1

# julia> @benchmark test_einsum_t($(x′), $(μ), $(W′))
# BenchmarkTools.Trial:
#   memory estimate:  2.64 MiB
#   allocs estimate:  76
#   --------------
#   minimum time:     125.280 ms (0.00% GC)
#   median time:      126.987 ms (0.00% GC)
#   mean time:        127.192 ms (0.06% GC)
#   maximum time:     130.392 ms (0.60% GC)
#   --------------
#   samples:          40
#   evals/sample:     1

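# NOTE(review): presumably recorded with the loop bounds `1:size(μ, 1)` and
# `1:size(x, 1)`, which for these arguments cover only 16 × 16 of the 9851 × 35 entries;
# this timing is therefore not comparable with the ones above.
# julia> @benchmark test_optimized($(x′), $(μ), $(W′))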
# BenchmarkTools.Trial:
#   memory estimate:  2.63 MiB
#   allocs estimate:  2
#   --------------
#   minimum time:     413.104 μs (0.00% GC)
#   median time:      521.215 μs (0.00% GC)
#   mean time:        560.746 μs (8.24% GC)
#   maximum time:     1.973 ms (0.00% GC)
#   --------------
#   samples:          8892
#   evals/sample:     1

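# NOTE(review): presumably recorded with the loop bounds `1:size(μ, 1)` and
# `1:size(x, 1)`, which for these arguments cover only 16 × 16 of the 9851 × 35 entries;
# this timing is therefore not comparable with the ones above.
# julia> @benchmark test_tensorops(x′, μ, W′)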
# BenchmarkTools.Trial:
#   memory estimate:  3.76 MiB
#   allocs estimate:  11266
#   --------------
#   minimum time:     1.337 ms (0.00% GC)
#   median time:      1.421 ms (0.00% GC)
#   mean time:        1.686 ms (15.21% GC)
#   maximum time:     4.930 ms (48.20% GC)
#   --------------
#   samples:          2964
#   evals/sample:     1
	using BenchmarkTools
	using Einsum
	using TensorOperations

	# NOTE(review): these definitions repeat the unindented ones earlier in this file.
	# Problem sizes: `x` has N rows, `μ` has K columns, and D is the dimension they share
	# (columns of `x`, rows of `μ`, and the two trailing dimensions of `W`).
	const N = 9851
	const K = 35
	const D = 16

	# Random inputs in the layout indexed by `test_original` and `test_einsum`:
	# x[n, i], μ[i, k], W[k, i, j].
	const x = rand(N, D)
	const μ = rand(D, K)
	const W = rand(K, D, D)

	# Re-laid-out inputs for the variants that index x[i, n] and W[i, j, k].
	# NOTE(review): `μ′` is not passed to any benchmark call recorded in this file.
	const x′ = x'
	const μ′ = μ'
	const W′ = permutedims(W, [2, 3, 1])


	"""
	    test_original(x, μ, W)

	Reference implementation: for every row `n` of `x` and every column `k` of `μ`, compute
	the quadratic form `d' * W[k, :, :] * d` with `d = x[n, :] - μ[:, k]`.

	# Arguments
	- `x`: matrix indexed as `x[n, :]`.
	- `μ`: matrix indexed as `μ[:, k]`.
	- `W`: 3-d array indexed as `W[k, :, :]`.

	# Returns
	A `size(x, 1) × size(μ, 2)` `Float64` matrix holding one quadratic form per `(n, k)`.
	"""
	function test_original(x, μ, W)
	    # Take the extents from the arguments instead of the file-level `N`/`K` constants,
	    # so the function is correct for any consistently shaped inputs.
	    nx = size(x, 1)
	    nμ = size(μ, 2)
	    res = zeros(nx, nμ)

	    for n in 1:nx
	        for k in 1:nμ
	            # Slicing copies here; that allocation is part of what this baseline measures.
	            d = x[n, :] - μ[:, k]
	            res[n, k] = d' * W[k, :, :] * d
	        end
	    end

	    return res
	end

	"""
	    test_einsum(x, μ, W)

	Compute the same quadratic forms as `test_original` with a single `@einsum` expression.
	The inputs are indexed as `x[n, ·]`, `μ[·, k]` and `W[k, ·, ·]`; the output is indexed
	`[n, k]`, and the indices that appear only on the right-hand side are contracted.
	"""
	function test_einsum(x, μ, W)
	    @einsum quad[n, k] := (x[n, p] - μ[p, k]) * W[k, p, q] * (x[n, q] - μ[q, k])
	    return quad
	end

	"""
	    test_einsum_t(x, μ, W)

	`@einsum` variant for the re-laid-out inputs: `x` is indexed as `x[·, n]` and `W` as
	`W[·, ·, k]`, while `μ` keeps the `μ[·, k]` layout. The output is indexed `[n, k]`.
	"""
	function test_einsum_t(x, μ, W)
	    @einsum quad[n, k] := (x[p, n] - μ[p, k]) * W[p, q, k] * (x[q, n] - μ[q, k])
	    return quad
	end

	"""
	    test_optimized!(res, x, μ, W)

	Overwrite `res[n, k]` with the sum over `i, j` of
	`(x[i, n] - μ[i, k]) * W[i, j, k] * (x[j, n] - μ[j, k])`, using explicit loops.

	# Arguments
	- `res`: output matrix, written at every `[n, k]` with `n in 1:size(x, 2)` and
	  `k in 1:size(μ, 2)`.
	- `x`: matrix indexed as `x[i, n]`.
	- `μ`: matrix indexed as `μ[i, k]`.
	- `W`: 3-d array indexed as `W[i, j, k]`.

	# Returns
	`res`, after it has been filled.
	"""
	function test_optimized!(res, x, μ, W)
	    z = zero(eltype(x))

	    # `n` and `k` are the *second* indices of `x` and `μ`, so the loops must run over the
	    # second dimensions. Using `size(·, 1)` here only visits a corner of `res`.
	    for k in 1:size(μ, 2)
	        for n in 1:size(x, 2)
	            res[n, k] = z

	            for i in 1:size(W, 1)
	                for j in 1:size(W, 2)
	                    # `@inbounds` relies on the arguments having mutually consistent shapes.
	                    @inbounds res[n, k] +=
	                        (x[i, n] - μ[i, k]) * W[i, j, k] * (x[j, n] - μ[j, k])
	                end
	            end
	        end
	    end

	    return res
	end

	"""
	    test_optimized(x, μ, W)

	Allocate a `size(x, 2) × size(μ, 2)` `Float64` result, fill it with `test_optimized!`,
	and return it. `x` and `W` are expected in the layout `test_optimized!` indexes
	(`x[i, n]`, `μ[i, k]`, `W[i, j, k]`).
	"""
	function test_optimized(x, μ, W)
	    # Size the output from the arguments rather than the file-level `N`/`K` constants.
	    res = zeros(size(x, 2), size(μ, 2))
	    test_optimized!(res, x, μ, W)
	    return res
	end

	"""
	    test_tensorops(x, μ, W)

	Compute the quadratic forms one `(n, k)` pair at a time, contracting
	`d[i] * w[i, j] * d[j]` with `@tensor`, where `d = x[:, n] - μ[:, k]` and
	`w = W[:, :, k]`.

	# Arguments
	- `x`: matrix indexed as `x[:, n]`.
	- `μ`: matrix indexed as `μ[:, k]`.
	- `W`: 3-d array indexed as `W[:, :, k]`.

	# Returns
	A `size(x, 2) × size(μ, 2)` `Float64` matrix of quadratic forms.
	"""
	function test_tensorops(x, μ, W)
	    # `n` and `k` index the second dimensions of `x` and `μ`; size the result and the
	    # loops from those rather than from `size(·, 1)` or the file-level `N`/`K`.
	    nx = size(x, 2)
	    nμ = size(μ, 2)
	    res = zeros(nx, nμ)

	    for k in 1:nμ
	        # The weight slice depends only on `k`, so take the view once per `k`.
	        @views w = W[:, :, k]
	        for n in 1:nx
	            @views d = x[:, n] - μ[:, k]
	            @tensor r[] := d[i] * w[i, j] * d[j]
	            res[n, k] = r[]
	        end
	    end

	    # Without an explicit return the function yields the value of the `for` loop, i.e.
	    # `nothing`.
	    return res
	end


	# julia> @benchmark test_original($x, $μ, $W)
	# BenchmarkTools.Trial:
	# memory estimate: 1017.73 MiB
	# allocs estimate: 3429967
	# --------------
	# minimum time: 347.353 ms (16.02% GC)
	# median time: 361.982 ms (15.94% GC)
	# mean time: 367.613 ms (17.20% GC)
	# maximum time: 436.865 ms (30.41% GC)
	# --------------
	# samples: 14
	# evals/sample: 1

	# julia> @benchmark test_einsum($x, $μ, $W)
	# BenchmarkTools.Trial:
	# memory estimate: 2.64 MiB
	# allocs estimate: 76
	# --------------
	# minimum time: 125.719 ms (0.00% GC)
	# median time: 127.536 ms (0.00% GC)
	# mean time: 127.761 ms (0.06% GC)
	# maximum time: 130.773 ms (0.55% GC)
	# --------------
	# samples: 40
	# evals/sample: 1

	# julia> @benchmark test_einsum_t($(x′), $(μ), $(W′))
	# BenchmarkTools.Trial:
	# memory estimate: 2.64 MiB
	# allocs estimate: 76
	# --------------
	# minimum time: 125.280 ms (0.00% GC)
	# median time: 126.987 ms (0.00% GC)
	# mean time: 127.192 ms (0.06% GC)
	# maximum time: 130.392 ms (0.60% GC)
	# --------------
	# samples: 40
	# evals/sample: 1

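	# NOTE(review): presumably recorded with the loop bounds `1:size(μ, 1)` and
	# `1:size(x, 1)`, which for these arguments cover only 16 × 16 of the 9851 × 35 entries;
	# this timing is therefore not comparable with the ones above.
	# julia> @benchmark test_optimized($(x′), $(μ), $(W′))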
	# BenchmarkTools.Trial:
	# memory estimate: 2.63 MiB
	# allocs estimate: 2
	# --------------
	# minimum time: 413.104 μs (0.00% GC)
	# median time: 521.215 μs (0.00% GC)
	# mean time: 560.746 μs (8.24% GC)
	# maximum time: 1.973 ms (0.00% GC)
	# --------------
	# samples: 8892
	# evals/sample: 1

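	# NOTE(review): presumably recorded with the loop bounds `1:size(μ, 1)` and
	# `1:size(x, 1)`, which for these arguments cover only 16 × 16 of the 9851 × 35 entries;
	# this timing is therefore not comparable with the ones above.
	# julia> @benchmark test_tensorops(x′, μ, W′)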
	# BenchmarkTools.Trial:
	# memory estimate: 3.76 MiB
	# allocs estimate: 11266
	# --------------
	# minimum time: 1.337 ms (0.00% GC)
	# median time: 1.421 ms (0.00% GC)
	# mean time: 1.686 ms (15.21% GC)
	# maximum time: 4.930 ms (48.20% GC)
	# --------------
	# samples: 2964
	# evals/sample: 1