Created
August 5, 2018 14:29
-
-
Save devmotion/0866639f85ffb305e0485f00ae7f4025 to your computer and use it in GitHub Desktop.
Interpolation benchmarks
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using MuladdMacro, EllipsisNotation, BenchmarkTools | |
# In place
"""
    f_loop!(out, dt, y, k, b, idxs::Nothing, ::Type{Val{N}})

In-place loop variant without index selection.

For every index `ix` of `out`, store
`y[ix] + dt * (k[1][ix]*b[1] + … + k[N][ix]*b[N])` in `out[ix]` and return `out`.
The `N`-term sum is unrolled when the method body is generated, and the loop is
wrapped in `@muladd` and `@inbounds`.
"""
@generated function f_loop!(out, dt, y, k, b, idxs::Nothing, ::Type{Val{N}}) where N
    # One `k[j][ix] * b[j]` term per stage, combined into a single n-ary `+` call.
    terms = [:(k[$j][ix] * b[$j]) for j in 1:N]
    weighted = Expr(:call, :+, terms...)
    return quote
        @muladd @inbounds for ix in eachindex(out)
            out[ix] = y[ix] + dt * $weighted
        end
        out
    end
end
"""
    f_loop!(out, dt, y, k, b, idxs, ::Type{Val{N}})

In-place loop variant with index selection.

Enumerate `idxs`; for the `pos`-th selected index `src`, store
`y[src] + dt * (k[1][src]*b[1] + … + k[N][src]*b[N])` in `out[pos]` and return `out`.
The `N`-term sum is unrolled when the method body is generated, and the loop is
wrapped in `@muladd` and `@inbounds`.
"""
@generated function f_loop!(out, dt, y, k, b, idxs, ::Type{Val{N}}) where N
    # One `k[j][src] * b[j]` term per stage, combined into a single n-ary `+` call.
    terms = [:(k[$j][src] * b[$j]) for j in 1:N]
    weighted = Expr(:call, :+, terms...)
    return quote
        @muladd @inbounds for (pos, src) in enumerate(idxs)
            out[pos] = y[src] + dt * $weighted
        end
        out
    end
end
"""
    f_broadcast!(out, dt, y, k, b, idxs::Nothing, ::Type{Val{N}})

In-place broadcast variant without index selection.

Assign `y + dt * (k[1]*b[1] + … + k[N]*b[N])` to `out` through a single
`@muladd @.` broadcast statement and return `out`. The `N`-term sum is unrolled
when the method body is generated.
"""
@generated function f_broadcast!(out, dt, y, k, b, idxs::Nothing, ::Type{Val{N}}) where N
    # One `k[j] * b[j]` term per stage, combined into a single n-ary `+` call.
    terms = [:(k[$j] * b[$j]) for j in 1:N]
    weighted = Expr(:call, :+, terms...)
    return quote
        @muladd @. out = y + dt * $weighted
        out
    end
end
"""
    f_broadcast!(out, dt, y, k, b, idxs, ::Type{Val{N}})

In-place broadcast variant with index selection.

Assign `y[idxs] + dt * (k[1][idxs]*b[1] + … + k[N][idxs]*b[N])` to `out` through a
single `@muladd @views @.` broadcast statement and return `out`. The `N`-term sum is
unrolled when the method body is generated.
"""
@generated function f_broadcast!(out, dt, y, k, b, idxs, ::Type{Val{N}}) where N
    # One `k[j][idxs] * b[j]` term per stage, combined into a single n-ary `+` call.
    terms = [:(k[$j][idxs] * b[$j]) for j in 1:N]
    weighted = Expr(:call, :+, terms...)
    return quote
        @muladd @views @. out = y[idxs] + dt * $weighted
        out
    end
end
# Out of place
"""
    f_broadcast(dt, y, k, b, idxs::Nothing, ::Type{Val{N}})

Out-of-place broadcast variant without index selection.

Return the result of the `@muladd @.` broadcast
`y + dt * (k[1]*b[1] + … + k[N]*b[N])`. The `N`-term sum is unrolled when the
method body is generated.
"""
@generated function f_broadcast(dt, y, k, b, idxs::Nothing, ::Type{Val{N}}) where N
    # One `k[j] * b[j]` term per stage, combined into a single n-ary `+` call.
    terms = [:(k[$j] * b[$j]) for j in 1:N]
    weighted = Expr(:call, :+, terms...)
    return quote
        @muladd @. y + dt * $weighted
    end
end
"""
    f_broadcast(dt, y, k, b, idxs, ::Type{Val{N}})

Out-of-place broadcast variant with index selection.

Return the result of the `@muladd @views @.` broadcast
`y[idxs] + dt * (k[1][idxs]*b[1] + … + k[N][idxs]*b[N])`. The `N`-term sum is
unrolled when the method body is generated.
"""
@generated function f_broadcast(dt, y, k, b, idxs, ::Type{Val{N}}) where N
    # One `k[j][idxs] * b[j]` term per stage, combined into a single n-ary `+` call.
    terms = [:(k[$j][idxs] * b[$j]) for j in 1:N]
    weighted = Expr(:call, :+, terms...)
    return quote
        @muladd @views @. y[idxs] + dt * $weighted
    end
end
# Display benchmark results
"""
    @bdisp call_expression

Print the callee of `call_expression` (its first argument in the expression tree),
benchmark the expression with `BenchmarkTools.@benchmark`, `display` the resulting
trial, and print a blank line.

The expansion is escaped as a whole, so the benchmarked expression, including any
interpolated arguments it contains, is resolved in the scope of the caller and not
in the module that defines this macro.

Throws an `ArgumentError` at expansion time if the argument is not an expression
with at least one argument.
"""
macro bdisp(expr)
    # Fail early with a readable message instead of an obscure field-access error.
    (expr isa Expr && !isempty(expr.args)) ||
        throw(ArgumentError("@bdisp expects a call expression, got: $(repr(expr))"))

    # The label is derived from the syntax, so nothing is evaluated just to print it.
    label = string(expr.args[1])

    # Escape everything: locals interpolated into the benchmarked call must be looked
    # up where the macro is used, not as globals of the defining module.
    return esc(quote
        println($label)
        display($BenchmarkTools.@benchmark $expr)
        println()
    end)
end
"""
    run_benchmark(n, N)

Benchmark the interpolation kernels on random data with vectors of length `n` and
`N` terms in the weighted sum.

Random `out`, `y`, `dt`, `k` (a vector of `N` random vectors) and `b` (`N` random
weights) are created once. Then, for each index selection — `nothing`, an explicit
vector of all indices `1:n`, and the ellipsis `..` — the selection is shown and
`f_loop!`, `f_broadcast!` and `f_broadcast` are benchmarked via `@bdisp`.
`f_loop!` is skipped for `..`. Returns `nothing`.
"""
function run_benchmark(n, N)
    @show n, N

    out = rand(n); y = rand(n); allidxs = collect(1:n); dt = rand()
    k = [rand(n) for _ in 1:N]; b = rand(N)

    for idxs in (nothing, allidxs, ..)
        @show idxs

        # `f_loop!` enumerates `idxs`, so it is not run for the ellipsis. Compare
        # against `..` itself rather than a hard-coded internal representation
        # (`Val{:..}()`), which only matches one EllipsisNotation version.
        if idxs !== (..)
            @bdisp f_loop!($out, $dt, $y, $k, $b, $idxs, $(Val{N}))
        end
        @bdisp f_broadcast!($out, $dt, $y, $k, $b, $idxs, $(Val{N}))
        @bdisp f_broadcast($dt, $y, $k, $b, $idxs, $(Val{N}))
    end

    return nothing
end
# Run the benchmark suite for vectors of length 100 and an increasing number of terms.
foreach(N -> run_benchmark(100, N), (2, 5, 10, 15))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(n, N) = (100, 2) | |
idxs = nothing | |
f_loop! | |
BenchmarkTools.Trial: | |
memory estimate: 16 bytes | |
allocs estimate: 1 | |
-------------- | |
minimum time: 178.517 ns (0.00% GC) | |
median time: 197.222 ns (0.00% GC) | |
mean time: 215.129 ns (3.40% GC) | |
maximum time: 49.360 μs (99.55% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 666 | |
f_broadcast! | |
BenchmarkTools.Trial: | |
memory estimate: 16 bytes | |
allocs estimate: 1 | |
-------------- | |
minimum time: 85.594 ns (0.00% GC) | |
median time: 99.060 ns (0.00% GC) | |
mean time: 107.006 ns (5.08% GC) | |
maximum time: 36.729 μs (99.63% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 956 | |
f_broadcast | |
BenchmarkTools.Trial: | |
memory estimate: 976 bytes | |
allocs estimate: 4 | |
-------------- | |
minimum time: 649.410 ns (0.00% GC) | |
median time: 746.042 ns (0.00% GC) | |
mean time: 797.310 ns (6.68% GC) | |
maximum time: 221.633 μs (99.53% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 156 | |
idxs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100] | |
f_loop! | |
BenchmarkTools.Trial: | |
memory estimate: 16 bytes | |
allocs estimate: 1 | |
-------------- | |
minimum time: 207.893 ns (0.00% GC) | |
median time: 225.582 ns (0.00% GC) | |
mean time: 239.669 ns (3.81% GC) | |
maximum time: 62.751 μs (99.58% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 560 | |
f_broadcast! | |
BenchmarkTools.Trial: | |
memory estimate: 256 bytes | |
allocs estimate: 10 | |
-------------- | |
minimum time: 383.901 ns (0.00% GC) | |
median time: 426.470 ns (0.00% GC) | |
mean time: 490.538 ns (7.03% GC) | |
maximum time: 168.830 μs (99.66% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 202 | |
f_broadcast | |
BenchmarkTools.Trial: | |
memory estimate: 1.19 KiB | |
allocs estimate: 13 | |
-------------- | |
minimum time: 918.516 ns (0.00% GC) | |
median time: 1.020 μs (0.00% GC) | |
mean time: 1.298 μs (15.81% GC) | |
maximum time: 1.114 ms (99.87% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 31 | |
idxs = Val{:..}() | |
f_broadcast! | |
BenchmarkTools.Trial: | |
memory estimate: 160 bytes | |
allocs estimate: 4 | |
-------------- | |
minimum time: 141.498 ns (0.00% GC) | |
median time: 158.581 ns (0.00% GC) | |
mean time: 173.669 ns (7.34% GC) | |
maximum time: 40.422 μs (99.50% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 846 | |
f_broadcast | |
BenchmarkTools.Trial: | |
memory estimate: 1.09 KiB | |
allocs estimate: 7 | |
-------------- | |
minimum time: 722.526 ns (0.00% GC) | |
median time: 831.504 ns (0.00% GC) | |
mean time: 912.200 ns (9.31% GC) | |
maximum time: 254.465 μs (99.55% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 137 | |
(n, N) = (100, 5) | |
idxs = nothing | |
f_loop! | |
BenchmarkTools.Trial: | |
memory estimate: 16 bytes | |
allocs estimate: 1 | |
-------------- | |
minimum time: 437.697 ns (0.00% GC) | |
median time: 482.343 ns (0.00% GC) | |
mean time: 499.144 ns (1.55% GC) | |
maximum time: 77.686 μs (99.34% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 198 | |
f_broadcast! | |
BenchmarkTools.Trial: | |
memory estimate: 16 bytes | |
allocs estimate: 1 | |
-------------- | |
minimum time: 373.800 ns (0.00% GC) | |
median time: 412.727 ns (0.00% GC) | |
mean time: 429.601 ns (1.72% GC) | |
maximum time: 74.530 μs (99.34% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 205 | |
f_broadcast | |
BenchmarkTools.Trial: | |
memory estimate: 1.05 KiB | |
allocs estimate: 7 | |
-------------- | |
minimum time: 1.639 μs (0.00% GC) | |
median time: 1.840 μs (0.00% GC) | |
mean time: 2.454 μs (21.08% GC) | |
maximum time: 3.444 ms (99.86% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 10 | |
idxs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100] | |
f_loop! | |
BenchmarkTools.Trial: | |
memory estimate: 16 bytes | |
allocs estimate: 1 | |
-------------- | |
minimum time: 499.773 ns (0.00% GC) | |
median time: 551.374 ns (0.00% GC) | |
mean time: 570.722 ns (1.36% GC) | |
maximum time: 78.803 μs (98.79% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 194 | |
f_broadcast! | |
BenchmarkTools.Trial: | |
memory estimate: 496 bytes | |
allocs estimate: 19 | |
-------------- | |
minimum time: 1.047 μs (0.00% GC) | |
median time: 1.236 μs (0.00% GC) | |
mean time: 1.787 μs (28.36% GC) | |
maximum time: 3.607 ms (99.91% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 10 | |
f_broadcast | |
BenchmarkTools.Trial: | |
memory estimate: 1.52 KiB | |
allocs estimate: 25 | |
-------------- | |
minimum time: 2.301 μs (0.00% GC) | |
median time: 2.493 μs (0.00% GC) | |
mean time: 3.258 μs (19.15% GC) | |
maximum time: 3.931 ms (99.85% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 9 | |
idxs = Val{:..}() | |
f_broadcast! | |
BenchmarkTools.Trial: | |
memory estimate: 304 bytes | |
allocs estimate: 7 | |
-------------- | |
minimum time: 482.703 ns (0.00% GC) | |
median time: 535.344 ns (0.00% GC) | |
mean time: 608.793 ns (6.67% GC) | |
maximum time: 180.092 μs (99.62% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 195 | |
f_broadcast | |
BenchmarkTools.Trial: | |
memory estimate: 1.33 KiB | |
allocs estimate: 13 | |
-------------- | |
minimum time: 1.790 μs (0.00% GC) | |
median time: 1.952 μs (0.00% GC) | |
mean time: 2.645 μs (21.31% GC) | |
maximum time: 3.525 ms (99.86% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 10 | |
(n, N) = (100, 10) | |
idxs = nothing | |
f_loop! | |
BenchmarkTools.Trial: | |
memory estimate: 16 bytes | |
allocs estimate: 1 | |
-------------- | |
minimum time: 1.076 μs (0.00% GC) | |
median time: 1.186 μs (0.00% GC) | |
mean time: 1.195 μs (0.00% GC) | |
maximum time: 4.528 μs (0.00% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 10 | |
f_broadcast! | |
BenchmarkTools.Trial: | |
memory estimate: 1.02 KiB | |
allocs estimate: 42 | |
-------------- | |
minimum time: 1.343 μs (0.00% GC) | |
median time: 1.509 μs (0.00% GC) | |
mean time: 2.150 μs (25.13% GC) | |
maximum time: 3.582 ms (99.90% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 10 | |
f_broadcast | |
BenchmarkTools.Trial: | |
memory estimate: 2.20 KiB | |
allocs estimate: 53 | |
-------------- | |
minimum time: 3.764 μs (0.00% GC) | |
median time: 4.188 μs (0.00% GC) | |
mean time: 5.135 μs (15.21% GC) | |
maximum time: 4.622 ms (99.83% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 8 | |
idxs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100] | |
f_loop! | |
BenchmarkTools.Trial: | |
memory estimate: 16 bytes | |
allocs estimate: 1 | |
-------------- | |
minimum time: 1.179 μs (0.00% GC) | |
median time: 1.464 μs (0.00% GC) | |
mean time: 1.591 μs (0.00% GC) | |
maximum time: 5.853 μs (0.00% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 10 | |
f_broadcast! | |
BenchmarkTools.Trial: | |
memory estimate: 1.30 KiB | |
allocs estimate: 52 | |
-------------- | |
minimum time: 2.161 μs (0.00% GC) | |
median time: 2.403 μs (0.00% GC) | |
mean time: 3.137 μs (20.31% GC) | |
maximum time: 4.085 ms (99.87% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 9 | |
f_broadcast | |
BenchmarkTools.Trial: | |
memory estimate: 2.48 KiB | |
allocs estimate: 63 | |
-------------- | |
minimum time: 4.557 μs (0.00% GC) | |
median time: 5.047 μs (0.00% GC) | |
mean time: 6.110 μs (14.95% GC) | |
maximum time: 5.513 ms (99.86% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 7 | |
idxs = Val{:..}() | |
f_broadcast! | |
BenchmarkTools.Trial: | |
memory estimate: 1.56 KiB | |
allocs estimate: 54 | |
-------------- | |
minimum time: 1.655 μs (0.00% GC) | |
median time: 1.869 μs (0.00% GC) | |
mean time: 2.574 μs (24.63% GC) | |
maximum time: 3.726 ms (99.91% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 10 | |
f_broadcast | |
BenchmarkTools.Trial: | |
memory estimate: 2.75 KiB | |
allocs estimate: 65 | |
-------------- | |
minimum time: 4.032 μs (0.00% GC) | |
median time: 4.489 μs (0.00% GC) | |
mean time: 5.638 μs (17.18% GC) | |
maximum time: 5.577 ms (99.83% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 7 | |
(n, N) = (100, 15) | |
idxs = nothing | |
f_loop! | |
BenchmarkTools.Trial: | |
memory estimate: 16 bytes | |
allocs estimate: 1 | |
-------------- | |
minimum time: 1.962 μs (0.00% GC) | |
median time: 2.154 μs (0.00% GC) | |
mean time: 2.163 μs (0.00% GC) | |
maximum time: 6.176 μs (0.00% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 10 | |
f_broadcast! | |
BenchmarkTools.Trial: | |
memory estimate: 1.83 KiB | |
allocs estimate: 75 | |
-------------- | |
minimum time: 2.401 μs (0.00% GC) | |
median time: 2.622 μs (0.00% GC) | |
mean time: 3.412 μs (19.64% GC) | |
maximum time: 4.161 ms (99.91% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 9 | |
f_broadcast | |
BenchmarkTools.Trial: | |
memory estimate: 3.17 KiB | |
allocs estimate: 91 | |
-------------- | |
minimum time: 5.987 μs (0.00% GC) | |
median time: 6.663 μs (0.00% GC) | |
mean time: 7.912 μs (13.44% GC) | |
maximum time: 6.188 ms (99.81% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 6 | |
idxs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100] | |
f_loop! | |
BenchmarkTools.Trial: | |
memory estimate: 16 bytes | |
allocs estimate: 1 | |
-------------- | |
minimum time: 2.101 μs (0.00% GC) | |
median time: 2.309 μs (0.00% GC) | |
mean time: 2.328 μs (0.00% GC) | |
maximum time: 8.207 μs (0.00% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 9 | |
f_broadcast! | |
BenchmarkTools.Trial: | |
memory estimate: 2.67 KiB | |
allocs estimate: 106 | |
-------------- | |
minimum time: 3.759 μs (0.00% GC) | |
median time: 4.157 μs (0.00% GC) | |
mean time: 5.172 μs (16.93% GC) | |
maximum time: 4.780 ms (99.86% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 8 | |
f_broadcast | |
BenchmarkTools.Trial: | |
memory estimate: 4.05 KiB | |
allocs estimate: 123 | |
-------------- | |
minimum time: 7.376 μs (0.00% GC) | |
median time: 8.204 μs (0.00% GC) | |
mean time: 10.058 μs (16.27% GC) | |
maximum time: 9.521 ms (99.83% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 4 | |
idxs = Val{:..}() | |
f_broadcast! | |
BenchmarkTools.Trial: | |
memory estimate: 1.80 KiB | |
allocs estimate: 59 | |
-------------- | |
minimum time: 2.522 μs (0.00% GC) | |
median time: 2.812 μs (0.00% GC) | |
mean time: 3.657 μs (20.07% GC) | |
maximum time: 4.318 ms (99.91% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 9 | |
f_broadcast | |
BenchmarkTools.Trial: | |
memory estimate: 3.31 KiB | |
allocs estimate: 82 | |
-------------- | |
minimum time: 6.044 μs (0.00% GC) | |
median time: 6.717 μs (0.00% GC) | |
mean time: 8.288 μs (16.05% GC) | |
maximum time: 7.751 ms (99.86% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 5 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment