Last active
June 6, 2017 13:20
-
-
Save bjarthur/9ae4b30c4087dcd229d9 to your computer and use it in GitHub Desktop.
extrema implementations
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using Base.Cartesian, Base.Test, BenchmarkTools | |
extrema_mapslices(v,region) = mapslices(extrema, v, region) | |
function extrema_functor(a::AbstractArray, dim) | |
dim = tuple(dim...) | |
mi = minimum(a,dim) | |
ma = maximum(a,dim) | |
reshape(collect(zip(mi,ma)), size(mi)) | |
end | |
function extrema_cartesian(A::AbstractArray, dims) | |
sz = [size(A)...] | |
sz[[dims...]] = 1 | |
B = Array{Tuple{eltype(A),eltype(A)}}(sz...) | |
extrema_cartesian!(B, A) | |
end | |
function extrema_cartesian_simd(A::AbstractArray, dims) | |
sz = [size(A)...] | |
sz[[dims...]] = 1 | |
B = Array{Tuple{eltype(A),eltype(A)}}(sz...) | |
extrema_cartesian_simd!(B, A) | |
end | |
function extrema_cartesianrange(A::AbstractArray, dims) | |
sz = [size(A)...] | |
sz[[dims...]] = 1 | |
B = Array{Tuple{eltype(A),eltype(A)}}(sz...) | |
extrema_cartesianrange!(B, A) | |
end | |
@generated function extrema_cartesian!(B,A::Array{T,N}) where {T,N} | |
return quote | |
sA = size(A) | |
sB = size(B) | |
@nloops $N i B begin | |
AI = @nref $N A i | |
(@nref $N B i) = (AI, AI) | |
end | |
Bmax = sB | |
@inbounds @nloops $N i A begin | |
AI = @nref $N A i | |
@nexprs $N d->(j_d = min(Bmax[d], i_{d})) | |
BJ = @nref $N B j | |
#if AI < BJ[1] | |
# (@nref $N B j) = (AI, BJ[2]) | |
#elseif AI > BJ[2] | |
# (@nref $N B j) = (BJ[1], AI) | |
#end | |
BJ = ifelse(AI < BJ[1], (AI, BJ[2]), BJ) | |
BJ = ifelse(AI > BJ[2], (BJ[1], AI), BJ) | |
(@nref $N B j) = BJ | |
end | |
return B | |
end | |
end | |
@generated function extrema_cartesian_simd!(B,A::Array{T,N}) where {T,N} | |
return quote | |
sA = size(A) | |
sB = size(B) | |
@nloops $N i B begin | |
AI = @nref $N A i | |
(@nref $N B i) = (AI, AI) | |
end | |
Bmax = sB | |
@inbounds @nloops $(N-1) i d -> indices(A,d+1) begin | |
@inbounds @simd for i_0 = indices(A,1) | |
AI = @nref $N A d->i_{d-1} | |
@nexprs $N d->(j_d = min(Bmax[d], i_{d-1})) | |
BJ = @nref $N B j | |
#if AI < BJ[1] | |
# (@nref $N B j) = (AI, BJ[2]) | |
#elseif AI > BJ[2] | |
# (@nref $N B j) = (BJ[1], AI) | |
#end | |
BJ = ifelse(AI < BJ[1], (AI, BJ[2]), BJ) | |
BJ = ifelse(AI > BJ[2], (BJ[1], AI), BJ) | |
(@nref $N B j) = BJ | |
end | |
end | |
return B | |
end | |
end | |
@noinline function extrema_cartesianrange!(B, A) | |
sA = size(A) | |
sB = size(B) | |
for I in CartesianRange(sB) | |
AI = A[I] | |
B[I] = (AI, AI) | |
end | |
Bmax = CartesianIndex(sB) | |
@inbounds @simd for I in CartesianRange(sA) | |
J = min(Bmax,I) | |
BJ = B[J] | |
AI = A[I] | |
#if AI < BJ[1] | |
# B[J] = (AI, BJ[2]) | |
#elseif AI > BJ[2] | |
# B[J] = (BJ[1], AI) | |
#end | |
BJ = ifelse(AI < BJ[1], (AI, BJ[2]), BJ) | |
BJ = ifelse(AI > BJ[2], (BJ[1], AI), BJ) | |
B[J] = BJ | |
end | |
return B | |
end | |
foo = Dict( | |
10 => rand(10,11,12), | |
100 => rand(100,101,102), | |
1000 => rand(1000,1001,1002)) | |
global dim, sz | |
for dim in [1,2,(1,2)], sz in sort(collect(keys(foo))) | |
info("dim=",dim,", sz=",sz) | |
@test extrema_mapslices(foo[sz],dim) == extrema_functor(foo[sz],dim) | |
@test extrema_mapslices(foo[sz],dim) == extrema_cartesian(foo[sz],dim) | |
@test extrema_mapslices(foo[sz],dim) == extrema_cartesian_simd(foo[sz],dim) | |
@test extrema_mapslices(foo[sz],dim) == extrema_cartesianrange(foo[sz],dim) | |
gc(); print("mapslices "); @btime extrema_mapslices(foo[sz],dim); | |
gc(); print("functor "); @btime extrema_functor(foo[sz],dim); | |
gc(); print("cartesian "); @btime extrema_cartesian(foo[sz],dim); | |
gc(); print("cartesian_simd"); @btime extrema_cartesian_simd(foo[sz],dim); | |
gc(); print("cartesianrange"); @btime extrema_cartesianrange(foo[sz],dim); | |
end |
using ifelse
is faster when dim=2
but slower otherwise. it also makes the simd and non-simd variants of cartesian equal in speed for small arrays:
%%% if ... elseif ... end %%% ifelse
INFO: dim=1, sz=10 INFO: dim=1, sz=10
mapslices 122.846 μs (1621 allocations: 74.69 KiB) mapslices 126.879 μs (1621 allocations: 74.69 KiB)
functor 9.191 μs (22 allocations: 5.09 KiB) functor 10.858 μs (22 allocations: 5.09 KiB)
cartesian 2.878 μs (4 allocations: 2.50 KiB) cartesian 3.068 μs (4 allocations: 2.50 KiB)
cartesian_simd 5.463 μs (4 allocations: 2.50 KiB) cartesian_simd 3.117 μs (4 allocations: 2.50 KiB)
cartesianrange 3.227 μs (4 allocations: 2.50 KiB) cartesianrange 3.354 μs (4 allocations: 2.50 KiB)
INFO: dim=1, sz=100 INFO: dim=1, sz=100
mapslices 13.213 ms (123662 allocations: 12.74 MiB) mapslices 14.398 ms (123662 allocations: 12.74 MiB)
functor 4.311 ms (25 allocations: 322.86 KiB) functor 4.357 ms (25 allocations: 322.86 KiB)
cartesian 2.290 ms (5 allocations: 161.39 KiB) cartesian 2.437 ms (5 allocations: 161.39 KiB)
cartesian_simd 2.236 ms (5 allocations: 161.39 KiB) cartesian_simd 2.439 ms (5 allocations: 161.39 KiB)
cartesianrange 2.242 ms (5 allocations: 161.39 KiB) cartesianrange 2.460 ms (5 allocations: 161.39 KiB)
INFO: dim=1, sz=1000 INFO: dim=1, sz=1000
mapslices 8.208 s (13018533 allocations: 7.98 GiB) mapslices 8.566 s (13018533 allocations: 7.98 GiB)
functor 4.036 s (25 allocations: 30.61 MiB) functor 4.599 s (25 allocations: 30.61 MiB)
cartesian 1.796 s (10 allocations: 15.31 MiB) cartesian 3.023 s (10 allocations: 15.31 MiB)
cartesian_simd 1.790 s (10 allocations: 15.31 MiB) cartesian_simd 2.878 s (10 allocations: 15.31 MiB)
cartesianrange 1.699 s (10 allocations: 15.31 MiB) cartesianrange 2.981 s (10 allocations: 15.31 MiB)
INFO: dim=2, sz=10 INFO: dim=2, sz=10
mapslices 122.202 μs (1477 allocations: 69.98 KiB) mapslices 126.456 μs (1477 allocations: 69.98 KiB)
functor 5.644 μs (22 allocations: 4.73 KiB) functor 5.885 μs (22 allocations: 4.73 KiB)
cartesian 2.850 μs (4 allocations: 2.30 KiB) cartesian 2.669 μs (4 allocations: 2.30 KiB)
cartesian_simd 5.388 μs (4 allocations: 2.30 KiB) cartesian_simd 2.690 μs (4 allocations: 2.30 KiB)
cartesianrange 3.053 μs (4 allocations: 2.30 KiB) cartesianrange 2.641 μs (4 allocations: 2.30 KiB)
INFO: dim=2, sz=100 INFO: dim=2, sz=100
mapslices 15.031 ms (122438 allocations: 12.61 MiB) mapslices 15.137 ms (122438 allocations: 12.61 MiB)
functor 1.617 ms (25 allocations: 319.61 KiB) functor 1.662 ms (25 allocations: 319.61 KiB)
cartesian 2.205 ms (5 allocations: 159.77 KiB) cartesian 1.277 ms (5 allocations: 159.77 KiB)
cartesian_simd 2.212 ms (5 allocations: 159.77 KiB) cartesian_simd 1.242 ms (5 allocations: 159.77 KiB)
cartesianrange 2.200 ms (5 allocations: 159.77 KiB) cartesianrange 1.283 ms (5 allocations: 159.77 KiB)
INFO: dim=2, sz=1000 INFO: dim=2, sz=1000
mapslices 12.855 s (13005016 allocations: 8.03 GiB) mapslices 9.179 s (13005016 allocations: 8.03 GiB)
functor 2.026 s (25 allocations: 30.58 MiB) functor 1.991 s (25 allocations: 30.58 MiB)
cartesian 1.760 s (10 allocations: 15.29 MiB) cartesian 1.477 s (10 allocations: 15.29 MiB)
cartesian_simd 1.915 s (10 allocations: 15.29 MiB) cartesian_simd 1.632 s (10 allocations: 15.29 MiB)
cartesianrange 1.716 s (10 allocations: 15.29 MiB) cartesianrange 1.449 s (10 allocations: 15.29 MiB)
INFO: dim=(1, 2), sz=10 INFO: dim=(1, 2), sz=10
mapslices 21.477 μs (172 allocations: 17.50 KiB) mapslices 19.838 μs (172 allocations: 17.50 KiB)
functor 5.953 μs (20 allocations: 1.17 KiB) functor 5.947 μs (20 allocations: 1.17 KiB)
cartesian 2.469 μs (3 allocations: 544 bytes) cartesian 3.662 μs (3 allocations: 544 bytes)
cartesian_simd 2.746 μs (3 allocations: 544 bytes) cartesian_simd 3.666 μs (3 allocations: 544 bytes)
cartesianrange 2.611 μs (3 allocations: 544 bytes) cartesianrange 3.633 μs (3 allocations: 544 bytes)
INFO: dim=(1, 2), sz=100 INFO: dim=(1, 2), sz=100
mapslices 3.584 ms (1264 allocations: 7.90 MiB) mapslices 3.539 ms (1264 allocations: 7.90 MiB)
functor 3.213 ms (20 allocations: 4.02 KiB) functor 3.268 ms (20 allocations: 4.02 KiB)
cartesian 1.390 ms (3 allocations: 1.97 KiB) cartesian 2.592 ms (3 allocations: 1.97 KiB)
cartesian_simd 1.387 ms (3 allocations: 1.97 KiB) cartesian_simd 2.593 ms (3 allocations: 1.97 KiB)
cartesianrange 1.293 ms (3 allocations: 1.97 KiB) cartesianrange 2.592 ms (3 allocations: 1.97 KiB)
INFO: dim=(1, 2), sz=1000 INFO: dim=(1, 2), sz=1000
mapslices 7.608 s (12555 allocations: 7.47 GiB) mapslices 6.978 s (12555 allocations: 7.47 GiB)
functor 4.163 s (20 allocations: 32.31 KiB) functor 3.982 s (20 allocations: 32.31 KiB)
cartesian 1.615 s (7 allocations: 16.08 KiB) cartesian 2.916 s (7 allocations: 16.08 KiB)
cartesian_simd 1.616 s (7 allocations: 16.08 KiB) cartesian_simd 2.854 s (7 allocations: 16.08 KiB)
cartesianrange 1.535 s (7 allocations: 16.08 KiB) cartesianrange 2.909 s (7 allocations: 16.08 KiB)```
strange that @simd
did not improve the Cartesian variant much, as it had a huge effect on the CartesianRange
variant.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
adding
@inbounds @simd
to theCartesianRange
variant resulted in a performance equal toCartesian
. thanks tim!