Skip to content

Instantly share code, notes, and snippets.

@bjarthur
Last active June 6, 2017 13:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bjarthur/9ae4b30c4087dcd229d9 to your computer and use it in GitHub Desktop.
Save bjarthur/9ae4b30c4087dcd229d9 to your computer and use it in GitHub Desktop.
extrema implementations
using Base.Cartesian, Base.Test, BenchmarkTools
extrema_mapslices(v,region) = mapslices(extrema, v, region)
function extrema_functor(a::AbstractArray, dim)
dim = tuple(dim...)
mi = minimum(a,dim)
ma = maximum(a,dim)
reshape(collect(zip(mi,ma)), size(mi))
end
function extrema_cartesian(A::AbstractArray, dims)
sz = [size(A)...]
sz[[dims...]] = 1
B = Array{Tuple{eltype(A),eltype(A)}}(sz...)
extrema_cartesian!(B, A)
end
function extrema_cartesian_simd(A::AbstractArray, dims)
sz = [size(A)...]
sz[[dims...]] = 1
B = Array{Tuple{eltype(A),eltype(A)}}(sz...)
extrema_cartesian_simd!(B, A)
end
function extrema_cartesianrange(A::AbstractArray, dims)
sz = [size(A)...]
sz[[dims...]] = 1
B = Array{Tuple{eltype(A),eltype(A)}}(sz...)
extrema_cartesianrange!(B, A)
end
@generated function extrema_cartesian!(B,A::Array{T,N}) where {T,N}
return quote
sA = size(A)
sB = size(B)
@nloops $N i B begin
AI = @nref $N A i
(@nref $N B i) = (AI, AI)
end
Bmax = sB
@inbounds @nloops $N i A begin
AI = @nref $N A i
@nexprs $N d->(j_d = min(Bmax[d], i_{d}))
BJ = @nref $N B j
#if AI < BJ[1]
# (@nref $N B j) = (AI, BJ[2])
#elseif AI > BJ[2]
# (@nref $N B j) = (BJ[1], AI)
#end
BJ = ifelse(AI < BJ[1], (AI, BJ[2]), BJ)
BJ = ifelse(AI > BJ[2], (BJ[1], AI), BJ)
(@nref $N B j) = BJ
end
return B
end
end
@generated function extrema_cartesian_simd!(B,A::Array{T,N}) where {T,N}
return quote
sA = size(A)
sB = size(B)
@nloops $N i B begin
AI = @nref $N A i
(@nref $N B i) = (AI, AI)
end
Bmax = sB
@inbounds @nloops $(N-1) i d -> indices(A,d+1) begin
@inbounds @simd for i_0 = indices(A,1)
AI = @nref $N A d->i_{d-1}
@nexprs $N d->(j_d = min(Bmax[d], i_{d-1}))
BJ = @nref $N B j
#if AI < BJ[1]
# (@nref $N B j) = (AI, BJ[2])
#elseif AI > BJ[2]
# (@nref $N B j) = (BJ[1], AI)
#end
BJ = ifelse(AI < BJ[1], (AI, BJ[2]), BJ)
BJ = ifelse(AI > BJ[2], (BJ[1], AI), BJ)
(@nref $N B j) = BJ
end
end
return B
end
end
@noinline function extrema_cartesianrange!(B, A)
sA = size(A)
sB = size(B)
for I in CartesianRange(sB)
AI = A[I]
B[I] = (AI, AI)
end
Bmax = CartesianIndex(sB)
@inbounds @simd for I in CartesianRange(sA)
J = min(Bmax,I)
BJ = B[J]
AI = A[I]
#if AI < BJ[1]
# B[J] = (AI, BJ[2])
#elseif AI > BJ[2]
# B[J] = (BJ[1], AI)
#end
BJ = ifelse(AI < BJ[1], (AI, BJ[2]), BJ)
BJ = ifelse(AI > BJ[2], (BJ[1], AI), BJ)
B[J] = BJ
end
return B
end
foo = Dict(
10 => rand(10,11,12),
100 => rand(100,101,102),
1000 => rand(1000,1001,1002))
global dim, sz
for dim in [1,2,(1,2)], sz in sort(collect(keys(foo)))
info("dim=",dim,", sz=",sz)
@test extrema_mapslices(foo[sz],dim) == extrema_functor(foo[sz],dim)
@test extrema_mapslices(foo[sz],dim) == extrema_cartesian(foo[sz],dim)
@test extrema_mapslices(foo[sz],dim) == extrema_cartesian_simd(foo[sz],dim)
@test extrema_mapslices(foo[sz],dim) == extrema_cartesianrange(foo[sz],dim)
gc(); print("mapslices "); @btime extrema_mapslices(foo[sz],dim);
gc(); print("functor "); @btime extrema_functor(foo[sz],dim);
gc(); print("cartesian "); @btime extrema_cartesian(foo[sz],dim);
gc(); print("cartesian_simd"); @btime extrema_cartesian_simd(foo[sz],dim);
gc(); print("cartesianrange"); @btime extrema_cartesianrange(foo[sz],dim);
end
@bjarthur
Copy link
Author

on a 0-day old master i get this timings:

$ src/julia/usr/bin/julia extrema.jl 
INFO: mapslices
  0.095770 seconds (510.06 k allocations: 23.817 MB, 3.18% gc time)
  0.090618 seconds (510.06 k allocations: 23.817 MB, 3.37% gc time)
  0.086245 seconds (510.06 k allocations: 23.817 MB, 3.32% gc time)
INFO: functor
  0.004045 seconds (30 allocations: 319.922 KB)
  0.005196 seconds (30 allocations: 319.922 KB)
  0.003371 seconds (30 allocations: 319.922 KB)
INFO: cartesian
  0.005351 seconds (40 allocations: 173.859 KB)
  0.005481 seconds (40 allocations: 173.859 KB)
  0.005356 seconds (40 allocations: 173.859 KB)
INFO: cartesianrange
  0.014599 seconds (44 allocations: 174.016 KB)
  0.012541 seconds (44 allocations: 174.016 KB)
  0.013048 seconds (44 allocations: 174.016 KB)

@bjarthur
Copy link
Author

on a 0-day master i now (a year later on julia 0.7) get these timings:

INFO: dim=1, sz=10
mapslices       120.681 μs (1621 allocations: 74.69 KiB)
functor         9.574 μs (22 allocations: 5.09 KiB)
cartesian       3.039 μs (4 allocations: 2.50 KiB)
cartesianrange  6.894 μs (4 allocations: 2.50 KiB)
INFO: dim=1, sz=100
mapslices       13.118 ms (123662 allocations: 12.74 MiB)
functor         4.326 ms (25 allocations: 322.86 KiB)
cartesian       2.339 ms (5 allocations: 161.39 KiB)
cartesianrange  5.556 ms (5 allocations: 161.39 KiB)
INFO: dim=1, sz=1000
mapslices       7.641 s (13018533 allocations: 7.98 GiB)
functor         3.531 s (25 allocations: 30.61 MiB)
cartesian       1.507 s (10 allocations: 15.31 MiB)
cartesianrange  4.486 s (10 allocations: 15.31 MiB)
INFO: dim=2, sz=10
mapslices       118.024 μs (1477 allocations: 69.98 KiB)
functor         5.627 μs (22 allocations: 4.73 KiB)
cartesian       2.756 μs (4 allocations: 2.30 KiB)
cartesianrange  6.737 μs (4 allocations: 2.30 KiB)
INFO: dim=2, sz=100
mapslices       14.129 ms (122438 allocations: 12.61 MiB)
functor         1.695 ms (25 allocations: 319.61 KiB)
cartesian       2.271 ms (5 allocations: 159.77 KiB)
cartesianrange  5.460 ms (5 allocations: 159.77 KiB)
INFO: dim=2, sz=1000
mapslices       10.170 s (13005016 allocations: 8.03 GiB)
functor         1.489 s (25 allocations: 30.58 MiB)
cartesian       1.478 s (10 allocations: 15.29 MiB)
cartesianrange  4.425 s (10 allocations: 15.29 MiB)
INFO: dim=(1, 2), sz=10
mapslices       18.661 μs (172 allocations: 17.50 KiB)
functor         5.925 μs (20 allocations: 1.17 KiB)
cartesian       2.470 μs (3 allocations: 544 bytes)
cartesianrange  6.443 μs (3 allocations: 544 bytes)
INFO: dim=(1, 2), sz=100
mapslices       4.729 ms (1264 allocations: 7.90 MiB)
functor         3.237 ms (20 allocations: 4.02 KiB)
cartesian       1.445 ms (3 allocations: 1.97 KiB)
cartesianrange  4.426 ms (3 allocations: 1.97 KiB)
INFO: dim=(1, 2), sz=1000
mapslices       6.231 s (12555 allocations: 7.47 GiB)
functor         3.252 s (20 allocations: 32.31 KiB)
cartesian       1.402 s (7 allocations: 16.08 KiB)
cartesianrange  4.638 s (7 allocations: 16.08 KiB)

@bjarthur
Copy link
Author

adding @inbounds @simd to the CartesianRange variant resulted in a performance equal to Cartesian. thanks tim!

INFO: dim=1, sz=10
mapslices       123.829 μs (1621 allocations: 74.69 KiB)
functor         9.804 μs (22 allocations: 5.09 KiB)
cartesian       2.978 μs (4 allocations: 2.50 KiB)
cartesianrange  3.228 μs (4 allocations: 2.50 KiB)
INFO: dim=1, sz=100
mapslices       13.223 ms (123662 allocations: 12.74 MiB)
functor         4.337 ms (25 allocations: 322.86 KiB)
cartesian       2.366 ms (5 allocations: 161.39 KiB)
cartesianrange  2.321 ms (5 allocations: 161.39 KiB)
INFO: dim=1, sz=1000
mapslices       6.598 s (13018533 allocations: 7.98 GiB)
functor         3.411 s (25 allocations: 30.61 MiB)
cartesian       1.503 s (10 allocations: 15.31 MiB)
cartesianrange  1.410 s (10 allocations: 15.31 MiB)
INFO: dim=2, sz=10
mapslices       120.972 μs (1477 allocations: 69.98 KiB)
functor         5.656 μs (22 allocations: 4.73 KiB)
cartesian       2.746 μs (4 allocations: 2.30 KiB)
cartesianrange  3.051 μs (4 allocations: 2.30 KiB)
INFO: dim=2, sz=100
mapslices       14.839 ms (122438 allocations: 12.61 MiB)
functor         1.754 ms (25 allocations: 319.61 KiB)
cartesian       2.277 ms (5 allocations: 159.77 KiB)
cartesianrange  2.268 ms (5 allocations: 159.77 KiB)
INFO: dim=2, sz=1000
mapslices       10.981 s (13005016 allocations: 8.03 GiB)
functor         1.520 s (25 allocations: 30.58 MiB)
cartesian       1.479 s (10 allocations: 15.29 MiB)
cartesianrange  1.379 s (10 allocations: 15.29 MiB)
INFO: dim=(1, 2), sz=10
mapslices       18.748 μs (172 allocations: 17.50 KiB)
functor         5.890 μs (20 allocations: 1.17 KiB)
cartesian       2.534 μs (3 allocations: 544 bytes)
cartesianrange  2.703 μs (3 allocations: 544 bytes)
INFO: dim=(1, 2), sz=100
mapslices       4.835 ms (1264 allocations: 7.90 MiB)
functor         3.306 ms (20 allocations: 4.02 KiB)
cartesian       1.393 ms (3 allocations: 1.97 KiB)
cartesianrange  1.337 ms (3 allocations: 1.97 KiB)
INFO: dim=(1, 2), sz=1000
mapslices       6.932 s (12555 allocations: 7.47 GiB)
functor         3.257 s (20 allocations: 32.31 KiB)
cartesian       1.338 s (7 allocations: 16.08 KiB)
cartesianrange  1.212 s (7 allocations: 16.08 KiB)

@bjarthur
Copy link
Author

bjarthur commented Jun 6, 2017

using ifelse is faster when dim=2 but slower otherwise. it also makes the simd and non-simd variants of cartesian equal in speed for small arrays:

%%% if ... elseif ... end                                   %%% ifelse
INFO: dim=1, sz=10                                          INFO: dim=1, sz=10
mapslices       122.846 μs (1621 allocations: 74.69 KiB)    mapslices       126.879 μs (1621 allocations: 74.69 KiB)
functor         9.191 μs (22 allocations: 5.09 KiB)         functor         10.858 μs (22 allocations: 5.09 KiB)
cartesian       2.878 μs (4 allocations: 2.50 KiB)          cartesian       3.068 μs (4 allocations: 2.50 KiB)
cartesian_simd  5.463 μs (4 allocations: 2.50 KiB)          cartesian_simd  3.117 μs (4 allocations: 2.50 KiB)
cartesianrange  3.227 μs (4 allocations: 2.50 KiB)          cartesianrange  3.354 μs (4 allocations: 2.50 KiB)
INFO: dim=1, sz=100                                         INFO: dim=1, sz=100
mapslices       13.213 ms (123662 allocations: 12.74 MiB)   mapslices       14.398 ms (123662 allocations: 12.74 MiB)
functor         4.311 ms (25 allocations: 322.86 KiB)       functor         4.357 ms (25 allocations: 322.86 KiB)
cartesian       2.290 ms (5 allocations: 161.39 KiB)        cartesian       2.437 ms (5 allocations: 161.39 KiB)
cartesian_simd  2.236 ms (5 allocations: 161.39 KiB)        cartesian_simd  2.439 ms (5 allocations: 161.39 KiB)
cartesianrange  2.242 ms (5 allocations: 161.39 KiB)        cartesianrange  2.460 ms (5 allocations: 161.39 KiB)
INFO: dim=1, sz=1000                                        INFO: dim=1, sz=1000
mapslices       8.208 s (13018533 allocations: 7.98 GiB)    mapslices       8.566 s (13018533 allocations: 7.98 GiB)
functor         4.036 s (25 allocations: 30.61 MiB)         functor         4.599 s (25 allocations: 30.61 MiB)
cartesian       1.796 s (10 allocations: 15.31 MiB)         cartesian       3.023 s (10 allocations: 15.31 MiB)
cartesian_simd  1.790 s (10 allocations: 15.31 MiB)         cartesian_simd  2.878 s (10 allocations: 15.31 MiB)
cartesianrange  1.699 s (10 allocations: 15.31 MiB)         cartesianrange  2.981 s (10 allocations: 15.31 MiB)
INFO: dim=2, sz=10                                          INFO: dim=2, sz=10
mapslices       122.202 μs (1477 allocations: 69.98 KiB)    mapslices       126.456 μs (1477 allocations: 69.98 KiB)
functor         5.644 μs (22 allocations: 4.73 KiB)         functor         5.885 μs (22 allocations: 4.73 KiB)
cartesian       2.850 μs (4 allocations: 2.30 KiB)          cartesian       2.669 μs (4 allocations: 2.30 KiB)
cartesian_simd  5.388 μs (4 allocations: 2.30 KiB)          cartesian_simd  2.690 μs (4 allocations: 2.30 KiB)
cartesianrange  3.053 μs (4 allocations: 2.30 KiB)          cartesianrange  2.641 μs (4 allocations: 2.30 KiB)
INFO: dim=2, sz=100                                         INFO: dim=2, sz=100
mapslices       15.031 ms (122438 allocations: 12.61 MiB)   mapslices       15.137 ms (122438 allocations: 12.61 MiB)
functor         1.617 ms (25 allocations: 319.61 KiB)       functor         1.662 ms (25 allocations: 319.61 KiB)
cartesian       2.205 ms (5 allocations: 159.77 KiB)        cartesian       1.277 ms (5 allocations: 159.77 KiB)
cartesian_simd  2.212 ms (5 allocations: 159.77 KiB)        cartesian_simd  1.242 ms (5 allocations: 159.77 KiB)
cartesianrange  2.200 ms (5 allocations: 159.77 KiB)        cartesianrange  1.283 ms (5 allocations: 159.77 KiB)
INFO: dim=2, sz=1000                                        INFO: dim=2, sz=1000
mapslices       12.855 s (13005016 allocations: 8.03 GiB)   mapslices       9.179 s (13005016 allocations: 8.03 GiB)
functor         2.026 s (25 allocations: 30.58 MiB)         functor         1.991 s (25 allocations: 30.58 MiB)
cartesian       1.760 s (10 allocations: 15.29 MiB)         cartesian       1.477 s (10 allocations: 15.29 MiB)
cartesian_simd  1.915 s (10 allocations: 15.29 MiB)         cartesian_simd  1.632 s (10 allocations: 15.29 MiB)
cartesianrange  1.716 s (10 allocations: 15.29 MiB)         cartesianrange  1.449 s (10 allocations: 15.29 MiB)
INFO: dim=(1, 2), sz=10                                     INFO: dim=(1, 2), sz=10
mapslices       21.477 μs (172 allocations: 17.50 KiB)      mapslices       19.838 μs (172 allocations: 17.50 KiB)
functor         5.953 μs (20 allocations: 1.17 KiB)         functor         5.947 μs (20 allocations: 1.17 KiB)
cartesian       2.469 μs (3 allocations: 544 bytes)         cartesian       3.662 μs (3 allocations: 544 bytes)
cartesian_simd  2.746 μs (3 allocations: 544 bytes)         cartesian_simd  3.666 μs (3 allocations: 544 bytes)
cartesianrange  2.611 μs (3 allocations: 544 bytes)         cartesianrange  3.633 μs (3 allocations: 544 bytes)
INFO: dim=(1, 2), sz=100                                    INFO: dim=(1, 2), sz=100
mapslices       3.584 ms (1264 allocations: 7.90 MiB)       mapslices       3.539 ms (1264 allocations: 7.90 MiB)
functor         3.213 ms (20 allocations: 4.02 KiB)         functor         3.268 ms (20 allocations: 4.02 KiB)
cartesian       1.390 ms (3 allocations: 1.97 KiB)          cartesian       2.592 ms (3 allocations: 1.97 KiB)
cartesian_simd  1.387 ms (3 allocations: 1.97 KiB)          cartesian_simd  2.593 ms (3 allocations: 1.97 KiB)
cartesianrange  1.293 ms (3 allocations: 1.97 KiB)          cartesianrange  2.592 ms (3 allocations: 1.97 KiB)
INFO: dim=(1, 2), sz=1000                                   INFO: dim=(1, 2), sz=1000
mapslices       7.608 s (12555 allocations: 7.47 GiB)       mapslices       6.978 s (12555 allocations: 7.47 GiB)
functor         4.163 s (20 allocations: 32.31 KiB)         functor         3.982 s (20 allocations: 32.31 KiB)
cartesian       1.615 s (7 allocations: 16.08 KiB)          cartesian       2.916 s (7 allocations: 16.08 KiB)
cartesian_simd  1.616 s (7 allocations: 16.08 KiB)          cartesian_simd  2.854 s (7 allocations: 16.08 KiB)
cartesianrange  1.535 s (7 allocations: 16.08 KiB)          cartesianrange  2.909 s (7 allocations: 16.08 KiB)```

@bjarthur
Copy link
Author

bjarthur commented Jun 6, 2017

strange that @simd did not improve the Cartesian variant much, as it had a huge effect on the CartesianRange variant.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment