Several approaches are compared.
- Custom function
- Query package
- NormalizedQuantiles package
Results:
Method | Function | 8 elements | 1,000 elements | 10,000 elements |
---|---|---|---|---|
Custom function | woz | 1 μs | 20 μs | 174 μs |
Query | foo | 4 μs | 49 μs | 425 μs |
NormalizedQuantiles | bar | 47 μs | 10,658 μs | 133,827 μs |
julia> A8 = [1,2,3,4,1,2,4,4]
8-element Array{Int64,1}:
1
2
3
4
1
2
4
4
julia> A1000 = rand(1:20, 1000);
julia> A10k = rand(1:20, 10_000);
julia> woz(A8)
4-element Array{Array{Int64,1},1}:
[1, 5]
[2, 6]
[3]
[4, 7, 8]
julia> foo(A8)
4-element Array{QueryOperators.Grouping{Int64,Int64},1}:
[1, 5]
[2, 6]
[3]
[4, 7, 8]
julia> bar(A8)
Dict{Int64,Array{Int64,N} where N} with 4 entries:
4 => [4, 7, 8]
2 => [2, 6]
3 => [3]
1 => [1, 5]
Results should be identical. Note that the custom function has the grouped arrays in random order so we must sort to validate the results.
julia> sort(woz(A10k,-1), 1; lt = (x,y) -> x[1] < y[1]) == foo(A10k)
true
julia> function woz(A::Vector{T}, sentinel::T) where T
p = sortperm(A)
q = A[p]
res = Vector{Vector{T}}()
grp = Vector{T}()
first = true
last = sentinel
for (i,v) in enumerate(q)
if !first && last != v
push!(res, grp)
grp = [p[i]]
else
push!(grp, p[i])
end
last = v
first = false
end
push!(res, grp)
res
end
julia> @benchmark woz($A8, -1)
BenchmarkTools.Trial:
memory estimate: 976 bytes
allocs estimate: 14
--------------
minimum time: 767.400 ns (0.00% GC)
median time: 831.605 ns (0.00% GC)
mean time: 1.050 μs (14.51% GC)
maximum time: 44.476 μs (96.31% GC)
--------------
samples: 10000
evals/sample: 110
julia> @benchmark woz($A1000, -1)
BenchmarkTools.Trial:
memory estimate: 41.77 KiB
allocs estimate: 148
--------------
minimum time: 19.981 μs (0.00% GC)
median time: 21.696 μs (0.00% GC)
mean time: 27.635 μs (14.65% GC)
maximum time: 2.955 ms (96.19% GC)
--------------
samples: 10000
evals/sample: 1
julia> @benchmark woz($A10k, -1)
BenchmarkTools.Trial:
memory estimate: 358.30 KiB
allocs estimate: 214
--------------
minimum time: 174.231 μs (0.00% GC)
median time: 201.655 μs (0.00% GC)
mean time: 274.391 μs (16.37% GC)
maximum time: 7.072 ms (84.69% GC)
--------------
samples: 10000
evals/sample: 1
julia> foo(A) = 1:length(A) |> @groupby(A[_]) |> collect
foo (generic function with 1 method)
julia> @benchmark foo($A8)
BenchmarkTools.Trial:
memory estimate: 2.59 KiB
allocs estimate: 46
--------------
minimum time: 3.804 μs (0.00% GC)
median time: 3.941 μs (0.00% GC)
mean time: 4.774 μs (10.40% GC)
maximum time: 597.961 μs (96.70% GC)
--------------
samples: 10000
evals/sample: 8
julia> @benchmark foo($A1000)
BenchmarkTools.Trial:
memory estimate: 29.31 KiB
allocs estimate: 200
--------------
minimum time: 48.980 μs (0.00% GC)
median time: 51.632 μs (0.00% GC)
mean time: 57.908 μs (5.12% GC)
maximum time: 2.605 ms (94.69% GC)
--------------
samples: 10000
evals/sample: 1
julia> @benchmark foo($A10k)
BenchmarkTools.Trial:
memory estimate: 205.31 KiB
allocs estimate: 264
--------------
minimum time: 424.572 μs (0.00% GC)
median time: 444.718 μs (0.00% GC)
mean time: 510.706 μs (4.72% GC)
maximum time: 4.823 ms (88.27% GC)
--------------
samples: 9743
evals/sample: 1
julia> using NormalizeQuantiles
julia> bar(A) = sampleRanks(A,resultMatrix=true)[2]
bar (generic function with 1 method)
julia> bar(A8)
Dict{Int64,Array{Int64,N} where N} with 4 entries:
4 => [4, 7, 8]
2 => [2, 6]
3 => [3]
1 => [1, 5]
julia> @benchmark bar($A8)
BenchmarkTools.Trial:
memory estimate: 11.72 KiB
allocs estimate: 228
--------------
minimum time: 46.661 μs (0.00% GC)
median time: 49.572 μs (0.00% GC)
mean time: 56.826 μs (2.67% GC)
maximum time: 4.235 ms (94.83% GC)
--------------
samples: 10000
evals/sample: 1
julia> @benchmark bar($A1000)
BenchmarkTools.Trial:
memory estimate: 2.20 MiB
allocs estimate: 39874
--------------
minimum time: 10.658 ms (0.00% GC)
median time: 11.537 ms (0.00% GC)
mean time: 12.173 ms (2.93% GC)
maximum time: 18.103 ms (20.36% GC)
--------------
samples: 411
evals/sample: 1
julia> @benchmark bar($A10k)
BenchmarkTools.Trial:
memory estimate: 74.90 MiB
allocs estimate: 418280
--------------
minimum time: 133.827 ms (5.97% GC)
median time: 143.431 ms (6.78% GC)
mean time: 145.699 ms (8.46% GC)
maximum time: 229.198 ms (44.02% GC)
--------------
samples: 35
evals/sample: 1