Created
September 10, 2018 14:21
-
-
Save laborg/f9af0f5c2def4edfa410c5522afbff1d to your computer and use it in GitHub Desktop.
Explicit implementation of uniquifying algorithms
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Implementation bodies for unique and unique! | |
function _unique(itr, f::Union{Nothing, Callable} = nothing) | |
T = @default_eltype(itr) | |
y = iterate(itr) | |
y === nothing && return T[] | |
x, st = y | |
R = !isconcretetype(T) && IteratorEltype(itr) == EltypeUnknown() ? typeof(x) : T | |
f === nothing && return __uniquify(itr, R[], Set{R}(), st, x) | |
fx = f(x) | |
return __uniquify(itr, R[], Set{typeof(fx)}(), st, x, f, fx) | |
end | |
function _unique!(A::AbstractVector, f::Union{Callable, Nothing}, sorted::Bool) | |
isempty(A) && return A | |
sorted && return _uniquify_sorted!(A) | |
x, st = iterate(A) | |
f === nothing && return __uniquify!(A, Set{typeof(x)}(), firstindex(A), st, x) | |
fx = f(x) | |
return __uniquify!(A, Set{typeof(fx)}(), firstindex(A), st, x, f, fx) | |
end | |
# out of place unique | |
__uniquify(itr, out, seen, st, x) = _uniquify(itr, out, seen, st, x) | |
@inline function _uniquify(itr, out::Vector{T}, seen, st, x) where T | |
!in(x, seen) && (push!(seen, x); push!(out, x)) | |
y = iterate(itr, st) | |
while y !== nothing | |
x, st = y | |
x isa T || return __uniquify(itr, _adj(out, x), _adj(seen, x), st, x) | |
!in(x, seen) && (push!(seen, x); push!(out, x)) | |
y = iterate(itr, st) | |
end | |
return out | |
end | |
# out of place unique with f | |
__uniquify(itr, out, seen, st, x, f, fx) = _uniquify(itr, out, seen, st, x, f, fx) | |
@inline function _uniquify(itr, out::Vector{T}, seen::Set{S}, st, x, f, fx) where {T,S} | |
!in(fx, seen) && (push!(seen, fx); push!(out, x)) | |
y = iterate(itr, st) | |
while y !== nothing | |
x, st = y | |
fx = f(x) | |
x isa T && fx isa S || return __uniquify(itr, _adj(out, x), _adj(seen, fx), st, x, f, fx) | |
!in(fx, seen) && (push!(seen, fx); push!(out, x)) | |
y = iterate(itr, st) | |
end | |
return out | |
end | |
# inplace unique | |
__uniquify!(A::AbstractVector, seen, i, st, x) = _uniquify!(A, seen, i, st, x) | |
@inline function _uniquify!(A, seen::Set{S}, i, st, x) where S | |
!in(x, seen) && (push!(seen, x); A[i] = x; i += 1) | |
y = iterate(A, st) | |
while y !== nothing | |
x, st = y | |
x isa S || return __uniquify!(A, _adj(seen, x), i, st, x) | |
!in(x, seen) && (push!(seen, x); A[i] = x; i += 1) | |
y = iterate(A, st) | |
end | |
return resize!(A, i - firstindex(A)) | |
end | |
# inplace unique with f | |
__uniquify!(A::AbstractVector, seen, i, st, x, f, fx) = _uniquify!(A, seen, i, st, x, f, fx) | |
@inline function _uniquify!(A, seen::Set{S}, i, st, x, f, fx) where S | |
!in(fx, seen) && (push!(seen, fx); A[i] = x; i += 1) | |
y = iterate(A, st) | |
while y !== nothing | |
x, st = y | |
fx = f(x) | |
fx isa S || return __uniquify!(A, _adj(seen, fx), i, st, x, f, fx) | |
!in(fx, seen) && (push!(seen, fx); A[i] = x; i += 1) | |
y = iterate(A, st) | |
end | |
return resize!(A, i - firstindex(A)) | |
end | |
# Specififc implementation for sorted data with inplace update | |
function _uniquify_sorted!(A::AbstractVector) | |
i = firstindex(A) + 1 | |
a = iterate(A) | |
lx, st = a | |
while a !== nothing | |
x, st = a | |
isequal(lx, x) || (lx = A[i] = x; i += 1) | |
a = iterate(A, st) | |
end | |
resize!(A, i - firstindex(A)) | |
end | |
# if an unfitting element type appears, adjust the data structures | |
# make sure the 'out' array always fits the element | |
# if the 'seen' set has to deal with abstract types switch to Any | |
_adj(out::Vector{T}, ::U) where {T,U} = convert(Vector{promote_typejoin(T,U)}, out) | |
_adj(seen::Set{T}, ::U) where {T,U} = convert(Set{T === U ? T : Any}, seen) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment