Skip to content

Instantly share code, notes, and snippets.

@laborg
Created September 10, 2018 14:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save laborg/f9af0f5c2def4edfa410c5522afbff1d to your computer and use it in GitHub Desktop.
Save laborg/f9af0f5c2def4edfa410c5522afbff1d to your computer and use it in GitHub Desktop.
Explicit implementation of uniquifying algorithms
# Implementation bodies for unique and unique!
function _unique(itr, f::Union{Nothing, Callable} = nothing)
T = @default_eltype(itr)
y = iterate(itr)
y === nothing && return T[]
x, st = y
R = !isconcretetype(T) && IteratorEltype(itr) == EltypeUnknown() ? typeof(x) : T
f === nothing && return __uniquify(itr, R[], Set{R}(), st, x)
fx = f(x)
return __uniquify(itr, R[], Set{typeof(fx)}(), st, x, f, fx)
end
function _unique!(A::AbstractVector, f::Union{Callable, Nothing}, sorted::Bool)
isempty(A) && return A
sorted && return _uniquify_sorted!(A)
x, st = iterate(A)
f === nothing && return __uniquify!(A, Set{typeof(x)}(), firstindex(A), st, x)
fx = f(x)
return __uniquify!(A, Set{typeof(fx)}(), firstindex(A), st, x, f, fx)
end
# out of place unique
__uniquify(itr, out, seen, st, x) = _uniquify(itr, out, seen, st, x)
@inline function _uniquify(itr, out::Vector{T}, seen, st, x) where T
!in(x, seen) && (push!(seen, x); push!(out, x))
y = iterate(itr, st)
while y !== nothing
x, st = y
x isa T || return __uniquify(itr, _adj(out, x), _adj(seen, x), st, x)
!in(x, seen) && (push!(seen, x); push!(out, x))
y = iterate(itr, st)
end
return out
end
# out of place unique with f
__uniquify(itr, out, seen, st, x, f, fx) = _uniquify(itr, out, seen, st, x, f, fx)
@inline function _uniquify(itr, out::Vector{T}, seen::Set{S}, st, x, f, fx) where {T,S}
!in(fx, seen) && (push!(seen, fx); push!(out, x))
y = iterate(itr, st)
while y !== nothing
x, st = y
fx = f(x)
x isa T && fx isa S || return __uniquify(itr, _adj(out, x), _adj(seen, fx), st, x, f, fx)
!in(fx, seen) && (push!(seen, fx); push!(out, x))
y = iterate(itr, st)
end
return out
end
# inplace unique
__uniquify!(A::AbstractVector, seen, i, st, x) = _uniquify!(A, seen, i, st, x)
@inline function _uniquify!(A, seen::Set{S}, i, st, x) where S
!in(x, seen) && (push!(seen, x); A[i] = x; i += 1)
y = iterate(A, st)
while y !== nothing
x, st = y
x isa S || return __uniquify!(A, _adj(seen, x), i, st, x)
!in(x, seen) && (push!(seen, x); A[i] = x; i += 1)
y = iterate(A, st)
end
return resize!(A, i - firstindex(A))
end
# inplace unique with f
__uniquify!(A::AbstractVector, seen, i, st, x, f, fx) = _uniquify!(A, seen, i, st, x, f, fx)
@inline function _uniquify!(A, seen::Set{S}, i, st, x, f, fx) where S
!in(fx, seen) && (push!(seen, fx); A[i] = x; i += 1)
y = iterate(A, st)
while y !== nothing
x, st = y
fx = f(x)
fx isa S || return __uniquify!(A, _adj(seen, fx), i, st, x, f, fx)
!in(fx, seen) && (push!(seen, fx); A[i] = x; i += 1)
y = iterate(A, st)
end
return resize!(A, i - firstindex(A))
end
# Specififc implementation for sorted data with inplace update
function _uniquify_sorted!(A::AbstractVector)
i = firstindex(A) + 1
a = iterate(A)
lx, st = a
while a !== nothing
x, st = a
isequal(lx, x) || (lx = A[i] = x; i += 1)
a = iterate(A, st)
end
resize!(A, i - firstindex(A))
end
# if an unfitting element type appears, adjust the data structures
# make sure the 'out' array always fits the element
# if the 'seen' set has to deal with abstract types switch to Any
_adj(out::Vector{T}, ::U) where {T,U} = convert(Vector{promote_typejoin(T,U)}, out)
_adj(seen::Set{T}, ::U) where {T,U} = convert(Set{T === U ? T : Any}, seen)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment