Skip to content

Instantly share code, notes, and snippets.

@iamed2
Last active May 30, 2018 19:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save iamed2/c6556d004adca0877b64d3553447e59c to your computer and use it in GitHub Desktop.
Save iamed2/c6556d004adca0877b64d3553447e59c to your computer and use it in GitHub Desktop.
How to write a GroupBy iterator: bad and good (requires IterTools at branch ed/iterate)
using IterTools
using IterTools: PeekIter
import Base: iterate, eltype
using Base: IteratorSize, SizeUnknown
"""
    @something ex

Evaluate `ex` in the caller's scope. If the result is `nothing`, make the
*enclosing function* return `nothing` immediately; otherwise the macro call
evaluates to that result.

Used to bail out of `iterate` methods as soon as an inner `iterate` call
reports exhaustion.
"""
macro something(ex)
    return quote
        value = $(esc(ex))
        if value === nothing
            # Propagate "no more elements" straight out of the caller.
            return nothing
        end
        value
    end
end
"""
    GroupBy2{I, F}

Grouping iterator built on a peekable generator. `I` is the type of the
underlying iterable and `F` the type of the function applied by the wrapped
`Base.Generator` (in `groupby2` that function maps `x` to `(key, x)`).
"""
struct GroupBy2{I, F<:Base.Callable}
    xs::PeekIter{Base.Generator{I, F}}
end

# The number of groups depends on the keys, so no length is advertised.
Base.IteratorSize(::Type{<:GroupBy2}) = Base.SizeUnknown()

# Every group is materialised as a vector of the source's elements.
function Base.eltype(::Type{<:GroupBy2{I}}) where {I}
    return Vector{eltype(I)}
end
"""
    groupby2(keyfunc, xs)

Build a `GroupBy2` over `xs`. Each element `x` is lazily paired with its key
as `(keyfunc(x), x)` through a `Base.Generator`, and that generator is wrapped
in a `PeekIter` so the iterator can look one element ahead.
"""
function groupby2(keyfunc::F, xs::I) where {F<:Base.Callable, I}
    # Closure that tags every element with its grouping key.
    tagged = x -> (keyfunc(x), x)
    tagged_xs = Base.Generator(tagged, xs)
    return GroupBy2{I, typeof(tagged)}(PeekIter(tagged_xs))
end
"""
    iterate(it::GroupBy2[, state])

Produce the next run of consecutive elements that share a key.

`state` is either `()` (start of iteration) or a 1-tuple holding the state of
the wrapped `PeekIter`; it is splatted into the inner `iterate` call. Returns
`nothing` once the wrapped iterator is exhausted, otherwise
`(group, (inner_state,))` where `group` is a `Vector{eltype(I)}`.
"""
function Base.iterate(it::GroupBy2{I, F}, state=()) where {I, F<:Base.Callable}
    group = Vector{eltype(I)}()

    # The first element of the run fixes the key; bail out if nothing is left.
    ((key, val), xs_state) = @something iterate(it.xs, state...)
    push!(group, val)

    # Look ahead without consuming. `peek` is assumed to return `nothing` or a
    # `Some`-wrapped `(key, value)` pair -- confirm against the IterTools branch
    # in use. `something` is the Base function that unwraps `Some`; `coalesce`
    # only skips `missing` on released Julia versions and would leave the
    # `Some` wrapper in place, making the `[1]` index fail.
    peeked = peek(it.xs, xs_state)
    while peeked !== nothing && something(peeked)[1] == key
        # The peek succeeded, so this inner `iterate` call cannot return nothing.
        ((_, val), xs_state) = iterate(it.xs, xs_state)
        push!(group, val)
        peeked = peek(it.xs, xs_state)
    end

    return (group, (xs_state,))
end
julia> @benchmark collect(groupby2(first, ["face", "foo", "bar", "book", "baz"]))
BenchmarkTools.Trial:
memory estimate: 1.98 KiB
allocs estimate: 60
--------------
minimum time: 14.258 μs (0.00% GC)
median time: 15.473 μs (0.00% GC)
mean time: 16.915 μs (0.00% GC)
maximum time: 207.366 μs (0.00% GC)
--------------
samples: 10000
evals/sample: 1
julia> @benchmark collect(groupby1(first, ["face", "foo", "bar", "book", "baz"]))
BenchmarkTools.Trial:
memory estimate: 656 bytes
allocs estimate: 12
--------------
minimum time: 299.350 ns (0.00% GC)
median time: 314.264 ns (0.00% GC)
mean time: 412.854 ns (20.56% GC)
maximum time: 202.104 μs (99.77% GC)
--------------
samples: 10000
evals/sample: 254
groupby(f, xs)

Group consecutive values that share the same result of applying f.

julia> for i in groupby(x -> x[1], ["face", "foo", "bar", "book", "baz", "zzz"])
           @show i
       end
i = String["face", "foo"]
i = String["bar", "book", "baz"]
i = String["zzz"]
using IterTools
import Base: iterate, eltype
using Base: IteratorSize, SizeUnknown
"""
    @something ex

Evaluate `ex` in the caller's scope. If it yields `nothing`, return `nothing`
from the *enclosing function*; otherwise the macro call evaluates to the
result of `ex`.

Note: Julia 1.7 and later also ship a `Base.@something` with different
semantics (it raises an error rather than returning when every argument is
`nothing`). This definition is meant to be the one used by the code in this
file.
"""
macro something(ex)
    quote
        result = $(esc(ex))
        # Early exit from the calling function on exhaustion.
        result === nothing && return nothing
        result
    end
end
"""
    GroupBy1{I, F}

Grouping iterator that stores the source iterable `xs` and the key function
`keyfunc` directly; keys are computed during iteration.
"""
struct GroupBy1{I, F<:Base.Callable}
    xs::I
    keyfunc::F
end

# The number of groups depends on the keys, so no length is advertised.
Base.IteratorSize(::Type{<:GroupBy1}) = Base.SizeUnknown()

# Every group is materialised as a vector of the source's elements.
function Base.eltype(::Type{<:GroupBy1{I}}) where {I}
    return Vector{eltype(I)}
end
"""
    groupby1(keyfunc, xs)

Wrap `xs` and `keyfunc` in a `GroupBy1` iterator. Nothing is evaluated here;
the key function is only called while iterating.
"""
groupby1(keyfunc::F, xs::I) where {F<:Base.Callable, I} = GroupBy1{I, F}(xs, keyfunc)
"""
    iterate(it::GroupBy1[, state])

Produce the next run of consecutive elements of `it.xs` whose `it.keyfunc`
results compare equal with `==`.

`state` is `nothing` on the first call, afterwards a tuple
`(keep_going, key, val, xs_state)`: whether another group is pending, the key
and value of the element that opens that group, and the state of the
underlying iterator. Returns `nothing` when no group is left, otherwise
`(group, next_state)` with `group::Vector{eltype(I)}`.
"""
function Base.iterate(it::GroupBy1{I, F}, state=nothing) where {I, F<:Base.Callable}
    if state === nothing
        # First call: the first source element opens the first group.
        head, src_state = @something iterate(it.xs)
        head_key = it.keyfunc(head)
    else
        pending, head_key, head, src_state = state
        pending || return nothing
    end

    group = Vector{eltype(I)}()
    push!(group, head)

    # Consume elements until the key changes or the source runs out.
    step = iterate(it.xs, src_state)
    while step !== nothing
        item, src_state = step
        item_key = it.keyfunc(item)
        if item_key == head_key
            push!(group, item)
            step = iterate(it.xs, src_state)
        else
            # `item` belongs to the next group; carry it over in the state.
            head_key = item_key
            head = item
            break
        end
    end

    # `step === nothing` means the source is exhausted and no group is pending.
    return (group, (step !== nothing, head_key, head, src_state))
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment