Skip to content

Instantly share code, notes, and snippets.

@carlobaldassi
Last active Dec 18, 2015
Embed
What would you like to do?
Alternative Zip iterator, with better performance than the one in base, but whose performance degrades if calls to start,next,done are inlined.
module NZipTest
import Base.start, Base.next, Base.done, Base.length
using Benchmark
immutable NZip0
end
immutable NZip1{I1}
i1::I1
end
immutable NZip2{I1,I2}
i1::I1
i2::I2
end
immutable NZip3{I1,I2,I3}
i1::I1
i2::I2
i3::I3
end
function nzip(itrs...)
l = length(itrs) + 1
tp = [NZip0, NZip1, NZip2, NZip3]
l > length(tp) && error("unsupported")
return tp[l](itrs...)
end
immutable NZipState0
end
immutable NZipState1{S1}
s1::S1
end
immutable NZipState2{S1,S2}
s1::S1
s2::S2
end
immutable NZipState3{S1,S2,S3}
s1::S1
s2::S2
s3::S3
end
start(z::NZip0) = NZipState0()
start(z::NZip1) = NZipState1(start(z.i1))
start(z::NZip2) = NZipState2(start(z.i1), start(z.i2))
start(z::NZip3) = NZipState3(start(z.i1), start(z.i2), start(z.i3))
next(z::NZip0, s::NZipState0) = (), s
function next(z::NZip1, s::NZipState1)
v1,s1 = next(z.i1, s.s1)
return (v1,),NZipState1(s1)
end
function next(z::NZip2, s::NZipState2)
v1,s1 = next(z.i1, s.s1)
v2,s2 = next(z.i2, s.s2)
return (v1,v2),NZipState2(s1,s2)
end
function next(z::NZip3, s::NZipState3)
v1,s1 = next(z.i1, s.s1)
v2,s2 = next(z.i2, s.s2)
v3,s3 = next(z.i3, s.s3)
return (v1,v2,v3),NZipState3(s1,s2,s3)
end
done(z::NZip0, s::NZipState0) = true
done(z::NZip1, s::NZipState1) = done(z.i1, s.s1)
done(z::NZip2, s::NZipState2) = done(z.i1, s.s1) || done(z.i2, s.s2)
done(z::NZip3, s::NZipState3) = done(z.i1, s.s1) || done(z.i2, s.s2) || done(z.i3, s.s3)
dosomething(u, v) = u + v
dosomething(u, v, w) = u + v = w
function perf()
n = 10_000
const a = rand(n)
const b = randbool(n)
#const b = rand(n)
ziptst() = for (u, v) in zip(a, b) dosomething(u,v) end
function ziptstinline()
z = zip(a, b)
state = { start(itr) for itr in z.itrs }
while !done(z, state)
for i = 1:length(z.itrs)
z.vals[i], state[i] = next(z.itrs[i], state[i])
end
(u, v), state = tuple(z.vals...), state
dosomething(u, v)
end
end
nziptst() = for (u, v) in nzip(a, b) dosomething(u,v) end
function nziptstinline()
z = nzip(a,b)
state = NZipState2(start(z.i1), start(z.i2))
while !(done(z.i1, state.s1) || done(z.i2, state.s2))
v1,s1 = next(z.i1, state.s1)
v2,s2 = next(z.i2, state.s2)
(u,v),state = (v1,v2),NZipState2(s1,s2)
dosomething(u, v)
end
end
function manualzip()
sa,sb = start(a),start(b)
while !(done(a,sa) || done(b,sb))
(u,sa),(v,sb) = next(a,sa), next(b,sb)
dosomething(u, v)
end
end
compare([ziptst, ziptstinline, nziptst, nziptstinline, manualzip], 100)
#compare([ziptst, nziptst, nziptstinline, manualzip], 100)
end
function perf3()
n = 10_000
const a = rand(n)
const b = randbool(n)
const c = rand(n)
ziptst() = for (u, v, w) in zip(a, b, c) dosomething(u,v,w) end
function ziptstinline()
z = zip(a, b, c)
state = { start(itr) for itr in z.itrs }
while !done(z, state)
for i = 1:length(z.itrs)
z.vals[i], state[i] = next(z.itrs[i], state[i])
end
(u, v, w), state = tuple(z.vals...), state
dosomething(u, v, w)
end
end
nziptst() = for (u, v, w) in nzip(a, b, c) dosomething(u,v,w) end
function nziptstinline()
z = nzip(a, b, c)
state = NZipState3(start(z.i1), start(z.i2), start(z.i3))
while !(done(z.i1, state.s1) || done(z.i2, state.s2) || done(z.i3, state.s3))
v1,s1 = next(z.i1, state.s1)
v2,s2 = next(z.i2, state.s2)
v3,s3 = next(z.i3, state.s3)
(u,v,w),state = (v1,v2,v3),NZipState3(s1,s2,s3)
dosomething(u, v, w)
end
end
function manualzip()
sa,sb,sc = start(a),start(b),start(c)
while !(done(a,sa) || done(b,sb) || done(c,sc))
(u,sa),(v,sb),(w,sc) = next(a,sa), next(b,sb), next(c,sc)
dosomething(u, v, w)
end
end
compare([ziptst, ziptstinline, nziptst, nziptstinline, manualzip], 100)
end
end
julia> include("nzipperf.jl")
julia> NZipTest.perf()
5x4 DataFrame:
Function Elapsed Relative Replications
[1,] "ziptst" 2.49119 728.793 100
[2,] "ziptstinline" 2.53212 740.766 100
[3,] "nziptst" 0.596402 174.476 100
[4,] "nziptstinline" 1.3054 381.891 100
[5,] "manualzip" 0.00341824 1.0 100
julia> NZipTest.perf3()
5x4 DataFrame:
Function Elapsed Relative Replications
[1,] "ziptst" 4.60007 10.1525 100
[2,] "ziptstinline" 4.35195 9.60491 100
[3,] "nziptst" 1.31655 2.90568 100
[4,] "nziptstinline" 2.4554 5.41916 100
[5,] "manualzip" 0.453096 1.0 100
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment