Skip to content

Instantly share code, notes, and snippets.

@Sacha0
Last active December 27, 2015 22:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Sacha0/7c0b882b92292ecd7ff4 to your computer and use it in GitHub Desktop.
Save Sacha0/7c0b882b92292ecd7ff4 to your computer and use it in GitHub Desktop.
Benchmark indexing through triangular types in Julia.
import Base.LinAlg.UnitLowerTriangular
import Base.LinAlg.UnitUpperTriangular
# Define getindex candidates for LowerTriangular matrices (tern_ is the original)
# Every candidate returns the stored entry A.data[i,j] when i >= j (on or below the diagonal)
# and a zero otherwise. They differ only in how the selection and the zero are written:
#   tern    - ternary branch;  zero derived from the mirrored stored entry A.data[j,i]
#   ternz   - ternary branch;  zero derived from the type parameter T
#   ifelse  - ifelse() call;   zero derived from the mirrored stored entry A.data[j,i]
#   ifelsez - ifelse() call;   zero derived from the type parameter T
# NOTE: ifelse is an ordinary function, so all of its arguments are evaluated on every call.
# ifelse_getindex therefore reads both A.data[i,j] and A.data[j,i] for every (i,j), and
# ifelsez_getindex reads A.data[i,j] even above the diagonal; the ternary forms only evaluate
# the branch that is taken.
# NOTE(review): T is presumably the element type of the wrapped matrix - confirm against the
# LowerTriangular definition of the Julia version in use.
tern_getindex{T,S}(A::LowerTriangular{T,S}, i::Integer, j::Integer) = i >= j ? A.data[i,j] : zero(A.data[j,i])
ternz_getindex{T,S}(A::LowerTriangular{T,S}, i::Integer, j::Integer) = i >= j ? A.data[i,j] : zero(T)
ifelse_getindex{T,S}(A::LowerTriangular{T,S}, i::Integer, j::Integer) = ifelse(i >= j, A.data[i,j], zero(A.data[j,i]))
ifelsez_getindex{T,S}(A::LowerTriangular{T,S}, i::Integer, j::Integer) = ifelse(i >= j, A.data[i,j], zero(T))
# Name prefixes of the LowerTriangular candidates; later code appends "_getindex" or
# "_<scan>" to these strings to look up / generate the corresponding functions.
ltmethstrings = ("tern", "ternz", "ifelse", "ifelsez")
# Define getindex candidates for UnitLowerTriangular matrices (tern_ is the original)
# Every candidate returns one(T) on the diagonal (i == j), the stored entry A.data[i,j]
# strictly below it (i > j) and a zero above it. They differ only in how the selection and
# the zero are written:
#   tern    - nested ternaries;                zero from the mirrored stored entry A.data[j,i]
#   ternz   - nested ternaries;                zero from the type parameter T
#   ifelse  - nested ifelse() calls;           zero from the mirrored stored entry A.data[j,i]
#   ifelsez - nested ifelse() calls;           zero from the type parameter T
#   mixro   - ternary for i > j, then ifelse;  zero from the mirrored stored entry A.data[j,i]
#   mixroz  - ternary for i > j, then ifelse;  zero from the type parameter T
# NOTE: ifelse is an ordinary function, so all of its arguments are evaluated on every call;
# the ifelse-based variants therefore read A.data even for positions whose result does not
# depend on it, whereas a ternary only evaluates the branch that is taken.
tern_getindex{T,S}(A::UnitLowerTriangular{T,S}, i::Integer, j::Integer) = i == j ? one(T) : (i > j ? A.data[i,j] : zero(A.data[j,i]))
ternz_getindex{T,S}(A::UnitLowerTriangular{T,S}, i::Integer, j::Integer) = i == j ? one(T) : (i > j ? A.data[i,j] : zero(T))
ifelse_getindex{T,S}(A::UnitLowerTriangular{T,S}, i::Integer, j::Integer) = ifelse(i == j, one(T), ifelse(i > j, A.data[i,j], zero(A.data[j,i])))
ifelsez_getindex{T,S}(A::UnitLowerTriangular{T,S}, i::Integer, j::Integer) = ifelse(i == j, one(T), ifelse(i > j, A.data[i,j], zero(T)))
mixro_getindex{T,S}(A::UnitLowerTriangular{T,S}, i::Integer, j::Integer) = i > j ? A.data[i,j] : ifelse(i == j, one(T), zero(A.data[j,i]))
mixroz_getindex{T,S}(A::UnitLowerTriangular{T,S}, i::Integer, j::Integer) = i > j ? A.data[i,j] : ifelse(i == j, one(T), zero(T))
# Name prefixes of the UnitLowerTriangular candidates; later code appends "_getindex" or
# "_<scan>" to these strings to look up / generate the corresponding functions.
# "mixroz" is listed so that mixroz_getindex is actually generated, validated and benchmarked
# like every other candidate defined above (it was previously defined but never exercised).
ultmethstrings = ("tern", "ternz", "ifelse", "ifelsez", "mixro", "mixroz")
# Directly index the underlying datastructure for comparison. Incorrect when operating over entire matrix.
# These methods return A.data[i,j] unconditionally, i.e. with no triangular structure applied,
# so they serve as a lower bound on indexing cost. Above the diagonal (and, for
# UnitLowerTriangular, on the diagonal) they return whatever the wrapped matrix stores there
# instead of the structural zero/one, which is why results of the "all" scans are not
# meaningful for dref.
dref_getindex{T,S}(A::LowerTriangular{T,S}, i::Integer, j::Integer) = A.data[i,j]
dref_getindex{T,S}(A::UnitLowerTriangular{T,S}, i::Integer, j::Integer) = A.data[i,j]
# Define benchmark functions
# xorlt() scans column-wise over the lower triangular half of the matrix, performing an xor for each element
# xorall() scans column-wise over the entire matrix, performing an xor for each element
# sumlt() scans column-wise over the lower triangular half of the matrix, summing the elements as it goes
# sumall() scans column-wise over the entire matrix, summing the elements as it goes
# ... using master getindex
# Xor-reduce the lower-triangular half of A (diagonal included) using the built-in getindex.
#   A - triangular matrix to read
#   n - number of rows/columns to scan; the caller must make sure n does not exceed the
#       dimensions of A because bounds checks are disabled by @inbounds
#   s - initial accumulator value; the final accumulator is returned
function master_xorlt(A::Union{LowerTriangular, UnitLowerTriangular}, n, s)
# j (column) is the outer loop and i (row) the inner one; i starts at j, so only i >= j is visited
@inbounds for j in 1:n, i in j:n
# `$` is the xor operator in this Julia version, so this xors the element into s
s $= getindex(A, i, j)
end
s
end
# Xor-reduce every element of the leading n x n part of A using the built-in getindex.
#   A - triangular matrix to read
#   n - number of rows/columns to scan; the caller must make sure n does not exceed the
#       dimensions of A because bounds checks are disabled by @inbounds
#   s - initial accumulator value; the final accumulator is returned
function master_xorall(A::Union{LowerTriangular, UnitLowerTriangular}, n, s)
# j (column) is the outer loop and i (row) the inner one; all positions are visited
@inbounds for j in 1:n, i in 1:n
# `$` is the xor operator in this Julia version, so this xors the element into s
s $= getindex(A, i, j)
end
s
end
# Sum the lower-triangular half of A (diagonal included) using the built-in getindex.
#   A - triangular matrix to read
#   n - number of rows/columns to scan; the caller must make sure n does not exceed the
#       dimensions of A because bounds checks are disabled by @inbounds
#   s - initial accumulator value; the final accumulator is returned
function master_sumlt(A::Union{LowerTriangular, UnitLowerTriangular}, n, s)
# j (column) is the outer loop and i (row) the inner one; i starts at j, so only i >= j is visited
@inbounds for j in 1:n, i in j:n
s += getindex(A, i, j)
end
s
end
# Sum every element of the leading n x n part of A using the built-in getindex.
#   A - triangular matrix to read
#   n - number of rows/columns to scan; the caller must make sure n does not exceed the
#       dimensions of A because bounds checks are disabled by @inbounds
#   s - initial accumulator value; the final accumulator is returned
function master_sumall(A::Union{LowerTriangular, UnitLowerTriangular}, n, s)
# j (column) is the outer loop and i (row) the inner one; all positions are visited
@inbounds for j in 1:n, i in 1:n
s += getindex(A, i, j)
end
s
end
# ... using the various getindex methods defined above
# For each triangular type and each candidate name, generate four scan functions named
# "<name>_xorlt", "<name>_xorall", "<name>_sumlt" and "<name>_sumall". Their bodies mirror the
# master_* scans but call "<name>_getindex" instead of the built-in getindex. Each generated
# method is restricted to A::tritype, so a name shared by both lists ends up with one method
# per triangular type.
for tritype in (LowerTriangular, UnitLowerTriangular)
# "dref" is always generated; the remaining names depend on the triangular type
for methstring in ("dref", (tritype == LowerTriangular ? ltmethstrings : ultmethstrings)...)
# Inside @eval, `$(...)` splices in the generated function name / candidate symbol, while
# `$=` is the xor-assignment operator of this Julia version.
@eval function $(symbol(methstring * "_xorlt"))(A::$tritype, n, s)
# lower-triangular half only (i >= j), column by column
@inbounds for j in 1:n, i in j:n
s $= $(symbol(methstring * "_getindex"))(A, i, j)
end
s
end
@eval function $(symbol(methstring * "_xorall"))(A::$tritype, n, s)
# every position of the n x n matrix, column by column
@inbounds for j in 1:n, i in 1:n
s $= $(symbol(methstring * "_getindex"))(A, i, j)
end
s
end
@eval function $(symbol(methstring * "_sumlt"))(A::$tritype, n, s)
# lower-triangular half only (i >= j), column by column
@inbounds for j in 1:n, i in j:n
s += $(symbol(methstring * "_getindex"))(A, i, j)
end
s
end
@eval function $(symbol(methstring * "_sumall"))(A::$tritype, n, s)
# every position of the n x n matrix, column by column
@inbounds for j in 1:n, i in 1:n
s += $(symbol(methstring * "_getindex"))(A, i, j)
end
s
end
end
end
# Test getindex methods via the reductions
# A small random dense Float64 matrix is wrapped in both triangular types; the sums obtained
# through the built-in getindex (master_* scans) are the reference values that every
# candidate's generated scans must reproduce exactly. "dref" is not checked here.
testn = 10
testmat = rand(testn, testn);
lttestmat = LowerTriangular(testmat);
ulttestmat = UnitLowerTriangular(testmat);

# Reference sums: lower-triangular half and whole matrix, for each triangular type
goodsumlt_lt = master_sumlt(lttestmat, testn, zero(Float64));
goodsumlt_ult = master_sumlt(ulttestmat, testn, zero(Float64));
goodsumall_lt = master_sumall(lttestmat, testn, zero(Float64));
goodsumall_ult = master_sumall(ulttestmat, testn, zero(Float64));

# LowerTriangular candidates
for methstring in ltmethstrings
    halfscansym = symbol(methstring * "_sumlt")
    fullscansym = symbol(methstring * "_sumall")
    sumlt_lt = @eval ($halfscansym)(lttestmat, testn, zero(Float64))
    sumall_lt = @eval ($fullscansym)(lttestmat, testn, zero(Float64))
    if goodsumlt_lt != sumlt_lt
        error("Failure on sumlt_lt for method $methstring!")
    end
    if goodsumall_lt != sumall_lt
        error("Failure on sumall_lt for method $methstring!")
    end
end

# UnitLowerTriangular candidates
for methstring in ultmethstrings
    halfscansym = symbol(methstring * "_sumlt")
    fullscansym = symbol(methstring * "_sumall")
    sumlt_ult = @eval ($halfscansym)(ulttestmat, testn, zero(Float64))
    sumall_ult = @eval ($fullscansym)(ulttestmat, testn, zero(Float64))
    if goodsumlt_ult != sumlt_ult
        error("Failure on sumlt_ult for method $methstring!")
    end
    if goodsumall_ult != sumall_ult
        error("Failure on sumall_ult for method $methstring!")
    end
end
# Now the benchmarks proper
using Benchmarks: @benchmark, SummaryStatistics
# Benchmark a single scan on a single matrix and return its timing as a
# (center, lower, upper) tuple taken from the benchmark's summary statistics.
#   scanfuncsym - Symbol naming the scan function to call
#   matsym      - Symbol naming the global that holds the matrix
# The call is assembled and evaluated at global scope, so it also reads the global `matsize`
# (passed as the scan's n) and starts the accumulator at zero(eltype(matrix)).
# NOTE(review): the lower/upper fields are unwrapped with get(), so they are presumably
# Nullable in the Benchmarks package - confirm against its SummaryStatistics definition.
function benchtimes(scanfuncsym, matsym)
    timing = @eval SummaryStatistics(@benchmark ($scanfuncsym)($matsym, matsize, zero(eltype($matsym))))
    center = timing.elapsed_time_center
    lower = get(timing.elapsed_time_lower)
    upper = get(timing.elapsed_time_upper)
    return (center, lower, upper)
end
# Format a timing estimate and its interval as "center units [lower,upper]".
# based on Benchmarks.pretty_time_string
#   timecenter, timelower, timeupper - times in nanoseconds
# The unit (ns, μs, ms or s) is chosen from timecenter alone and the same scale factor is
# applied to all three numbers, so the interval is always shown in the center's unit.
function prettytimes(timecenter, timelower, timeupper)
    if timecenter < 1_000.0
        scalefactor = 1.0
        units = "ns"
    elseif timecenter < 1_000_000.0
        scalefactor = 1_000.0
        units = "μs"
    elseif timecenter < 1_000_000_000.0
        scalefactor = 1_000_000.0
        units = "ms"
    else
        # leading space keeps the unit column two characters wide
        scalefactor = 1_000_000_000.0
        units = " s"
    end
    return @sprintf("%6.2f %s [%6.2f,%6.2f]", timecenter/scalefactor, units, timelower/scalefactor, timeupper/scalefactor)
end
# text display widths used below:
# wm pads the method-name column, wt pads each timing column header
wm = 10
wt = 25
# matrix sizes to test (each benchmark matrix is m x m for every m listed here)
matsizes = (100, 1000, 3000)
# perform LowerTriangular benchmarks
# For each matrix size, fresh random operands are bound to globals (benchtimes looks them up
# by name at global scope, together with `matsize`). Two tables are printed per size: the xor
# scans run on the Int matrices, the sum scans on the floating-point matrices. Each row lists
# lower-triangle and whole-matrix timings for the dense and then the sparse operand.
for m in matsizes
    global matsize = m
    global ltdensefloatA = LowerTriangular(rand(m,m))
    global ltdenseintA = LowerTriangular(rand(Int, m, m))
    global ltsparsefloatA = LowerTriangular(sprand(m, m, 0.05))
    global ltsparseintA = LowerTriangular(convert(SparseMatrixCSC{Int, Int}, round(Int, sprand(m, m, 0.05))))
    println("LowerTriangular $m x $m")

    # xor table
    println(" $(lpad("[getindex]", wm)) $(lpad("xorlt/dense", wt)), $(lpad("xorall/dense", wt)) | $(lpad("xorlt/sparse", wt)), $(lpad("xorall/sparse", wt))")
    for methstring in ("dref", "master", ltmethstrings...)
        halfscansym = symbol(methstring * "_xorlt")
        fullscansym = symbol(methstring * "_xorall")
        # benchmarks are run in the same order as the columns are printed
        densehalfcell = prettytimes(benchtimes(halfscansym, :ltdenseintA)...)
        densefullcell = prettytimes(benchtimes(fullscansym, :ltdenseintA)...)
        sparsehalfcell = prettytimes(benchtimes(halfscansym, :ltsparseintA)...)
        sparsefullcell = prettytimes(benchtimes(fullscansym, :ltsparseintA)...)
        @printf("%s: %s, %s | %s, %s\n",
                lpad(methstring, wm), densehalfcell, densefullcell, sparsehalfcell, sparsefullcell)
    end

    # sum table
    println(" $(lpad("[getindex]", wm)) $(lpad("sumlt/dense", wt)), $(lpad("sumall/dense", wt)) | $(lpad("sumlt/sparse", wt)), $(lpad("sumall/sparse", wt))")
    for methstring in ("dref", "master", ltmethstrings...)
        halfscansym = symbol(methstring * "_sumlt")
        fullscansym = symbol(methstring * "_sumall")
        densehalfcell = prettytimes(benchtimes(halfscansym, :ltdensefloatA)...)
        densefullcell = prettytimes(benchtimes(fullscansym, :ltdensefloatA)...)
        sparsehalfcell = prettytimes(benchtimes(halfscansym, :ltsparsefloatA)...)
        sparsefullcell = prettytimes(benchtimes(fullscansym, :ltsparsefloatA)...)
        @printf("%s: %s, %s | %s, %s\n",
                lpad(methstring, wm), densehalfcell, densefullcell, sparsehalfcell, sparsefullcell)
    end
    println()
end
# perform UnitLowerTriangular benchmarks
# For each matrix size, fresh random operands are bound to globals (benchtimes looks them up
# by name at global scope, together with `matsize`). Two tables are printed per size: the xor
# scans run on the Int matrices, the sum scans on the floating-point matrices. Each row lists
# lower-triangle and whole-matrix timings for the dense and then the sparse operand.
for m in matsizes
    global matsize = m
    global ultdensefloatA = UnitLowerTriangular(rand(m,m))
    global ultdenseintA = UnitLowerTriangular(rand(Int, m, m))
    global ultsparsefloatA = UnitLowerTriangular(sprand(m, m, 0.05))
    global ultsparseintA = UnitLowerTriangular(convert(SparseMatrixCSC{Int, Int}, round(Int, sprand(m, m, 0.05))))
    println("UnitLowerTriangular $m x $m")

    # xor table
    println(" $(lpad("[getindex]", wm)) $(lpad(" xorlt/dense", wt)), $(lpad("xorall/dense", wt)) | $(lpad(" xorlt/sparse", wt)), $(lpad("xorall/sparse", wt))")
    for methstring in ("dref", "master", ultmethstrings...)
        halfscansym = symbol(methstring * "_xorlt")
        fullscansym = symbol(methstring * "_xorall")
        # benchmarks are run in the same order as the columns are printed
        densehalfcell = prettytimes(benchtimes(halfscansym, :ultdenseintA)...)
        densefullcell = prettytimes(benchtimes(fullscansym, :ultdenseintA)...)
        sparsehalfcell = prettytimes(benchtimes(halfscansym, :ultsparseintA)...)
        sparsefullcell = prettytimes(benchtimes(fullscansym, :ultsparseintA)...)
        @printf("%s: %s, %s | %s, %s\n",
                lpad(methstring, wm), densehalfcell, densefullcell, sparsehalfcell, sparsefullcell)
    end

    # sum table
    println(" $(lpad("[getindex]", wm)) $(lpad(" sumlt/dense", wt)), $(lpad("sumall/dense", wt)) | $(lpad(" sumlt/sparse", wt)), $(lpad("sumall/sparse", wt))")
    for methstring in ("dref", "master", ultmethstrings...)
        halfscansym = symbol(methstring * "_sumlt")
        fullscansym = symbol(methstring * "_sumall")
        densehalfcell = prettytimes(benchtimes(halfscansym, :ultdensefloatA)...)
        densefullcell = prettytimes(benchtimes(fullscansym, :ultdensefloatA)...)
        sparsehalfcell = prettytimes(benchtimes(halfscansym, :ultsparsefloatA)...)
        sparsefullcell = prettytimes(benchtimes(fullscansym, :ultsparsefloatA)...)
        @printf("%s: %s, %s | %s, %s\n",
                lpad(methstring, wm), densehalfcell, densefullcell, sparsehalfcell, sparsefullcell)
    end
    println()
end
julia> versioninfo()
Julia Version 0.5.0-dev+1913
Commit 1380bfe* (2015-12-27 15:31 UTC)
Platform Info:
System: Darwin (x86_64-apple-darwin14.5.0)
CPU: Intel(R) Core(TM) i7-3520M CPU @ 2.90GHz
WORD_SIZE: 64
BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Sandybridge)
LAPACK: libopenblas64_
LIBM: libopenlibm
LLVM: libLLVM-3.3
LowerTriangular 100 x 100
[getindex] xorlt/dense, xorall/dense | xorlt/sparse, xorall/sparse
dref: 1.64 μs [ 1.64, 1.65], 2.57 μs [ 2.55, 2.59] | 52.12 μs [ 51.90, 52.34], 97.51 μs [ 94.72,100.30]
master: 6.01 μs [ 5.98, 6.04], 11.74 μs [ 11.69, 11.79] | 54.98 μs [ 53.53, 56.43], 119.79 μs [116.86,122.72]
tern: 6.40 μs [ 6.36, 6.44], 11.75 μs [ 11.68, 11.82] | 53.67 μs [ 53.13, 54.21], 125.39 μs [122.67,128.12]
ternz: 6.08 μs [ 6.05, 6.11], 11.77 μs [ 11.70, 11.83] | 51.07 μs [ 50.27, 51.87], 57.63 μs [ 57.01, 58.25]
ifelse: 2.64 μs [ 2.63, 2.65], 4.61 μs [ 4.60, 4.62] | 152.93 μs [151.13,154.74], 296.42 μs [295.09,297.75]
ifelsez: 2.63 μs [ 2.62, 2.64], 4.62 μs [ 4.61, 4.64] | 53.15 μs [ 52.25, 54.04], 98.70 μs [ 97.49, 99.90]
[getindex] sumlt/dense, sumall/dense | sumlt/sparse, sumall/sparse
dref: 4.55 μs [ 4.54, 4.56], 8.90 μs [ 8.86, 8.95] | 61.21 μs [ 60.88, 61.54], 115.64 μs [113.63,117.66]
master: 6.34 μs [ 6.30, 6.38], 11.20 μs [ 11.16, 11.23] | 64.57 μs [ 63.47, 65.67], 151.90 μs [150.87,152.93]
tern: 6.40 μs [ 6.34, 6.45], 11.18 μs [ 11.14, 11.22] | 63.75 μs [ 63.20, 64.30], 156.11 μs [149.98,162.25]
ternz: 6.42 μs [ 6.38, 6.46], 11.29 μs [ 11.18, 11.41] | 59.27 μs [ 58.79, 59.74], 71.95 μs [ 70.57, 73.32]
ifelse: 4.65 μs [ 4.61, 4.68], 9.09 μs [ 9.06, 9.11] | 167.88 μs [166.00,169.76], 357.80 μs [352.30,363.31]
ifelsez: 4.71 μs [ 4.68, 4.74], 9.09 μs [ 9.06, 9.12] | 62.54 μs [ 61.11, 63.96], 115.99 μs [114.39,117.59]
LowerTriangular 1000 x 1000
[getindex] xorlt/dense, xorall/dense | xorlt/sparse, xorall/sparse
dref: 331.66 μs [287.58,375.73], 422.40 μs [417.02,427.78] | 7.09 ms [ 6.78, 7.40], 13.79 ms [ 13.20, 14.38]
master: 725.29 μs [642.61,807.98], 1.08 ms [ 1.00, 1.15] | 7.42 ms [ 7.03, 7.82], 19.65 ms [ 18.89, 20.41]
tern: 762.44 μs [639.18,885.71], 1.08 ms [ 1.01, 1.14] | 7.30 ms [ 6.93, 7.66], 20.25 ms [ 19.49, 21.01]
ternz: 742.17 μs [674.81,809.54], 1.07 ms [ 1.01, 1.14] | 7.04 ms [ 6.64, 7.45], 7.47 ms [ 7.04, 7.90]
ifelse: 405.06 μs [341.54,468.58], 548.94 μs [541.14,556.74] | 22.57 ms [ 21.77, 23.36], 43.50 ms [ 42.34, 44.66]
ifelsez: 348.62 μs [313.55,383.70], 607.23 μs [547.55,666.92] | 7.13 ms [ 6.77, 7.49], 13.52 ms [ 12.92, 14.12]
[getindex] sumlt/dense, sumall/dense | sumlt/sparse, sumall/sparse
dref: 583.27 μs [523.52,643.02], 925.64 μs [860.55,990.72] | 8.21 ms [ 7.81, 8.61], 16.94 ms [ 16.18, 17.70]
master: 741.52 μs [672.85,810.19], 1.10 ms [ 1.09, 1.12] | 8.77 ms [ 8.35, 9.19], 23.59 ms [ 22.78, 24.40]
tern: 702.48 μs [660.73,744.24], 1.13 ms [ 1.06, 1.20] | 8.92 ms [ 8.44, 9.40], 23.17 ms [ 22.50, 23.85]
ternz: 745.36 μs [682.25,808.47], 1.14 ms [ 1.07, 1.22] | 8.10 ms [ 7.74, 8.47], 9.35 ms [ 8.96, 9.75]
ifelse: 586.14 μs [525.69,646.58], 945.57 μs [932.72,958.42] | 24.83 ms [ 24.03, 25.63], 50.54 ms [ 49.46, 51.61]
ifelsez: 590.31 μs [533.39,647.23], 998.16 μs [918.93,1077.38] | 8.50 ms [ 8.01, 8.98], 16.80 ms [ 16.13, 17.47]
LowerTriangular 3000 x 3000
[getindex] xorlt/dense, xorall/dense | xorlt/sparse, xorall/sparse
dref: 2.49 ms [ 2.31, 2.67], 4.23 ms [ 4.11, 4.34] | 75.55 ms [ 74.28, 76.81], 149.19 ms [147.16,151.21]
master: 5.77 ms [ 5.42, 6.12], 8.84 ms [ 8.46, 9.22] | 80.01 ms [ 78.64, 81.38], 228.07 ms [224.92,231.22]
tern: 5.82 ms [ 5.51, 6.13], 8.89 ms [ 8.46, 9.33] | 78.73 ms [ 77.48, 79.98], 233.87 ms [230.96,236.77]
ternz: 5.94 ms [ 5.62, 6.26], 8.91 ms [ 8.49, 9.33] | 74.53 ms [ 73.27, 75.79], 78.59 ms [ 77.27, 79.91]
ifelse: 2.88 ms [ 2.66, 3.09], 5.00 ms [ 4.85, 5.15] | 255.29 ms [251.54,259.05], 465.95 ms [459.12,472.78]
ifelsez: 2.83 ms [ 2.60, 3.06], 5.03 ms [ 4.84, 5.21] | 76.32 ms [ 75.12, 77.52], 146.15 ms [144.03,148.28]
[getindex] sumlt/dense, sumall/dense | sumlt/sparse, sumall/sparse
dref: 4.35 ms [ 4.16, 4.55], 8.17 ms [ 7.75, 8.58] | 89.16 ms [ 87.93, 90.39], 184.34 ms [181.91,186.76]
master: 5.80 ms [ 5.47, 6.12], 9.76 ms [ 9.24, 10.28] | 89.28 ms [ 87.80, 90.76], 270.35 ms [266.40,274.29]
tern: 5.77 ms [ 5.44, 6.09], 9.59 ms [ 9.17, 10.01] | 88.26 ms [ 86.96, 89.56], 270.47 ms [266.33,274.61]
ternz: 5.74 ms [ 5.45, 6.03], 9.60 ms [ 9.17, 10.02] | 89.36 ms [ 88.00, 90.72], 101.17 ms [ 99.60,102.73]
ifelse: 4.43 ms [ 4.20, 4.65], 8.32 ms [ 7.88, 8.76] | 278.30 ms [274.22,282.38], 582.56 ms [576.03,589.10]
ifelsez: 4.45 ms [ 4.17, 4.72], 8.28 ms [ 7.89, 8.68] | 91.21 ms [ 89.91, 92.51], 178.80 ms [176.28,181.31]
julia> versioninfo()
Julia Version 0.5.0-dev+1913
Commit 1380bfe (2015-12-27 15:31 UTC)
Platform Info:
System: Linux (x86_64-linux-gnu)
CPU: AMD Opteron(tm) Processor 6386 SE
WORD_SIZE: 64
BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Piledriver)
LAPACK: libopenblas64_
LIBM: libopenlibm
LLVM: libLLVM-3.3
LowerTriangular 100 x 100
[getindex] xorlt/dense, xorall/dense | xorlt/sparse, xorall/sparse
dref: 3.94 μs [ 3.89, 3.99], 4.05 μs [ 4.01, 4.09] | 74.07 μs [ 71.82, 76.31], 139.54 μs [135.81,143.26]
master: 10.40 μs [ 10.18, 10.63], 17.98 μs [ 17.72, 18.23] | 83.27 μs [ 80.21, 86.32], 184.84 μs [179.72,189.97]
tern: 10.46 μs [ 10.30, 10.63], 18.34 μs [ 18.03, 18.65] | 83.31 μs [ 79.56, 87.07], 184.79 μs [179.93,189.66]
ternz: 10.41 μs [ 10.26, 10.57], 17.93 μs [ 17.65, 18.21] | 74.23 μs [ 72.01, 76.45], 89.71 μs [ 87.18, 92.24]
ifelse: 4.47 μs [ 4.43, 4.52], 6.00 μs [ 5.93, 6.07] | 204.51 μs [199.19,209.84], 387.54 μs [378.94,396.14]
ifelsez: 4.47 μs [ 4.42, 4.52], 6.14 μs [ 6.03, 6.25] | 80.49 μs [ 78.89, 82.10], 148.93 μs [146.90,150.97]
[getindex] sumlt/dense, sumall/dense | sumlt/sparse, sumall/sparse
dref: 9.37 μs [ 9.26, 9.47], 16.49 μs [ 16.28, 16.69] | 101.25 μs [ 99.29,103.22], 182.52 μs [178.46,186.57]
master: 11.15 μs [ 11.03, 11.27], 18.66 μs [ 18.41, 18.91] | 93.75 μs [ 91.83, 95.67], 232.97 μs [228.45,237.49]
tern: 11.14 μs [ 11.00, 11.28], 19.16 μs [ 18.91, 19.41] | 93.60 μs [ 92.19, 95.01], 226.69 μs [221.83,231.55]
ternz: 11.09 μs [ 10.84, 11.34], 17.89 μs [ 17.67, 18.11] | 87.05 μs [ 84.62, 89.48], 112.86 μs [109.49,116.23]
ifelse: 8.68 μs [ 8.53, 8.82], 16.69 μs [ 16.42, 16.97] | 231.03 μs [224.88,237.17], 457.76 μs [448.08,467.43]
ifelsez: 8.88 μs [ 8.77, 9.00], 16.82 μs [ 16.60, 17.05] | 87.38 μs [ 84.60, 90.15], 185.11 μs [179.36,190.87]
LowerTriangular 1000 x 1000
[getindex] xorlt/dense, xorall/dense | xorlt/sparse, xorall/sparse
dref: 421.46 μs [411.81,431.10], 906.83 μs [857.95,955.71] | 10.68 ms [ 10.60, 10.77], 21.24 ms [ 21.10, 21.37]
master: 952.78 μs [923.31,982.25], 1.66 ms [ 1.61, 1.71] | 11.44 ms [ 11.19, 11.68], 29.74 ms [ 29.57, 29.91]
tern: 949.78 μs [926.43,973.13], 1.66 ms [ 1.61, 1.71] | 11.12 ms [ 11.02, 11.23], 29.75 ms [ 29.58, 29.91]
ternz: 951.59 μs [928.57,974.62], 1.66 ms [ 1.61, 1.71] | 10.73 ms [ 10.66, 10.81], 12.05 ms [ 11.98, 12.13]
ifelse: 451.34 μs [442.80,459.87], 946.09 μs [912.72,979.45] | 29.53 ms [ 29.39, 29.66], 58.71 ms [ 58.52, 58.89]
ifelsez: 450.81 μs [442.68,458.94], 942.09 μs [916.64,967.53] | 10.90 ms [ 10.82, 10.99], 21.05 ms [ 20.92, 21.17]
[getindex] sumlt/dense, sumall/dense | sumlt/sparse, sumall/sparse
dref: 810.13 μs [765.15,855.12], 1.51 ms [ 1.49, 1.54] | 14.14 ms [ 14.05, 14.23], 28.29 ms [ 28.15, 28.42]
master: 923.39 μs [900.03,946.75], 1.60 ms [ 1.55, 1.64] | 12.97 ms [ 12.88, 13.06], 33.86 ms [ 33.73, 34.00]
tern: 921.97 μs [899.42,944.52], 1.60 ms [ 1.55, 1.65] | 12.96 ms [ 12.87, 13.05], 33.88 ms [ 33.72, 34.03]
ternz: 929.60 μs [893.47,965.73], 1.59 ms [ 1.56, 1.62] | 12.85 ms [ 12.78, 12.93], 15.20 ms [ 15.09, 15.30]
ifelse: 811.76 μs [794.19,829.33], 1.49 ms [ 1.48, 1.51] | 33.85 ms [ 33.52, 34.17], 67.74 ms [ 67.24, 68.24]
ifelsez: 812.03 μs [795.16,828.91], 1.51 ms [ 1.46, 1.56] | 12.95 ms [ 12.86, 13.04], 25.47 ms [ 25.34, 25.60]
LowerTriangular 3000 x 3000
[getindex] xorlt/dense, xorall/dense | xorlt/sparse, xorall/sparse
dref: 5.80 ms [ 5.74, 5.86], 11.59 ms [ 11.50, 11.67] | 112.13 ms [111.80,112.47], 223.66 ms [223.45,223.88]
master: 9.21 ms [ 9.09, 9.33], 15.67 ms [ 15.56, 15.78] | 120.13 ms [119.22,121.05], 313.99 ms [313.03,314.95]
tern: 9.38 ms [ 9.25, 9.51], 15.68 ms [ 15.57, 15.78] | 117.90 ms [108.07,127.73], 313.81 ms [310.86,316.75]
ternz: 17.33 ms [ 17.22, 17.44], 24.80 ms [ 24.67, 24.93] | 112.58 ms [112.23,112.92], 122.47 ms [122.27,122.68]
ifelse: 12.08 ms [ 11.99, 12.17], 25.35 ms [ 25.20, 25.49] | 301.89 ms [301.48,302.29], 605.45 ms [604.91,605.98]
ifelsez: 12.09 ms [ 11.99, 12.19], 25.49 ms [ 25.35, 25.63] | 113.37 ms [112.94,113.79], 222.38 ms [221.70,223.06]
[getindex] sumlt/dense, sumall/dense | sumlt/sparse, sumall/sparse
dref: 8.07 ms [ 7.96, 8.19], 14.56 ms [ 14.46, 14.66] | 142.51 ms [141.72,143.30], 287.49 ms [286.76,288.22]
master: 9.15 ms [ 9.02, 9.29], 15.18 ms [ 15.08, 15.28] | 130.69 ms [130.41,130.98], 354.31 ms [353.39,355.23]
tern: 9.15 ms [ 9.00, 9.30], 15.20 ms [ 15.10, 15.30] | 130.73 ms [130.38,131.08], 354.49 ms [353.81,355.17]
ternz: 9.16 ms [ 9.03, 9.29], 15.18 ms [ 15.07, 15.29] | 129.57 ms [128.80,130.33], 150.96 ms [150.71,151.22]
ifelse: 8.15 ms [ 8.05, 8.26], 14.36 ms [ 14.26, 14.45] | 348.76 ms [344.55,352.97], 728.21 ms [717.77,738.64]
ifelsez: 8.16 ms [ 8.05, 8.27], 14.38 ms [ 14.30, 14.46] | 131.90 ms [131.68,132.12], 260.45 ms [259.35,261.55]
julia> versioninfo()
Julia Version 0.5.0-dev+1913
Commit 1380bfe* (2015-12-27 15:31 UTC)
Platform Info:
System: Darwin (x86_64-apple-darwin14.5.0)
CPU: Intel(R) Core(TM) i7-3520M CPU @ 2.90GHz
WORD_SIZE: 64
BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Sandybridge)
LAPACK: libopenblas64_
LIBM: libopenlibm
LLVM: libLLVM-3.3
UnitLowerTriangular 100 x 100
[getindex] xorlt/dense, xorall/dense | xorlt/sparse, xorall/sparse
dref: 1.69 μs [ 1.69, 1.70], 2.53 μs [ 2.52, 2.53] | 51.86 μs [ 51.30, 52.43], 97.57 μs [ 96.47, 98.67]
master: 18.09 μs [ 17.92, 18.26], 31.95 μs [ 31.69, 32.21] | 63.23 μs [ 62.46, 64.00], 142.99 μs [141.20,144.78]
tern: 18.15 μs [ 17.99, 18.31], 32.57 μs [ 32.33, 32.81] | 63.63 μs [ 63.02, 64.24], 144.21 μs [141.15,147.27]
ternz: 18.10 μs [ 17.95, 18.25], 32.50 μs [ 32.27, 32.73] | 62.58 μs [ 62.18, 62.98], 79.34 μs [ 77.76, 80.91]
ifelse: 4.92 μs [ 4.90, 4.94], 8.16 μs [ 8.12, 8.21] | 138.53 μs [136.96,140.11], 281.28 μs [279.73,282.83]
ifelsez: 4.99 μs [ 4.97, 5.02], 7.98 μs [ 7.96, 8.00] | 53.91 μs [ 52.84, 54.99], 101.23 μs [ 99.88,102.58]
mixro: 7.46 μs [ 7.41, 7.52], 15.30 μs [ 15.25, 15.35] | 57.33 μs [ 56.96, 57.69], 124.89 μs [123.46,126.32]
[getindex] sumlt/dense, sumall/dense | sumlt/sparse, sumall/sparse
dref: 4.45 μs [ 4.45, 4.46], 8.87 μs [ 8.86, 8.89] | 60.07 μs [ 59.26, 60.89], 115.74 μs [113.82,117.67]
master: 23.15 μs [ 22.75, 23.56], 47.34 μs [ 46.60, 48.09] | 71.68 μs [ 70.80, 72.55], 166.80 μs [162.80,170.80]
tern: 22.28 μs [ 22.10, 22.47], 44.41 μs [ 44.17, 44.65] | 75.38 μs [ 74.76, 76.00], 175.93 μs [172.69,179.17]
ternz: 22.41 μs [ 22.17, 22.64], 46.33 μs [ 46.08, 46.57] | 77.24 μs [ 75.91, 78.57], 92.35 μs [ 90.97, 93.72]
ifelse: 6.12 μs [ 6.09, 6.14], 13.10 μs [ 13.00, 13.21] | 164.64 μs [158.82,170.45], 356.45 μs [355.60,357.29]
ifelsez: 6.63 μs [ 6.56, 6.71], 12.93 μs [ 12.88, 12.97] | 67.19 μs [ 66.36, 68.02], 125.59 μs [121.80,129.37]
mixro: 7.61 μs [ 7.53, 7.69], 14.66 μs [ 14.58, 14.74] | 65.35 μs [ 64.04, 66.66], 155.96 μs [151.79,160.13]
UnitLowerTriangular 1000 x 1000
[getindex] xorlt/dense, xorall/dense | xorlt/sparse, xorall/sparse
dref: 342.30 μs [295.13,389.48], 447.94 μs [423.11,472.76] | 7.09 ms [ 6.66, 7.52], 14.07 ms [ 13.18, 14.96]
master: 1.83 ms [ 1.74, 1.92], 3.15 ms [ 2.97, 3.32] | 8.24 ms [ 7.88, 8.61], 21.56 ms [ 20.82, 22.29]
tern: 1.85 ms [ 1.72, 1.98], 3.17 ms [ 2.94, 3.40] | 8.36 ms [ 7.92, 8.80], 21.57 ms [ 20.69, 22.46]
ternz: 1.87 ms [ 1.74, 2.00], 3.17 ms [ 2.94, 3.39] | 8.13 ms [ 7.73, 8.53], 9.61 ms [ 9.19, 10.03]
ifelse: 580.54 μs [513.77,647.30], 804.79 μs [748.19,861.38] | 22.15 ms [ 21.40, 22.90], 44.22 ms [ 42.99, 45.45]
ifelsez: 573.22 μs [515.23,631.20], 796.49 μs [736.65,856.33] | 7.20 ms [ 6.82, 7.57], 14.34 ms [ 13.63, 15.04]
mixro: 795.58 μs [693.56,897.61], 1.40 ms [ 1.38, 1.42] | 7.53 ms [ 7.16, 7.90], 20.01 ms [ 19.33, 20.70]
[getindex] sumlt/dense, sumall/dense | sumlt/sparse, sumall/sparse
dref: 588.19 μs [511.05,665.32], 890.69 μs [878.98,902.40] | 8.15 ms [ 7.75, 8.54], 16.87 ms [ 16.19, 17.55]
master: 2.36 ms [ 2.17, 2.55], 4.58 ms [ 4.34, 4.82] | 9.53 ms [ 9.09, 9.97], 24.82 ms [ 23.84, 25.79]
tern: 2.27 ms [ 2.13, 2.41], 4.38 ms [ 4.17, 4.60] | 9.52 ms [ 9.06, 9.97], 26.01 ms [ 24.87, 27.16]
ternz: 2.22 ms [ 2.16, 2.28], 4.56 ms [ 4.35, 4.78] | 9.51 ms [ 9.02, 10.01], 11.48 ms [ 10.96, 12.01]
ifelse: 602.56 μs [520.56,684.56], 1.02 ms [ 0.96, 1.08] | 24.82 ms [ 24.02, 25.63], 52.60 ms [ 50.71, 54.48]
ifelsez: 644.65 μs [557.21,732.08], 1.13 ms [ 1.07, 1.19] | 9.36 ms [ 8.66, 10.06], 18.57 ms [ 17.63, 19.52]
mixro: 777.73 μs [670.57,884.88], 1.18 ms [ 1.11, 1.25] | 9.15 ms [ 8.54, 9.77], 24.19 ms [ 23.08, 25.29]
UnitLowerTriangular 3000 x 3000
[getindex] xorlt/dense, xorall/dense | xorlt/sparse, xorall/sparse
dref: 2.48 ms [ 2.32, 2.64], 4.19 ms [ 4.09, 4.29] | 74.84 ms [ 73.32, 76.36], 149.64 ms [147.56,151.72]
master: 16.19 ms [ 15.51, 16.88], 29.21 ms [ 27.86, 30.55] | 85.25 ms [ 83.55, 86.95], 243.82 ms [237.42,250.22]
tern: 16.08 ms [ 15.43, 16.73], 27.73 ms [ 26.88, 28.58] | 84.89 ms [ 83.36, 86.41], 242.64 ms [239.42,245.87]
ternz: 16.21 ms [ 15.38, 17.04], 27.61 ms [ 26.76, 28.45] | 82.28 ms [ 80.83, 83.72], 95.40 ms [ 93.95, 96.85]
ifelse: 4.39 ms [ 4.10, 4.69], 6.86 ms [ 6.59, 7.12] | 232.94 ms [228.51,237.38], 474.72 ms [459.36,490.07]
ifelsez: 4.37 ms [ 4.10, 4.64], 6.99 ms [ 6.60, 7.37] | 76.00 ms [ 74.73, 77.27], 151.84 ms [149.34,154.34]
mixro: 6.37 ms [ 5.94, 6.80], 12.29 ms [ 11.78, 12.80] | 80.10 ms [ 78.86, 81.34], 231.29 ms [227.20,235.38]
[getindex] sumlt/dense, sumall/dense | sumlt/sparse, sumall/sparse
dref: 4.36 ms [ 4.16, 4.56], 8.21 ms [ 7.75, 8.67] | 89.73 ms [ 88.39, 91.07], 183.77 ms [181.36,186.18]
master: 19.92 ms [ 19.24, 20.61], 40.56 ms [ 39.53, 41.58] | 98.98 ms [ 97.29,100.67], 292.55 ms [287.64,297.45]
tern: 20.07 ms [ 19.28, 20.87], 39.53 ms [ 38.43, 40.62] | 104.51 ms [101.51,107.50], 297.25 ms [292.12,302.39]
ternz: 20.08 ms [ 19.28, 20.89], 41.11 ms [ 39.95, 42.26] | 98.55 ms [ 97.17, 99.92], 117.28 ms [115.69,118.86]
ifelse: 4.53 ms [ 4.24, 4.82], 8.50 ms [ 8.10, 8.90] | 283.87 ms [279.70,288.05], 632.28 ms [621.32,643.24]
ifelsez: 4.54 ms [ 4.23, 4.84], 8.46 ms [ 7.99, 8.92] | 90.17 ms [ 89.00, 91.33], 186.25 ms [184.04,188.46]
mixro: 5.76 ms [ 5.47, 6.05], 10.01 ms [ 9.48, 10.53] | 87.79 ms [ 86.32, 89.26], 279.62 ms [275.05,284.18]
julia> versioninfo()
Julia Version 0.5.0-dev+1913
Commit 1380bfe (2015-12-27 15:31 UTC)
Platform Info:
System: Linux (x86_64-linux-gnu)
CPU: AMD Opteron(tm) Processor 6386 SE
WORD_SIZE: 64
BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Piledriver)
LAPACK: libopenblas64_
LIBM: libopenlibm
LLVM: libLLVM-3.3
UnitLowerTriangular 100 x 100
[getindex] xorlt/dense, xorall/dense | xorlt/sparse, xorall/sparse
dref: 3.60 μs [ 3.54, 3.67], 4.05 μs [ 4.00, 4.10] | 71.01 μs [ 67.91, 74.11], 137.97 μs [133.35,142.60]
master: 23.26 μs [ 22.71, 23.81], 43.08 μs [ 42.21, 43.96] | 85.19 μs [ 82.16, 88.22], 196.00 μs [190.43,201.57]
tern: 23.25 μs [ 22.83, 23.68], 43.02 μs [ 41.89, 44.15] | 83.69 μs [ 80.35, 87.03], 193.68 μs [187.76,199.60]
ternz: 23.21 μs [ 22.86, 23.57], 43.24 μs [ 42.06, 44.43] | 82.41 μs [ 77.84, 86.99], 99.45 μs [ 96.20,102.69]
ifelse: 6.54 μs [ 6.47, 6.61], 8.53 μs [ 8.42, 8.63] | 193.41 μs [187.41,199.41], 371.73 μs [363.91,379.55]
ifelsez: 6.54 μs [ 6.43, 6.65], 8.53 μs [ 8.42, 8.64] | 73.54 μs [ 71.14, 75.95], 142.70 μs [138.57,146.82]
mixro: 12.08 μs [ 11.92, 12.24], 18.86 μs [ 18.55, 19.18] | 73.29 μs [ 70.50, 76.09], 175.63 μs [170.07,181.20]
[getindex] sumlt/dense, sumall/dense | sumlt/sparse, sumall/sparse
dref: 8.57 μs [ 8.46, 8.67], 15.89 μs [ 15.63, 16.15] | 97.84 μs [ 94.47,101.20], 183.18 μs [177.83,188.54]
master: 43.12 μs [ 42.10, 44.14], 85.28 μs [ 81.64, 88.91] | 115.12 μs [110.91,119.33], 255.75 μs [250.86,260.65]
tern: 42.86 μs [ 41.80, 43.92], 84.14 μs [ 80.89, 87.39] | 111.43 μs [107.99,114.87], 247.80 μs [242.32,253.27]
ternz: 42.96 μs [ 41.89, 44.03], 85.59 μs [ 82.73, 88.45] | 115.41 μs [111.21,119.62], 151.89 μs [149.12,154.65]
ifelse: 9.42 μs [ 9.30, 9.55], 26.73 μs [ 26.16, 27.31] | 232.06 μs [225.09,239.04], 459.63 μs [450.31,468.95]
ifelsez: 9.43 μs [ 9.31, 9.56], 26.00 μs [ 25.57, 26.42] | 94.65 μs [ 91.78, 97.51], 181.13 μs [175.66,186.60]
mixro: 12.80 μs [ 12.60, 13.00], 23.37 μs [ 22.93, 23.82] | 94.02 μs [ 90.85, 97.19], 221.74 μs [215.39,228.09]
UnitLowerTriangular 1000 x 1000
[getindex] xorlt/dense, xorall/dense | xorlt/sparse, xorall/sparse
dref: 429.56 μs [389.49,469.63], 871.34 μs [846.41,896.27] | 10.69 ms [ 10.62, 10.77], 21.24 ms [ 21.12, 21.37]
master: 2.18 ms [ 2.13, 2.23], 4.09 ms [ 4.02, 4.16] | 12.15 ms [ 12.05, 12.25], 31.98 ms [ 31.84, 32.13]
tern: 2.18 ms [ 2.13, 2.23], 4.10 ms [ 4.04, 4.17] | 12.26 ms [ 12.14, 12.39], 31.91 ms [ 31.75, 32.07]
ternz: 2.18 ms [ 2.13, 2.22], 4.09 ms [ 4.04, 4.15] | 11.97 ms [ 11.90, 12.05], 13.70 ms [ 13.59, 13.82]
ifelse: 539.88 μs [529.15,550.60], 1.05 ms [ 0.99, 1.11] | 29.80 ms [ 29.67, 29.93], 59.46 ms [ 59.29, 59.63]
ifelsez: 549.59 μs [510.21,588.96], 1.05 ms [ 1.01, 1.08] | 11.14 ms [ 11.05, 11.23], 21.63 ms [ 21.52, 21.74]
mixro: 1.09 ms [ 1.06, 1.13], 1.72 ms [ 1.67, 1.77] | 11.29 ms [ 11.12, 11.45], 30.10 ms [ 29.94, 30.25]
[getindex] sumlt/dense, sumall/dense | sumlt/sparse, sumall/sparse
dref: 756.70 μs [744.10,769.29], 1.60 ms [ 1.55, 1.64] | 14.04 ms [ 13.95, 14.14], 28.25 ms [ 28.12, 28.39]
master: 4.11 ms [ 4.04, 4.17], 8.10 ms [ 8.02, 8.19] | 15.39 ms [ 15.30, 15.47], 38.08 ms [ 37.94, 38.22]
tern: 4.11 ms [ 4.04, 4.18], 8.09 ms [ 8.02, 8.16] | 15.44 ms [ 15.35, 15.53], 36.84 ms [ 36.70, 36.97]
ternz: 4.10 ms [ 4.05, 4.16], 8.09 ms [ 8.02, 8.16] | 15.57 ms [ 15.48, 15.66], 19.35 ms [ 19.24, 19.46]
ifelse: 852.74 μs [800.56,904.92], 2.40 ms [ 2.35, 2.45] | 32.92 ms [ 32.76, 33.08], 66.25 ms [ 65.99, 66.51]
ifelsez: 850.67 μs [805.76,895.59], 2.39 ms [ 2.34, 2.45] | 12.83 ms [ 12.75, 12.91], 25.50 ms [ 25.38, 25.63]
mixro: 1.15 ms [ 1.10, 1.20], 2.15 ms [ 2.10, 2.21] | 13.10 ms [ 13.00, 13.20], 33.73 ms [ 33.59, 33.86]
UnitLowerTriangular 3000 x 3000
[getindex] xorlt/dense, xorall/dense | xorlt/sparse, xorall/sparse
dref: 5.84 ms [ 5.78, 5.91], 11.67 ms [ 11.58, 11.76] | 112.00 ms [111.61,112.40], 223.71 ms [223.49,223.93]
master: 19.97 ms [ 19.84, 20.11], 37.56 ms [ 37.34, 37.78] | 125.20 ms [124.85,125.54], 335.48 ms [335.00,335.96]
tern: 20.67 ms [ 20.22, 21.13], 37.47 ms [ 37.33, 37.61] | 125.17 ms [124.81,125.54], 334.32 ms [333.92,334.72]
ternz: 21.46 ms [ 21.23, 21.70], 37.58 ms [ 37.35, 37.80] | 121.99 ms [121.48,122.51], 135.64 ms [135.38,135.91]
ifelse: 7.18 ms [ 7.12, 7.25], 12.88 ms [ 12.80, 12.96] | 307.29 ms [306.95,307.62], 616.46 ms [615.78,617.13]
ifelsez: 7.14 ms [ 7.07, 7.20], 12.89 ms [ 12.80, 12.98] | 116.27 ms [116.06,116.47], 227.27 ms [227.04,227.50]
mixro: 10.80 ms [ 10.56, 11.05], 16.39 ms [ 16.28, 16.50] | 117.39 ms [116.21,118.58], 319.18 ms [318.81,319.54]
[getindex] sumlt/dense, sumall/dense | sumlt/sparse, sumall/sparse
dref: 7.55 ms [ 7.48, 7.62], 15.59 ms [ 15.42, 15.76] | 142.66 ms [141.93,143.39], 287.24 ms [286.94,287.53]
master: 37.05 ms [ 36.90, 37.20], 73.07 ms [ 72.86, 73.28] | 153.19 ms [152.28,154.10], 398.18 ms [397.01,399.35]
tern: 37.07 ms [ 36.93, 37.21], 73.04 ms [ 72.85, 73.24] | 155.57 ms [154.76,156.37], 390.09 ms [389.64,390.54]
ternz: 37.04 ms [ 36.90, 37.18], 73.04 ms [ 72.86, 73.22] | 155.78 ms [154.90,156.67], 189.38 ms [189.09,189.66]
ifelse: 8.72 ms [ 8.38, 9.06], 22.10 ms [ 21.99, 22.21] | 345.51 ms [344.73,346.29], 713.40 ms [707.39,719.42]
ifelsez: 8.75 ms [ 8.39, 9.10], 22.09 ms [ 21.98, 22.21] | 129.21 ms [128.39,130.03], 259.75 ms [258.86,260.63]
mixro: 11.06 ms [ 10.86, 11.27], 20.26 ms [ 20.15, 20.37] | 131.16 ms [130.86,131.47], 360.45 ms [360.08,360.83]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment