Last active
December 27, 2015 22:29
-
-
Save Sacha0/7c0b882b92292ecd7ff4 to your computer and use it in GitHub Desktop.
Benchmark indexing through triangular types in Julia.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Base.LinAlg.UnitLowerTriangular | |
import Base.LinAlg.UnitUpperTriangular | |
# getindex candidates for LowerTriangular matrices (tern_ is the original).
# The variants differ in the selection construct (branch vs. ifelse) and in how the
# zero above the diagonal is produced (zero of a stored element vs. zero(T)).

# Branch on the index; the zero is derived from the stored element A.data[j,i].
function tern_getindex{T,S}(A::LowerTriangular{T,S}, i::Integer, j::Integer)
    if i >= j
        return A.data[i,j]
    else
        return zero(A.data[j,i])
    end
end

# Branch on the index; the zero comes from the element type, so no second lookup.
function ternz_getindex{T,S}(A::LowerTriangular{T,S}, i::Integer, j::Integer)
    if i >= j
        return A.data[i,j]
    else
        return zero(T)
    end
end

# Branch-free selection; ifelse is an ordinary function, so both A.data[i,j] and
# zero(A.data[j,i]) are evaluated on every call.
function ifelse_getindex{T,S}(A::LowerTriangular{T,S}, i::Integer, j::Integer)
    return ifelse(
        i >= j,
        A.data[i,j],
        zero(A.data[j,i]))
end

# Branch-free selection with the zero taken from the element type.
function ifelsez_getindex{T,S}(A::LowerTriangular{T,S}, i::Integer, j::Integer)
    return ifelse(
        i >= j,
        A.data[i,j],
        zero(T))
end

# Name prefixes of the LowerTriangular candidates to verify and benchmark
ltmethstrings = ("tern", "ternz", "ifelse", "ifelsez")
# getindex candidates for UnitLowerTriangular matrices (tern_ is the original).
# The diagonal always reads as one(T); entries above it read as zero. The variants
# differ in the selection construct and in how the zero is produced.

# Nested branches; the zero is derived from the stored element A.data[j,i].
function tern_getindex{T,S}(A::UnitLowerTriangular{T,S}, i::Integer, j::Integer)
    if i == j
        return one(T)
    elseif i > j
        return A.data[i,j]
    else
        return zero(A.data[j,i])
    end
end

# Nested branches; the zero comes from the element type.
function ternz_getindex{T,S}(A::UnitLowerTriangular{T,S}, i::Integer, j::Integer)
    if i == j
        return one(T)
    elseif i > j
        return A.data[i,j]
    else
        return zero(T)
    end
end

# Branch-free: both ifelse calls evaluate all of their value arguments every time.
function ifelse_getindex{T,S}(A::UnitLowerTriangular{T,S}, i::Integer, j::Integer)
    return ifelse(
        i == j,
        one(T),
        ifelse(i > j, A.data[i,j], zero(A.data[j,i])))
end

# Branch-free, with the zero taken from the element type.
function ifelsez_getindex{T,S}(A::UnitLowerTriangular{T,S}, i::Integer, j::Integer)
    return ifelse(
        i == j,
        one(T),
        ifelse(i > j, A.data[i,j], zero(T)))
end

# Mixed: branch for the strictly-lower part, ifelse to pick between one and zero.
function mixro_getindex{T,S}(A::UnitLowerTriangular{T,S}, i::Integer, j::Integer)
    if i > j
        return A.data[i,j]
    else
        return ifelse(i == j, one(T), zero(A.data[j,i]))
    end
end

# Mixed, with the zero taken from the element type.
function mixroz_getindex{T,S}(A::UnitLowerTriangular{T,S}, i::Integer, j::Integer)
    if i > j
        return A.data[i,j]
    else
        return ifelse(i == j, one(T), zero(T))
    end
end
# Name prefixes of the UnitLowerTriangular candidates to verify and benchmark.
# "mixroz" is listed too: mixroz_getindex is defined alongside the others, and leaving
# it out meant it was never generated into scan functions, checked, or timed.
ultmethstrings = ("tern", "ternz", "ifelse", "ifelsez", "mixro", "mixroz")
# Baseline for comparison: index the wrapped data directly, ignoring the triangular
# structure. The result is incorrect when scanning the entire matrix, since entries
# above the diagonal (and, for the unit type, on it) are returned as stored.
function dref_getindex(A::LowerTriangular, i::Integer, j::Integer)
    return A.data[i,j]
end

function dref_getindex(A::UnitLowerTriangular, i::Integer, j::Integer)
    return A.data[i,j]
end
# Define the benchmark (scan) functions. Each takes the matrix A, its order n, and an
# initial accumulator s, and returns the final accumulator:
#   xorlt()  scans column-wise over the lower-triangular half of the matrix, xor-ing each element into s
#   xorall() scans column-wise over the entire matrix, xor-ing each element into s
#   sumlt()  scans column-wise over the lower-triangular half of the matrix, adding each element to s
#   sumall() scans column-wise over the entire matrix, adding each element to s
# ... first using the getindex from master
# Xor every element with i >= j into `s`, walking column by column with the regular
# getindex for A. Both loop indices are bounded by `n`; returns the final accumulator.
function master_xorlt(A::Union{LowerTriangular, UnitLowerTriangular}, n, s)
    @inbounds for j in 1:n
        for i in j:n
            s = s $ A[i, j]
        end
    end
    return s
end
# Xor every element of the n-by-n index range into `s`, walking column by column with
# the regular getindex for A. Returns the final accumulator.
function master_xorall(A::Union{LowerTriangular, UnitLowerTriangular}, n, s)
    @inbounds for j in 1:n
        for i in 1:n
            s = s $ A[i, j]
        end
    end
    return s
end
# Add every element with i >= j to `s`, walking column by column with the regular
# getindex for A. Both loop indices are bounded by `n`; returns the final accumulator.
function master_sumlt(A::Union{LowerTriangular, UnitLowerTriangular}, n, s)
    @inbounds for j in 1:n
        for i in j:n
            s = s + A[i, j]
        end
    end
    return s
end
# Add every element of the n-by-n index range to `s`, walking column by column with
# the regular getindex for A. Returns the final accumulator.
function master_sumall(A::Union{LowerTriangular, UnitLowerTriangular}, n, s)
    @inbounds for j in 1:n
        for i in 1:n
            s = s + A[i, j]
        end
    end
    return s
end
# ... and then using each of the getindex candidates defined above.
# For every triangular type and every candidate prefix (plus "dref"), generate the four
# scan functions <prefix>_xorlt, <prefix>_xorall, <prefix>_sumlt and <prefix>_sumall,
# each calling <prefix>_getindex in place of the regular getindex.
for tritype in (LowerTriangular, UnitLowerTriangular)
    candidates = tritype == LowerTriangular ? ltmethstrings : ultmethstrings
    for methstring in ("dref", candidates...)
        getter = symbol(methstring * "_getindex")
        xorltname = symbol(methstring * "_xorlt")
        xorallname = symbol(methstring * "_xorall")
        sumltname = symbol(methstring * "_sumlt")
        sumallname = symbol(methstring * "_sumall")
        # Inside @eval, `$name` splices a symbol in, while `$=` is the xor-assignment.
        @eval begin
            function $xorltname(A::$tritype, n, s)
                @inbounds for j in 1:n, i in j:n
                    s $= $getter(A, i, j)
                end
                s
            end
            function $xorallname(A::$tritype, n, s)
                @inbounds for j in 1:n, i in 1:n
                    s $= $getter(A, i, j)
                end
                s
            end
            function $sumltname(A::$tritype, n, s)
                @inbounds for j in 1:n, i in j:n
                    s += $getter(A, i, j)
                end
                s
            end
            function $sumallname(A::$tritype, n, s)
                @inbounds for j in 1:n, i in 1:n
                    s += $getter(A, i, j)
                end
                s
            end
        end
    end
end
# Check the getindex candidates through the sum reductions: each candidate's sums over
# a small random matrix must equal the sums obtained with the regular getindex.
testn = 10
testmat = rand(testn, testn);
lttestmat = LowerTriangular(testmat);
ulttestmat = UnitLowerTriangular(testmat);
# Reference values from the master scan functions
goodsumlt_lt = master_sumlt(lttestmat, testn, zero(Float64));
goodsumlt_ult = master_sumlt(ulttestmat, testn, zero(Float64));
goodsumall_lt = master_sumall(lttestmat, testn, zero(Float64));
goodsumall_ult = master_sumall(ulttestmat, testn, zero(Float64));
# LowerTriangular candidates
for methstring in ltmethstrings
    sumltfn = symbol(methstring * "_sumlt")
    sumallfn = symbol(methstring * "_sumall")
    sumlt_lt = @eval $sumltfn(lttestmat, testn, zero(Float64))
    sumall_lt = @eval $sumallfn(lttestmat, testn, zero(Float64))
    if goodsumlt_lt != sumlt_lt
        error("Failure on sumlt_lt for method $methstring!")
    end
    if goodsumall_lt != sumall_lt
        error("Failure on sumall_lt for method $methstring!")
    end
end
# UnitLowerTriangular candidates
for methstring in ultmethstrings
    sumltfn = symbol(methstring * "_sumlt")
    sumallfn = symbol(methstring * "_sumall")
    sumlt_ult = @eval $sumltfn(ulttestmat, testn, zero(Float64))
    sumall_ult = @eval $sumallfn(ulttestmat, testn, zero(Float64))
    if goodsumlt_ult != sumlt_ult
        error("Failure on sumlt_ult for method $methstring!")
    end
    if goodsumall_ult != sumall_ult
        error("Failure on sumall_ult for method $methstring!")
    end
end
# Now the benchmarks proper | |
using Benchmarks: @benchmark, SummaryStatistics | |
# Benchmark the call `scanfuncsym(matsym, matsize, zero(eltype(matsym)))` and return
# (center, lower, upper) of the elapsed-time estimate.
# Both arguments are symbols naming globals; the expression is evaluated at global
# scope, where it also reads the global `matsize`.
function benchtimes(scanfuncsym, matsym)
    expr = :(SummaryStatistics(@benchmark ($scanfuncsym)($matsym, matsize, zero(eltype($matsym)))))
    timing = eval(expr)
    center = timing.elapsed_time_center
    # NOTE(review): the bounds are unwrapped with get(); presumably they are Nullable
    # in Benchmarks.jl, so an absent bound would throw here — confirm.
    lower = get(timing.elapsed_time_lower)
    upper = get(timing.elapsed_time_upper)
    return (center, lower, upper)
end
# Format a time estimate and its bounds as "center units [lower,upper]".
# The unit (ns, μs, ms or s) is chosen from `timecenter` alone, with inputs taken to be
# in nanoseconds; the same scale factor is then applied to all three values.
# Based on Benchmarks.pretty_time_string.
function prettytimes(timecenter, timelower, timeupper)
    if timecenter < 1_000.0
        scalefactor, units = 1.0, "ns"
    elseif timecenter < 1_000_000.0
        scalefactor, units = 1_000.0, "μs"
    elseif timecenter < 1_000_000_000.0
        scalefactor, units = 1_000_000.0, "ms"
    else
        scalefactor, units = 1_000_000_000.0, " s"
    end
    center = timecenter / scalefactor
    lower = timelower / scalefactor
    upper = timeupper / scalefactor
    return @sprintf("%6.2f %s [%6.2f,%6.2f]", center, units, lower, upper)
end
# Text display widths used below: wm pads the method-name column, wt pads the headers
# of the timing columns
wm = 10
wt = 25
# Matrix sizes (m for an m x m matrix) to benchmark
matsizes = (100, 1000, 3000)
# Run the LowerTriangular benchmarks: for each matrix size, print one table of xor-scan
# timings (integer operands) and one of sum-scan timings (floating-point operands),
# with one row per getindex method and dense/sparse column pairs.
for m in matsizes
    # benchtimes evaluates its benchmark at global scope, so the size and the operands
    # have to be globals.
    global matsize = m
    global ltdensefloatA = LowerTriangular(rand(m,m));
    global ltdenseintA = LowerTriangular(rand(Int, m, m));
    global ltsparsefloatA = LowerTriangular(sprand(m, m, 0.05));
    global ltsparseintA = LowerTriangular(convert(SparseMatrixCSC{Int, Int}, round(Int, sprand(m, m, 0.05))));
    println("LowerTriangular $m x $m")
    # xor scans
    println(" $(lpad("[getindex]", wm)) $(lpad("xorlt/dense", wt)), $(lpad("xorall/dense", wt)) | $(lpad("xorlt/sparse", wt)), $(lpad("xorall/sparse", wt))")
    for methstring in ("dref", "master", ltmethstrings...)
        ltscan = symbol(methstring * "_xorlt")
        allscan = symbol(methstring * "_xorall")
        denselt = prettytimes(benchtimes(ltscan, :ltdenseintA)...)
        denseall = prettytimes(benchtimes(allscan, :ltdenseintA)...)
        sparselt = prettytimes(benchtimes(ltscan, :ltsparseintA)...)
        sparseall = prettytimes(benchtimes(allscan, :ltsparseintA)...)
        @printf("%s: %s, %s | %s, %s\n", lpad(methstring, wm), denselt, denseall, sparselt, sparseall)
    end
    # sum scans
    println(" $(lpad("[getindex]", wm)) $(lpad("sumlt/dense", wt)), $(lpad("sumall/dense", wt)) | $(lpad("sumlt/sparse", wt)), $(lpad("sumall/sparse", wt))")
    for methstring in ("dref", "master", ltmethstrings...)
        ltscan = symbol(methstring * "_sumlt")
        allscan = symbol(methstring * "_sumall")
        denselt = prettytimes(benchtimes(ltscan, :ltdensefloatA)...)
        denseall = prettytimes(benchtimes(allscan, :ltdensefloatA)...)
        sparselt = prettytimes(benchtimes(ltscan, :ltsparsefloatA)...)
        sparseall = prettytimes(benchtimes(allscan, :ltsparsefloatA)...)
        @printf("%s: %s, %s | %s, %s\n", lpad(methstring, wm), denselt, denseall, sparselt, sparseall)
    end
    println()
end
# Run the UnitLowerTriangular benchmarks: for each matrix size, print one table of
# xor-scan timings (integer operands) and one of sum-scan timings (floating-point
# operands), with one row per getindex method and dense/sparse column pairs.
for m in matsizes
    # benchtimes evaluates its benchmark at global scope, so the size and the operands
    # have to be globals.
    global matsize = m
    global ultdensefloatA = UnitLowerTriangular(rand(m,m));
    global ultdenseintA = UnitLowerTriangular(rand(Int, m, m));
    global ultsparsefloatA = UnitLowerTriangular(sprand(m, m, 0.05));
    global ultsparseintA = UnitLowerTriangular(convert(SparseMatrixCSC{Int, Int}, round(Int, sprand(m, m, 0.05))));
    println("UnitLowerTriangular $m x $m")
    # xor scans
    println(" $(lpad("[getindex]", wm)) $(lpad(" xorlt/dense", wt)), $(lpad("xorall/dense", wt)) | $(lpad(" xorlt/sparse", wt)), $(lpad("xorall/sparse", wt))")
    for methstring in ("dref", "master", ultmethstrings...)
        ltscan = symbol(methstring * "_xorlt")
        allscan = symbol(methstring * "_xorall")
        denselt = prettytimes(benchtimes(ltscan, :ultdenseintA)...)
        denseall = prettytimes(benchtimes(allscan, :ultdenseintA)...)
        sparselt = prettytimes(benchtimes(ltscan, :ultsparseintA)...)
        sparseall = prettytimes(benchtimes(allscan, :ultsparseintA)...)
        @printf("%s: %s, %s | %s, %s\n", lpad(methstring, wm), denselt, denseall, sparselt, sparseall)
    end
    # sum scans
    println(" $(lpad("[getindex]", wm)) $(lpad(" sumlt/dense", wt)), $(lpad("sumall/dense", wt)) | $(lpad(" sumlt/sparse", wt)), $(lpad("sumall/sparse", wt))")
    for methstring in ("dref", "master", ultmethstrings...)
        ltscan = symbol(methstring * "_sumlt")
        allscan = symbol(methstring * "_sumall")
        denselt = prettytimes(benchtimes(ltscan, :ultdensefloatA)...)
        denseall = prettytimes(benchtimes(allscan, :ultdensefloatA)...)
        sparselt = prettytimes(benchtimes(ltscan, :ultsparsefloatA)...)
        sparseall = prettytimes(benchtimes(allscan, :ultsparsefloatA)...)
        @printf("%s: %s, %s | %s, %s\n", lpad(methstring, wm), denselt, denseall, sparselt, sparseall)
    end
    println()
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
julia> versioninfo() | |
Julia Version 0.5.0-dev+1913 | |
Commit 1380bfe* (2015-12-27 15:31 UTC) | |
Platform Info: | |
System: Darwin (x86_64-apple-darwin14.5.0) | |
CPU: Intel(R) Core(TM) i7-3520M CPU @ 2.90GHz | |
WORD_SIZE: 64 | |
BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Sandybridge) | |
LAPACK: libopenblas64_ | |
LIBM: libopenlibm | |
LLVM: libLLVM-3.3 | |
LowerTriangular 100 x 100 | |
[getindex] xorlt/dense, xorall/dense | xorlt/sparse, xorall/sparse | |
dref: 1.64 μs [ 1.64, 1.65], 2.57 μs [ 2.55, 2.59] | 52.12 μs [ 51.90, 52.34], 97.51 μs [ 94.72,100.30] | |
master: 6.01 μs [ 5.98, 6.04], 11.74 μs [ 11.69, 11.79] | 54.98 μs [ 53.53, 56.43], 119.79 μs [116.86,122.72] | |
tern: 6.40 μs [ 6.36, 6.44], 11.75 μs [ 11.68, 11.82] | 53.67 μs [ 53.13, 54.21], 125.39 μs [122.67,128.12] | |
ternz: 6.08 μs [ 6.05, 6.11], 11.77 μs [ 11.70, 11.83] | 51.07 μs [ 50.27, 51.87], 57.63 μs [ 57.01, 58.25] | |
ifelse: 2.64 μs [ 2.63, 2.65], 4.61 μs [ 4.60, 4.62] | 152.93 μs [151.13,154.74], 296.42 μs [295.09,297.75] | |
ifelsez: 2.63 μs [ 2.62, 2.64], 4.62 μs [ 4.61, 4.64] | 53.15 μs [ 52.25, 54.04], 98.70 μs [ 97.49, 99.90] | |
[getindex] sumlt/dense, sumall/dense | sumlt/sparse, sumall/sparse | |
dref: 4.55 μs [ 4.54, 4.56], 8.90 μs [ 8.86, 8.95] | 61.21 μs [ 60.88, 61.54], 115.64 μs [113.63,117.66] | |
master: 6.34 μs [ 6.30, 6.38], 11.20 μs [ 11.16, 11.23] | 64.57 μs [ 63.47, 65.67], 151.90 μs [150.87,152.93] | |
tern: 6.40 μs [ 6.34, 6.45], 11.18 μs [ 11.14, 11.22] | 63.75 μs [ 63.20, 64.30], 156.11 μs [149.98,162.25] | |
ternz: 6.42 μs [ 6.38, 6.46], 11.29 μs [ 11.18, 11.41] | 59.27 μs [ 58.79, 59.74], 71.95 μs [ 70.57, 73.32] | |
ifelse: 4.65 μs [ 4.61, 4.68], 9.09 μs [ 9.06, 9.11] | 167.88 μs [166.00,169.76], 357.80 μs [352.30,363.31] | |
ifelsez: 4.71 μs [ 4.68, 4.74], 9.09 μs [ 9.06, 9.12] | 62.54 μs [ 61.11, 63.96], 115.99 μs [114.39,117.59] | |
LowerTriangular 1000 x 1000 | |
[getindex] xorlt/dense, xorall/dense | xorlt/sparse, xorall/sparse | |
dref: 331.66 μs [287.58,375.73], 422.40 μs [417.02,427.78] | 7.09 ms [ 6.78, 7.40], 13.79 ms [ 13.20, 14.38] | |
master: 725.29 μs [642.61,807.98], 1.08 ms [ 1.00, 1.15] | 7.42 ms [ 7.03, 7.82], 19.65 ms [ 18.89, 20.41] | |
tern: 762.44 μs [639.18,885.71], 1.08 ms [ 1.01, 1.14] | 7.30 ms [ 6.93, 7.66], 20.25 ms [ 19.49, 21.01] | |
ternz: 742.17 μs [674.81,809.54], 1.07 ms [ 1.01, 1.14] | 7.04 ms [ 6.64, 7.45], 7.47 ms [ 7.04, 7.90] | |
ifelse: 405.06 μs [341.54,468.58], 548.94 μs [541.14,556.74] | 22.57 ms [ 21.77, 23.36], 43.50 ms [ 42.34, 44.66] | |
ifelsez: 348.62 μs [313.55,383.70], 607.23 μs [547.55,666.92] | 7.13 ms [ 6.77, 7.49], 13.52 ms [ 12.92, 14.12] | |
[getindex] sumlt/dense, sumall/dense | sumlt/sparse, sumall/sparse | |
dref: 583.27 μs [523.52,643.02], 925.64 μs [860.55,990.72] | 8.21 ms [ 7.81, 8.61], 16.94 ms [ 16.18, 17.70] | |
master: 741.52 μs [672.85,810.19], 1.10 ms [ 1.09, 1.12] | 8.77 ms [ 8.35, 9.19], 23.59 ms [ 22.78, 24.40] | |
tern: 702.48 μs [660.73,744.24], 1.13 ms [ 1.06, 1.20] | 8.92 ms [ 8.44, 9.40], 23.17 ms [ 22.50, 23.85] | |
ternz: 745.36 μs [682.25,808.47], 1.14 ms [ 1.07, 1.22] | 8.10 ms [ 7.74, 8.47], 9.35 ms [ 8.96, 9.75] | |
ifelse: 586.14 μs [525.69,646.58], 945.57 μs [932.72,958.42] | 24.83 ms [ 24.03, 25.63], 50.54 ms [ 49.46, 51.61] | |
ifelsez: 590.31 μs [533.39,647.23], 998.16 μs [918.93,1077.38] | 8.50 ms [ 8.01, 8.98], 16.80 ms [ 16.13, 17.47] | |
LowerTriangular 3000 x 3000 | |
[getindex] xorlt/dense, xorall/dense | xorlt/sparse, xorall/sparse | |
dref: 2.49 ms [ 2.31, 2.67], 4.23 ms [ 4.11, 4.34] | 75.55 ms [ 74.28, 76.81], 149.19 ms [147.16,151.21] | |
master: 5.77 ms [ 5.42, 6.12], 8.84 ms [ 8.46, 9.22] | 80.01 ms [ 78.64, 81.38], 228.07 ms [224.92,231.22] | |
tern: 5.82 ms [ 5.51, 6.13], 8.89 ms [ 8.46, 9.33] | 78.73 ms [ 77.48, 79.98], 233.87 ms [230.96,236.77] | |
ternz: 5.94 ms [ 5.62, 6.26], 8.91 ms [ 8.49, 9.33] | 74.53 ms [ 73.27, 75.79], 78.59 ms [ 77.27, 79.91] | |
ifelse: 2.88 ms [ 2.66, 3.09], 5.00 ms [ 4.85, 5.15] | 255.29 ms [251.54,259.05], 465.95 ms [459.12,472.78] | |
ifelsez: 2.83 ms [ 2.60, 3.06], 5.03 ms [ 4.84, 5.21] | 76.32 ms [ 75.12, 77.52], 146.15 ms [144.03,148.28] | |
[getindex] sumlt/dense, sumall/dense | sumlt/sparse, sumall/sparse | |
dref: 4.35 ms [ 4.16, 4.55], 8.17 ms [ 7.75, 8.58] | 89.16 ms [ 87.93, 90.39], 184.34 ms [181.91,186.76] | |
master: 5.80 ms [ 5.47, 6.12], 9.76 ms [ 9.24, 10.28] | 89.28 ms [ 87.80, 90.76], 270.35 ms [266.40,274.29] | |
tern: 5.77 ms [ 5.44, 6.09], 9.59 ms [ 9.17, 10.01] | 88.26 ms [ 86.96, 89.56], 270.47 ms [266.33,274.61] | |
ternz: 5.74 ms [ 5.45, 6.03], 9.60 ms [ 9.17, 10.02] | 89.36 ms [ 88.00, 90.72], 101.17 ms [ 99.60,102.73] | |
ifelse: 4.43 ms [ 4.20, 4.65], 8.32 ms [ 7.88, 8.76] | 278.30 ms [274.22,282.38], 582.56 ms [576.03,589.10] | |
ifelsez: 4.45 ms [ 4.17, 4.72], 8.28 ms [ 7.89, 8.68] | 91.21 ms [ 89.91, 92.51], 178.80 ms [176.28,181.31] | |
julia> versioninfo() | |
Julia Version 0.5.0-dev+1913 | |
Commit 1380bfe (2015-12-27 15:31 UTC) | |
Platform Info: | |
System: Linux (x86_64-linux-gnu) | |
CPU: AMD Opteron(tm) Processor 6386 SE | |
WORD_SIZE: 64 | |
BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Piledriver) | |
LAPACK: libopenblas64_ | |
LIBM: libopenlibm | |
LLVM: libLLVM-3.3 | |
LowerTriangular 100 x 100 | |
[getindex] xorlt/dense, xorall/dense | xorlt/sparse, xorall/sparse | |
dref: 3.94 μs [ 3.89, 3.99], 4.05 μs [ 4.01, 4.09] | 74.07 μs [ 71.82, 76.31], 139.54 μs [135.81,143.26] | |
master: 10.40 μs [ 10.18, 10.63], 17.98 μs [ 17.72, 18.23] | 83.27 μs [ 80.21, 86.32], 184.84 μs [179.72,189.97] | |
tern: 10.46 μs [ 10.30, 10.63], 18.34 μs [ 18.03, 18.65] | 83.31 μs [ 79.56, 87.07], 184.79 μs [179.93,189.66] | |
ternz: 10.41 μs [ 10.26, 10.57], 17.93 μs [ 17.65, 18.21] | 74.23 μs [ 72.01, 76.45], 89.71 μs [ 87.18, 92.24] | |
ifelse: 4.47 μs [ 4.43, 4.52], 6.00 μs [ 5.93, 6.07] | 204.51 μs [199.19,209.84], 387.54 μs [378.94,396.14] | |
ifelsez: 4.47 μs [ 4.42, 4.52], 6.14 μs [ 6.03, 6.25] | 80.49 μs [ 78.89, 82.10], 148.93 μs [146.90,150.97] | |
[getindex] sumlt/dense, sumall/dense | sumlt/sparse, sumall/sparse | |
dref: 9.37 μs [ 9.26, 9.47], 16.49 μs [ 16.28, 16.69] | 101.25 μs [ 99.29,103.22], 182.52 μs [178.46,186.57] | |
master: 11.15 μs [ 11.03, 11.27], 18.66 μs [ 18.41, 18.91] | 93.75 μs [ 91.83, 95.67], 232.97 μs [228.45,237.49] | |
tern: 11.14 μs [ 11.00, 11.28], 19.16 μs [ 18.91, 19.41] | 93.60 μs [ 92.19, 95.01], 226.69 μs [221.83,231.55] | |
ternz: 11.09 μs [ 10.84, 11.34], 17.89 μs [ 17.67, 18.11] | 87.05 μs [ 84.62, 89.48], 112.86 μs [109.49,116.23] | |
ifelse: 8.68 μs [ 8.53, 8.82], 16.69 μs [ 16.42, 16.97] | 231.03 μs [224.88,237.17], 457.76 μs [448.08,467.43] | |
ifelsez: 8.88 μs [ 8.77, 9.00], 16.82 μs [ 16.60, 17.05] | 87.38 μs [ 84.60, 90.15], 185.11 μs [179.36,190.87] | |
LowerTriangular 1000 x 1000 | |
[getindex] xorlt/dense, xorall/dense | xorlt/sparse, xorall/sparse | |
dref: 421.46 μs [411.81,431.10], 906.83 μs [857.95,955.71] | 10.68 ms [ 10.60, 10.77], 21.24 ms [ 21.10, 21.37] | |
master: 952.78 μs [923.31,982.25], 1.66 ms [ 1.61, 1.71] | 11.44 ms [ 11.19, 11.68], 29.74 ms [ 29.57, 29.91] | |
tern: 949.78 μs [926.43,973.13], 1.66 ms [ 1.61, 1.71] | 11.12 ms [ 11.02, 11.23], 29.75 ms [ 29.58, 29.91] | |
ternz: 951.59 μs [928.57,974.62], 1.66 ms [ 1.61, 1.71] | 10.73 ms [ 10.66, 10.81], 12.05 ms [ 11.98, 12.13] | |
ifelse: 451.34 μs [442.80,459.87], 946.09 μs [912.72,979.45] | 29.53 ms [ 29.39, 29.66], 58.71 ms [ 58.52, 58.89] | |
ifelsez: 450.81 μs [442.68,458.94], 942.09 μs [916.64,967.53] | 10.90 ms [ 10.82, 10.99], 21.05 ms [ 20.92, 21.17] | |
[getindex] sumlt/dense, sumall/dense | sumlt/sparse, sumall/sparse | |
dref: 810.13 μs [765.15,855.12], 1.51 ms [ 1.49, 1.54] | 14.14 ms [ 14.05, 14.23], 28.29 ms [ 28.15, 28.42] | |
master: 923.39 μs [900.03,946.75], 1.60 ms [ 1.55, 1.64] | 12.97 ms [ 12.88, 13.06], 33.86 ms [ 33.73, 34.00] | |
tern: 921.97 μs [899.42,944.52], 1.60 ms [ 1.55, 1.65] | 12.96 ms [ 12.87, 13.05], 33.88 ms [ 33.72, 34.03] | |
ternz: 929.60 μs [893.47,965.73], 1.59 ms [ 1.56, 1.62] | 12.85 ms [ 12.78, 12.93], 15.20 ms [ 15.09, 15.30] | |
ifelse: 811.76 μs [794.19,829.33], 1.49 ms [ 1.48, 1.51] | 33.85 ms [ 33.52, 34.17], 67.74 ms [ 67.24, 68.24] | |
ifelsez: 812.03 μs [795.16,828.91], 1.51 ms [ 1.46, 1.56] | 12.95 ms [ 12.86, 13.04], 25.47 ms [ 25.34, 25.60] | |
LowerTriangular 3000 x 3000 | |
[getindex] xorlt/dense, xorall/dense | xorlt/sparse, xorall/sparse | |
dref: 5.80 ms [ 5.74, 5.86], 11.59 ms [ 11.50, 11.67] | 112.13 ms [111.80,112.47], 223.66 ms [223.45,223.88] | |
master: 9.21 ms [ 9.09, 9.33], 15.67 ms [ 15.56, 15.78] | 120.13 ms [119.22,121.05], 313.99 ms [313.03,314.95] | |
tern: 9.38 ms [ 9.25, 9.51], 15.68 ms [ 15.57, 15.78] | 117.90 ms [108.07,127.73], 313.81 ms [310.86,316.75] | |
ternz: 17.33 ms [ 17.22, 17.44], 24.80 ms [ 24.67, 24.93] | 112.58 ms [112.23,112.92], 122.47 ms [122.27,122.68] | |
ifelse: 12.08 ms [ 11.99, 12.17], 25.35 ms [ 25.20, 25.49] | 301.89 ms [301.48,302.29], 605.45 ms [604.91,605.98] | |
ifelsez: 12.09 ms [ 11.99, 12.19], 25.49 ms [ 25.35, 25.63] | 113.37 ms [112.94,113.79], 222.38 ms [221.70,223.06] | |
[getindex] sumlt/dense, sumall/dense | sumlt/sparse, sumall/sparse | |
dref: 8.07 ms [ 7.96, 8.19], 14.56 ms [ 14.46, 14.66] | 142.51 ms [141.72,143.30], 287.49 ms [286.76,288.22] | |
master: 9.15 ms [ 9.02, 9.29], 15.18 ms [ 15.08, 15.28] | 130.69 ms [130.41,130.98], 354.31 ms [353.39,355.23] | |
tern: 9.15 ms [ 9.00, 9.30], 15.20 ms [ 15.10, 15.30] | 130.73 ms [130.38,131.08], 354.49 ms [353.81,355.17] | |
ternz: 9.16 ms [ 9.03, 9.29], 15.18 ms [ 15.07, 15.29] | 129.57 ms [128.80,130.33], 150.96 ms [150.71,151.22] | |
ifelse: 8.15 ms [ 8.05, 8.26], 14.36 ms [ 14.26, 14.45] | 348.76 ms [344.55,352.97], 728.21 ms [717.77,738.64] | |
ifelsez: 8.16 ms [ 8.05, 8.27], 14.38 ms [ 14.30, 14.46] | 131.90 ms [131.68,132.12], 260.45 ms [259.35,261.55] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
julia> versioninfo() | |
Julia Version 0.5.0-dev+1913 | |
Commit 1380bfe* (2015-12-27 15:31 UTC) | |
Platform Info: | |
System: Darwin (x86_64-apple-darwin14.5.0) | |
CPU: Intel(R) Core(TM) i7-3520M CPU @ 2.90GHz | |
WORD_SIZE: 64 | |
BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Sandybridge) | |
LAPACK: libopenblas64_ | |
LIBM: libopenlibm | |
LLVM: libLLVM-3.3 | |
UnitLowerTriangular 100 x 100 | |
[getindex] xorlt/dense, xorall/dense | xorlt/sparse, xorall/sparse | |
dref: 1.69 μs [ 1.69, 1.70], 2.53 μs [ 2.52, 2.53] | 51.86 μs [ 51.30, 52.43], 97.57 μs [ 96.47, 98.67] | |
master: 18.09 μs [ 17.92, 18.26], 31.95 μs [ 31.69, 32.21] | 63.23 μs [ 62.46, 64.00], 142.99 μs [141.20,144.78] | |
tern: 18.15 μs [ 17.99, 18.31], 32.57 μs [ 32.33, 32.81] | 63.63 μs [ 63.02, 64.24], 144.21 μs [141.15,147.27] | |
ternz: 18.10 μs [ 17.95, 18.25], 32.50 μs [ 32.27, 32.73] | 62.58 μs [ 62.18, 62.98], 79.34 μs [ 77.76, 80.91] | |
ifelse: 4.92 μs [ 4.90, 4.94], 8.16 μs [ 8.12, 8.21] | 138.53 μs [136.96,140.11], 281.28 μs [279.73,282.83] | |
ifelsez: 4.99 μs [ 4.97, 5.02], 7.98 μs [ 7.96, 8.00] | 53.91 μs [ 52.84, 54.99], 101.23 μs [ 99.88,102.58] | |
mixro: 7.46 μs [ 7.41, 7.52], 15.30 μs [ 15.25, 15.35] | 57.33 μs [ 56.96, 57.69], 124.89 μs [123.46,126.32] | |
[getindex] sumlt/dense, sumall/dense | sumlt/sparse, sumall/sparse | |
dref: 4.45 μs [ 4.45, 4.46], 8.87 μs [ 8.86, 8.89] | 60.07 μs [ 59.26, 60.89], 115.74 μs [113.82,117.67] | |
master: 23.15 μs [ 22.75, 23.56], 47.34 μs [ 46.60, 48.09] | 71.68 μs [ 70.80, 72.55], 166.80 μs [162.80,170.80] | |
tern: 22.28 μs [ 22.10, 22.47], 44.41 μs [ 44.17, 44.65] | 75.38 μs [ 74.76, 76.00], 175.93 μs [172.69,179.17] | |
ternz: 22.41 μs [ 22.17, 22.64], 46.33 μs [ 46.08, 46.57] | 77.24 μs [ 75.91, 78.57], 92.35 μs [ 90.97, 93.72] | |
ifelse: 6.12 μs [ 6.09, 6.14], 13.10 μs [ 13.00, 13.21] | 164.64 μs [158.82,170.45], 356.45 μs [355.60,357.29] | |
ifelsez: 6.63 μs [ 6.56, 6.71], 12.93 μs [ 12.88, 12.97] | 67.19 μs [ 66.36, 68.02], 125.59 μs [121.80,129.37] | |
mixro: 7.61 μs [ 7.53, 7.69], 14.66 μs [ 14.58, 14.74] | 65.35 μs [ 64.04, 66.66], 155.96 μs [151.79,160.13] | |
UnitLowerTriangular 1000 x 1000 | |
[getindex] xorlt/dense, xorall/dense | xorlt/sparse, xorall/sparse | |
dref: 342.30 μs [295.13,389.48], 447.94 μs [423.11,472.76] | 7.09 ms [ 6.66, 7.52], 14.07 ms [ 13.18, 14.96] | |
master: 1.83 ms [ 1.74, 1.92], 3.15 ms [ 2.97, 3.32] | 8.24 ms [ 7.88, 8.61], 21.56 ms [ 20.82, 22.29] | |
tern: 1.85 ms [ 1.72, 1.98], 3.17 ms [ 2.94, 3.40] | 8.36 ms [ 7.92, 8.80], 21.57 ms [ 20.69, 22.46] | |
ternz: 1.87 ms [ 1.74, 2.00], 3.17 ms [ 2.94, 3.39] | 8.13 ms [ 7.73, 8.53], 9.61 ms [ 9.19, 10.03] | |
ifelse: 580.54 μs [513.77,647.30], 804.79 μs [748.19,861.38] | 22.15 ms [ 21.40, 22.90], 44.22 ms [ 42.99, 45.45] | |
ifelsez: 573.22 μs [515.23,631.20], 796.49 μs [736.65,856.33] | 7.20 ms [ 6.82, 7.57], 14.34 ms [ 13.63, 15.04] | |
mixro: 795.58 μs [693.56,897.61], 1.40 ms [ 1.38, 1.42] | 7.53 ms [ 7.16, 7.90], 20.01 ms [ 19.33, 20.70] | |
[getindex] sumlt/dense, sumall/dense | sumlt/sparse, sumall/sparse | |
dref: 588.19 μs [511.05,665.32], 890.69 μs [878.98,902.40] | 8.15 ms [ 7.75, 8.54], 16.87 ms [ 16.19, 17.55] | |
master: 2.36 ms [ 2.17, 2.55], 4.58 ms [ 4.34, 4.82] | 9.53 ms [ 9.09, 9.97], 24.82 ms [ 23.84, 25.79] | |
tern: 2.27 ms [ 2.13, 2.41], 4.38 ms [ 4.17, 4.60] | 9.52 ms [ 9.06, 9.97], 26.01 ms [ 24.87, 27.16] | |
ternz: 2.22 ms [ 2.16, 2.28], 4.56 ms [ 4.35, 4.78] | 9.51 ms [ 9.02, 10.01], 11.48 ms [ 10.96, 12.01] | |
ifelse: 602.56 μs [520.56,684.56], 1.02 ms [ 0.96, 1.08] | 24.82 ms [ 24.02, 25.63], 52.60 ms [ 50.71, 54.48] | |
ifelsez: 644.65 μs [557.21,732.08], 1.13 ms [ 1.07, 1.19] | 9.36 ms [ 8.66, 10.06], 18.57 ms [ 17.63, 19.52] | |
mixro: 777.73 μs [670.57,884.88], 1.18 ms [ 1.11, 1.25] | 9.15 ms [ 8.54, 9.77], 24.19 ms [ 23.08, 25.29] | |
UnitLowerTriangular 3000 x 3000 | |
[getindex] xorlt/dense, xorall/dense | xorlt/sparse, xorall/sparse | |
dref: 2.48 ms [ 2.32, 2.64], 4.19 ms [ 4.09, 4.29] | 74.84 ms [ 73.32, 76.36], 149.64 ms [147.56,151.72] | |
master: 16.19 ms [ 15.51, 16.88], 29.21 ms [ 27.86, 30.55] | 85.25 ms [ 83.55, 86.95], 243.82 ms [237.42,250.22] | |
tern: 16.08 ms [ 15.43, 16.73], 27.73 ms [ 26.88, 28.58] | 84.89 ms [ 83.36, 86.41], 242.64 ms [239.42,245.87] | |
ternz: 16.21 ms [ 15.38, 17.04], 27.61 ms [ 26.76, 28.45] | 82.28 ms [ 80.83, 83.72], 95.40 ms [ 93.95, 96.85] | |
ifelse: 4.39 ms [ 4.10, 4.69], 6.86 ms [ 6.59, 7.12] | 232.94 ms [228.51,237.38], 474.72 ms [459.36,490.07] | |
ifelsez: 4.37 ms [ 4.10, 4.64], 6.99 ms [ 6.60, 7.37] | 76.00 ms [ 74.73, 77.27], 151.84 ms [149.34,154.34] | |
mixro: 6.37 ms [ 5.94, 6.80], 12.29 ms [ 11.78, 12.80] | 80.10 ms [ 78.86, 81.34], 231.29 ms [227.20,235.38] | |
[getindex] sumlt/dense, sumall/dense | sumlt/sparse, sumall/sparse | |
dref: 4.36 ms [ 4.16, 4.56], 8.21 ms [ 7.75, 8.67] | 89.73 ms [ 88.39, 91.07], 183.77 ms [181.36,186.18] | |
master: 19.92 ms [ 19.24, 20.61], 40.56 ms [ 39.53, 41.58] | 98.98 ms [ 97.29,100.67], 292.55 ms [287.64,297.45] | |
tern: 20.07 ms [ 19.28, 20.87], 39.53 ms [ 38.43, 40.62] | 104.51 ms [101.51,107.50], 297.25 ms [292.12,302.39] | |
ternz: 20.08 ms [ 19.28, 20.89], 41.11 ms [ 39.95, 42.26] | 98.55 ms [ 97.17, 99.92], 117.28 ms [115.69,118.86] | |
ifelse: 4.53 ms [ 4.24, 4.82], 8.50 ms [ 8.10, 8.90] | 283.87 ms [279.70,288.05], 632.28 ms [621.32,643.24] | |
ifelsez: 4.54 ms [ 4.23, 4.84], 8.46 ms [ 7.99, 8.92] | 90.17 ms [ 89.00, 91.33], 186.25 ms [184.04,188.46] | |
mixro: 5.76 ms [ 5.47, 6.05], 10.01 ms [ 9.48, 10.53] | 87.79 ms [ 86.32, 89.26], 279.62 ms [275.05,284.18] | |
julia> versioninfo() | |
Julia Version 0.5.0-dev+1913 | |
Commit 1380bfe (2015-12-27 15:31 UTC) | |
Platform Info: | |
System: Linux (x86_64-linux-gnu) | |
CPU: AMD Opteron(tm) Processor 6386 SE | |
WORD_SIZE: 64 | |
BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Piledriver) | |
LAPACK: libopenblas64_ | |
LIBM: libopenlibm | |
LLVM: libLLVM-3.3 | |
UnitLowerTriangular 100 x 100 | |
[getindex] xorlt/dense, xorall/dense | xorlt/sparse, xorall/sparse | |
dref: 3.60 μs [ 3.54, 3.67], 4.05 μs [ 4.00, 4.10] | 71.01 μs [ 67.91, 74.11], 137.97 μs [133.35,142.60] | |
master: 23.26 μs [ 22.71, 23.81], 43.08 μs [ 42.21, 43.96] | 85.19 μs [ 82.16, 88.22], 196.00 μs [190.43,201.57] | |
tern: 23.25 μs [ 22.83, 23.68], 43.02 μs [ 41.89, 44.15] | 83.69 μs [ 80.35, 87.03], 193.68 μs [187.76,199.60] | |
ternz: 23.21 μs [ 22.86, 23.57], 43.24 μs [ 42.06, 44.43] | 82.41 μs [ 77.84, 86.99], 99.45 μs [ 96.20,102.69] | |
ifelse: 6.54 μs [ 6.47, 6.61], 8.53 μs [ 8.42, 8.63] | 193.41 μs [187.41,199.41], 371.73 μs [363.91,379.55] | |
ifelsez: 6.54 μs [ 6.43, 6.65], 8.53 μs [ 8.42, 8.64] | 73.54 μs [ 71.14, 75.95], 142.70 μs [138.57,146.82] | |
mixro: 12.08 μs [ 11.92, 12.24], 18.86 μs [ 18.55, 19.18] | 73.29 μs [ 70.50, 76.09], 175.63 μs [170.07,181.20] | |
[getindex] sumlt/dense, sumall/dense | sumlt/sparse, sumall/sparse | |
dref: 8.57 μs [ 8.46, 8.67], 15.89 μs [ 15.63, 16.15] | 97.84 μs [ 94.47,101.20], 183.18 μs [177.83,188.54] | |
master: 43.12 μs [ 42.10, 44.14], 85.28 μs [ 81.64, 88.91] | 115.12 μs [110.91,119.33], 255.75 μs [250.86,260.65] | |
tern: 42.86 μs [ 41.80, 43.92], 84.14 μs [ 80.89, 87.39] | 111.43 μs [107.99,114.87], 247.80 μs [242.32,253.27] | |
ternz: 42.96 μs [ 41.89, 44.03], 85.59 μs [ 82.73, 88.45] | 115.41 μs [111.21,119.62], 151.89 μs [149.12,154.65] | |
ifelse: 9.42 μs [ 9.30, 9.55], 26.73 μs [ 26.16, 27.31] | 232.06 μs [225.09,239.04], 459.63 μs [450.31,468.95] | |
ifelsez: 9.43 μs [ 9.31, 9.56], 26.00 μs [ 25.57, 26.42] | 94.65 μs [ 91.78, 97.51], 181.13 μs [175.66,186.60] | |
mixro: 12.80 μs [ 12.60, 13.00], 23.37 μs [ 22.93, 23.82] | 94.02 μs [ 90.85, 97.19], 221.74 μs [215.39,228.09] | |
UnitLowerTriangular 1000 x 1000 | |
[getindex] xorlt/dense, xorall/dense | xorlt/sparse, xorall/sparse | |
dref: 429.56 μs [389.49,469.63], 871.34 μs [846.41,896.27] | 10.69 ms [ 10.62, 10.77], 21.24 ms [ 21.12, 21.37] | |
master: 2.18 ms [ 2.13, 2.23], 4.09 ms [ 4.02, 4.16] | 12.15 ms [ 12.05, 12.25], 31.98 ms [ 31.84, 32.13] | |
tern: 2.18 ms [ 2.13, 2.23], 4.10 ms [ 4.04, 4.17] | 12.26 ms [ 12.14, 12.39], 31.91 ms [ 31.75, 32.07] | |
ternz: 2.18 ms [ 2.13, 2.22], 4.09 ms [ 4.04, 4.15] | 11.97 ms [ 11.90, 12.05], 13.70 ms [ 13.59, 13.82] | |
ifelse: 539.88 μs [529.15,550.60], 1.05 ms [ 0.99, 1.11] | 29.80 ms [ 29.67, 29.93], 59.46 ms [ 59.29, 59.63] | |
ifelsez: 549.59 μs [510.21,588.96], 1.05 ms [ 1.01, 1.08] | 11.14 ms [ 11.05, 11.23], 21.63 ms [ 21.52, 21.74] | |
mixro: 1.09 ms [ 1.06, 1.13], 1.72 ms [ 1.67, 1.77] | 11.29 ms [ 11.12, 11.45], 30.10 ms [ 29.94, 30.25] | |
[getindex] sumlt/dense, sumall/dense | sumlt/sparse, sumall/sparse | |
dref: 756.70 μs [744.10,769.29], 1.60 ms [ 1.55, 1.64] | 14.04 ms [ 13.95, 14.14], 28.25 ms [ 28.12, 28.39] | |
master: 4.11 ms [ 4.04, 4.17], 8.10 ms [ 8.02, 8.19] | 15.39 ms [ 15.30, 15.47], 38.08 ms [ 37.94, 38.22] | |
tern: 4.11 ms [ 4.04, 4.18], 8.09 ms [ 8.02, 8.16] | 15.44 ms [ 15.35, 15.53], 36.84 ms [ 36.70, 36.97] | |
ternz: 4.10 ms [ 4.05, 4.16], 8.09 ms [ 8.02, 8.16] | 15.57 ms [ 15.48, 15.66], 19.35 ms [ 19.24, 19.46] | |
ifelse: 852.74 μs [800.56,904.92], 2.40 ms [ 2.35, 2.45] | 32.92 ms [ 32.76, 33.08], 66.25 ms [ 65.99, 66.51] | |
ifelsez: 850.67 μs [805.76,895.59], 2.39 ms [ 2.34, 2.45] | 12.83 ms [ 12.75, 12.91], 25.50 ms [ 25.38, 25.63] | |
mixro: 1.15 ms [ 1.10, 1.20], 2.15 ms [ 2.10, 2.21] | 13.10 ms [ 13.00, 13.20], 33.73 ms [ 33.59, 33.86] | |
UnitLowerTriangular 3000 x 3000 | |
[getindex] xorlt/dense, xorall/dense | xorlt/sparse, xorall/sparse | |
dref: 5.84 ms [ 5.78, 5.91], 11.67 ms [ 11.58, 11.76] | 112.00 ms [111.61,112.40], 223.71 ms [223.49,223.93] | |
master: 19.97 ms [ 19.84, 20.11], 37.56 ms [ 37.34, 37.78] | 125.20 ms [124.85,125.54], 335.48 ms [335.00,335.96] | |
tern: 20.67 ms [ 20.22, 21.13], 37.47 ms [ 37.33, 37.61] | 125.17 ms [124.81,125.54], 334.32 ms [333.92,334.72] | |
ternz: 21.46 ms [ 21.23, 21.70], 37.58 ms [ 37.35, 37.80] | 121.99 ms [121.48,122.51], 135.64 ms [135.38,135.91] | |
ifelse: 7.18 ms [ 7.12, 7.25], 12.88 ms [ 12.80, 12.96] | 307.29 ms [306.95,307.62], 616.46 ms [615.78,617.13] | |
ifelsez: 7.14 ms [ 7.07, 7.20], 12.89 ms [ 12.80, 12.98] | 116.27 ms [116.06,116.47], 227.27 ms [227.04,227.50] | |
mixro: 10.80 ms [ 10.56, 11.05], 16.39 ms [ 16.28, 16.50] | 117.39 ms [116.21,118.58], 319.18 ms [318.81,319.54] | |
[getindex] sumlt/dense, sumall/dense | sumlt/sparse, sumall/sparse | |
dref: 7.55 ms [ 7.48, 7.62], 15.59 ms [ 15.42, 15.76] | 142.66 ms [141.93,143.39], 287.24 ms [286.94,287.53] | |
master: 37.05 ms [ 36.90, 37.20], 73.07 ms [ 72.86, 73.28] | 153.19 ms [152.28,154.10], 398.18 ms [397.01,399.35] | |
tern: 37.07 ms [ 36.93, 37.21], 73.04 ms [ 72.85, 73.24] | 155.57 ms [154.76,156.37], 390.09 ms [389.64,390.54] | |
ternz: 37.04 ms [ 36.90, 37.18], 73.04 ms [ 72.86, 73.22] | 155.78 ms [154.90,156.67], 189.38 ms [189.09,189.66] | |
ifelse: 8.72 ms [ 8.38, 9.06], 22.10 ms [ 21.99, 22.21] | 345.51 ms [344.73,346.29], 713.40 ms [707.39,719.42] | |
ifelsez: 8.75 ms [ 8.39, 9.10], 22.09 ms [ 21.98, 22.21] | 129.21 ms [128.39,130.03], 259.75 ms [258.86,260.63] | |
mixro: 11.06 ms [ 10.86, 11.27], 20.26 ms [ 20.15, 20.37] | 131.16 ms [130.86,131.47], 360.45 ms [360.08,360.83] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment