Skip to content

Instantly share code, notes, and snippets.

@lindahua
Created December 16, 2013 22:19
Show Gist options
  • Save lindahua/7995482 to your computer and use it in GitHub Desktop.
Save lindahua/7995482 to your computer and use it in GitHub Desktop.
A fast & accurate implementation of sum
# cascade sum
mid(i0::Integer, i1::Integer) = i0 + ((i1 - i0) >> 1)
function ssum{T}(a::AbstractArray{T}, ifirst::Int, ilast::Int)
n = ilast - ifirst + 1
if n < 4
s = zero(T)
i = ifirst
while i <= ilast
@inbounds s += a[i]
i += 1
end
return s
else
@inbounds s1 = a[ifirst]
@inbounds s2 = a[ifirst+1]
@inbounds s3 = a[ifirst+2]
@inbounds s4 = a[ifirst+3]
i = ifirst + 4
il = ilast - 3
while i <= il
# the purpose of using multiple accumulators here
# is to leverage instruction pairing to hide
# read-after-write latency. Benchmark shows that
# this can lead to considerable performance
# improvement (nearly 2x).
@inbounds s1 += a[i]
@inbounds s2 += a[i+1]
@inbounds s3 += a[i+2]
@inbounds s4 += a[i+3]
i += 4
end
while i <= ilast
@inbounds s1 += a[i]
i += 1
end
return s1 + s2 + s3 + s4
end
end
ssum(a::AbstractArray) = ssum(a, 1, length(a))
function csum{T}(a::AbstractArray{T}, ifirst::Int, ilast::Int, bsiz::Int)
if ifirst + bsiz >= ilast
ssum(a, ifirst, ilast)
else
imid = mid(ifirst, ilast)
csum(a, ifirst, imid, bsiz) + csum(a, imid+1, ilast, bsiz)
end
end
csum(a::AbstractArray) = csum(a, 1, length(a), 1024)
## profiling
x = rand(10^5)
r0 = sum(x)
r1 = ssum(x)
r2 = csum(x)
println("|r0 - r2| = $(abs(r0 - r1))\n")
@time for i=1:1000 sum(x) end
@time for i=1:1000 ssum(x) end
@time for i=1:1000 csum(x) end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment