Skip to content

Instantly share code, notes, and snippets.

@klowrey
Last active December 14, 2018 20:02
Show Gist options
  • Save klowrey/4118fee0fee87803fa5a46a756002924 to your computer and use it in GitHub Desktop.
Save klowrey/4118fee0fee87803fa5a46a756002924 to your computer and use it in GitHub Desktop.
faster faster!
using SIMD
using BenchmarkTools
"""
    foo(x, s, t)

Overwrite, in place, every element of `x` that is less than `s` with `t`, using a
boolean mask and a broadcast assignment. Evaluates to the result of the masked
broadcast assignment.
"""
function foo(x, s, t)
    below = x .< s  # element-wise comparison, materialized as a mask
    return x[below] .= t
end
"""
    foo2(x, s, t)

Walk over every index of `x` and store `t` where the element is less than `s`,
otherwise store the element back unchanged. Mutates `x`; returns `nothing`.
"""
function foo2(x, s, t)
    for idx in eachindex(x)
        current = x[idx]
        if current < s
            x[idx] = t
        else
            x[idx] = current
        end
    end
    return nothing
end
"""
    foo3(x, s, t)

Same element-wise replacement as a plain indexed loop (elements less than `s`
become `t`, all others are written back unchanged), with bounds checks disabled
via `@inbounds`. Mutates `x`; returns `nothing`.
"""
function foo3(x, s, t)
    @inbounds for k in eachindex(x)
        xk = x[k]
        x[k] = xk < s ? t : xk
    end
    return nothing
end
"""
    foo3b(x, s, t)

Replace, in place, each element of `x` that is less than `s` with `t`. Unlike the
always-store variants, an element is only written when it actually needs to
change. Bounds checks are disabled via `@inbounds`. Returns `nothing`.
"""
function foo3b(x, s, t)
    @inbounds for idx in eachindex(x)
        if x[idx] < s
            x[idx] = t
        end
    end
    return nothing
end
"""
    foo4(x::Array{T}, s::T, t::T, ::Type{Vec{N,T}}) where {N, T}

Replace, in place, every element of `x` that is less than `s` with `t`, processing
`N` elements per step with SIMD.jl `Vec{N,T}` vectors.

# Arguments
- `x`: array to modify in place.
- `s`: threshold; elements strictly less than `s` are replaced.
- `t`: replacement value.
- `Vec{N,T}`: the SIMD vector type selecting the lane count `N`.

Whole groups of `N` elements are handled with vector loads/stores; any remaining
`length(x) % N` elements are handled by a scalar loop, so the length of `x` does
not need to be a multiple of `N`. Returns `nothing`.
"""
function foo4(x::Array{T}, s::T, t::T, ::Type{Vec{N,T}}) where {N, T}
    sv = Vec{N,T}(s)
    tv = Vec{N,T}(t)
    n = length(x)
    nvec = n - n % N  # number of leading elements covered by whole vectors
    @inbounds for i in 1:N:nvec
        xv = vload(Vec{N,T}, x, i)
        mask = xv < sv
        # vifelse takes the second argument where the mask is true and the third
        # elsewhere, so lanes below the threshold must select `tv`.
        vstore(vifelse(mask, tv, xv), x, i)
    end
    # Scalar clean-up for the tail that does not fill a whole vector; a vector
    # load here would touch memory past the end of `x`.
    @inbounds for i in (nvec + 1):n
        x[i] = x[i] < s ? t : x[i]
    end
    return nothing
end
"""
    foo_t(x, s, t)

Replace, in place, every element of `x` that is less than `s` with `t`, splitting
the work into one contiguous chunk per available thread.

# Arguments
- `x`: array to modify in place (indexed linearly from 1 to `length(x)`).
- `s`: threshold; elements strictly less than `s` are replaced.
- `t`: replacement value.

The chunk size is rounded up and the last chunk is clamped to `length(x)`, so all
elements are processed even when the length is not a multiple of the thread
count. Returns `nothing`.
"""
function foo_t(x, s, t)
    nchunks = Threads.nthreads()
    N = length(x)
    chunk = cld(N, nchunks)  # ceiling division so no trailing elements are dropped
    # The chunk index must not be called `t`: that would shadow the replacement
    # value and write the chunk index into `x` instead.
    Threads.@threads for c in 1:nchunks
        lo = (c - 1) * chunk + 1
        hi = min(c * chunk, N)  # trailing chunks may be short or empty
        @inbounds for i in lo:hi
            x[i] = x[i] < s ? t : x[i]
        end
    end
    return nothing
end
# Benchmark setup: a 200*200-element random vector, a threshold `s`, and a
# replacement value `t`, all of element type `T`.
T = Float64
x = rand(T, 200*200)
s = T(0.5)
t = T(0.11)
# Arguments are `$`-interpolated so that @btime times the function call itself
# rather than the overhead of accessing non-constant global variables.
# The masked-broadcast variant is left disabled, as in the original script.
#@btime foo($x, $s, $t);
@btime foo2($x, $s, $t);
@btime foo3($x, $s, $t);
@btime foo3b($x, $s, $t);
@btime foo4($x, $s, $t, $(Vec{8,T}));
@btime foo_t($x, $s, $t);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment