Last active
December 14, 2018 20:02
-
-
Save klowrey/4118fee0fee87803fa5a46a756002924 to your computer and use it in GitHub Desktop.
faster faster!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using SIMD | |
using BenchmarkTools | |
"""
    foo(x, s, t)

Overwrite, in place, every element of `x` that is less than `s` with `t`.

This is the broadcast/logical-indexing variant: `x .< s` materializes a Boolean
mask, which is then used to select the elements that are assigned `t`. The value
of the masked broadcast assignment is returned.
"""
function foo(x, s, t)
    mask = x .< s
    return x[mask] .= t
end
"""
    foo2(x, s, t)

Overwrite, in place, every element of `x` that is less than `s` with `t`.

Plain bounds-checked loop: each element is read, the replacement is selected
with a ternary, and the result is always stored back. Returns `nothing`.
"""
function foo2(x, s, t)
    for i in eachindex(x)
        xi = x[i]
        x[i] = xi < s ? t : xi
    end
    return nothing
end
"""
    foo3(x, s, t)

Overwrite, in place, every element of `x` that is less than `s` with `t`.

Same select-and-store loop as the bounds-checked variant, but with bounds
checks elided via `@inbounds`; the indices come from `eachindex(x)`.
Returns `nothing`.
"""
function foo3(x, s, t)
    @inbounds for i in eachindex(x)
        xi = x[i]
        x[i] = xi < s ? t : xi
    end
    return nothing
end
"""
    foo3b(x, s, t)

Overwrite, in place, every element of `x` that is less than `s` with `t`.

Unlike the select-and-store loops, this variant only writes to `x[i]` when the
comparison succeeds. Bounds checks are elided via `@inbounds`; the indices come
from `eachindex(x)`. Returns `nothing`.
"""
function foo3b(x, s, t)
    @inbounds for i in eachindex(x)
        if x[i] < s
            x[i] = t
        end
    end
    return nothing
end
"""
    foo4(x::Array{T}, s::T, t::T, ::Type{Vec{N,T}}) where {N, T}

Overwrite, in place, every element of `x` that is less than `s` with `t`,
processing `N` elements at a time with explicit SIMD vectors.

# Arguments
- `x`: array to modify in place.
- `s`: threshold; elements strictly below it are replaced.
- `t`: replacement value.
- `Vec{N,T}`: SIMD vector type selecting the lane count `N`.

Elements beyond the last full group of `N` are handled by a scalar loop, so
`length(x)` does not have to be a multiple of `N`. Returns `nothing`.
"""
function foo4(x::Array{T}, s::T, t::T, ::Type{Vec{N,T}}) where {N, T}
    sv = Vec{N,T}(s)
    tv = Vec{N,T}(t)
    len = length(x)
    # Length of the longest prefix made of whole N-lane vectors; `@inbounds`
    # vector loads/stores must never reach past it.
    vec_end = len - len % N
    @inbounds for i in 1:N:vec_end
        xv = vload(Vec{N,T}, x, i)
        mask = xv < sv
        # Lanes below the threshold take `t`; all other lanes keep their value.
        xv = vifelse(mask, tv, xv)
        vstore(xv, x, i)
    end
    # Scalar tail for the remaining `len % N` elements.
    @inbounds for i in (vec_end + 1):len
        x[i] = x[i] < s ? t : x[i]
    end
    return nothing
end
"""
    foo_t(x, s, t)

Overwrite, in place, every element of `x` that is less than `s` with `t`,
splitting the work into one contiguous chunk per available thread.

Chunks are sized with ceiling division and clamped to the end of `x`, so every
element is processed even when `length(x)` is not a multiple of
`Threads.nthreads()`. Returns `nothing`.
"""
function foo_t(x, s, t)
    nchunks = Threads.nthreads()
    len = length(x)
    chunk = cld(len, nchunks)
    first_idx = firstindex(x)
    last_idx = lastindex(x)
    # The chunk counter must not be named `t`: that would shadow the
    # replacement value inside the loop body.
    Threads.@threads for c in 1:nchunks
        lo = first_idx + (c - 1) * chunk
        hi = min(first_idx + c * chunk - 1, last_idx)
        # `lo:hi` is empty for surplus chunks when there are more threads
        # than elements, and always lies within the linear indices of `x`.
        @inbounds for i in lo:hi
            x[i] = x[i] < s ? t : x[i]
        end
    end
    return nothing
end
# Benchmark inputs: a 200*200-element random vector, the threshold `s`, and the
# replacement value `t`.
T = Float64
x = rand(T, 200 * 200)
s = T(0.5)
t = T(0.11)

# Every kernel mutates its input, so each sample runs exactly once (`evals=1`)
# on a fresh copy of `x` created in `setup`; otherwise only the very first
# evaluation would see data that still contains values below `s`.
# Globals are interpolated with `\$` so that access to untyped globals is not
# part of the measurement.
# The broadcast variant was disabled in the original script and is left so.
#@btime foo(y, $s, $t) setup=(y = copy($x)) evals=1;
@btime foo2(y, $s, $t) setup=(y = copy($x)) evals=1;
@btime foo3(y, $s, $t) setup=(y = copy($x)) evals=1;
@btime foo3b(y, $s, $t) setup=(y = copy($x)) evals=1;
@btime foo4(y, $s, $t, $(Vec{8,T})) setup=(y = copy($x)) evals=1;
@btime foo_t(y, $s, $t) setup=(y = copy($x)) evals=1;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment