Skip to content

Instantly share code, notes, and snippets.

@timholy
Last active March 4, 2020 23:38
Show Gist options
  • Save timholy/7b0d9bc2fc95af01fba182e7f0095a8b to your computer and use it in GitHub Desktop.
Save timholy/7b0d9bc2fc95af01fba182e7f0095a8b to your computer and use it in GitHub Desktop.
ImageFiltering and LoopVectorization, perfect together
using ImageFiltering, ImageCore, OffsetArrays
using ImageFiltering: safehead, safetail, safe_for_prod
using LoopVectorization
"""
    old!(out, A, kern, R=CartesianIndices(out), z=zero(eltype(out)))

Fill `out` over the region `R` with the weighted sum of `A` against `kern`:
for every position `(i, I)` in `R`, accumulate
`safe_for_prod(A[i+j, I+J], acc) * kern[j, J]` over all kernel positions `(j, J)`,
starting from `z`, and store the total in `out[i, I]`.

`R` is split with `safehead`/`safetail` (from ImageFiltering); the head part is used in
the leading index position and the tail part in the trailing position, for both the
output region and the kernel region.

All indexing is marked `@inbounds`, so the caller is responsible for making sure that
`R` lies inside `out` and that every shifted index lies inside `A`.

Returns `out`.
"""
function old!(out, A, kern, R=CartesianIndices(out), z=zero(eltype(out)))
    kernel_region = CartesianIndices(axes(kern))
    # Leading / trailing pieces of the output region and of the kernel region.
    lead, trail = safehead(R), safetail(R)
    klead, ktrail = safehead(kernel_region), safetail(kernel_region)
    for I in trail
        for i in lead
            acc = z
            @inbounds for J in ktrail
                for j in klead
                    # NOTE(review): safe_for_prod is an ImageFiltering helper; presumably
                    # it sanitizes the sample before multiplying - confirm upstream.
                    acc += safe_for_prod(A[i + j, I + J], acc) * kern[j, J]
                end
            end
            @inbounds out[i, I] = acc
        end
    end
    return out
end
"""
    old2d!(out::AbstractMatrix, A::AbstractMatrix, kern, R=CartesianIndices(out),
           z=zero(eltype(out)))

Two-dimensional version of the plain-loop kernel: for every `(i, j)` in the region `R`,
sum `safe_for_prod(A[i+ik, j+jk], acc) * kern[ik, jk]` over the two axes of `kern`,
starting from `z`, and write the total to `out[i, j]`.

The first index is iterated in the innermost loops, for both the output region and the
kernel. All indexing is marked `@inbounds`, so the caller must guarantee that the shifted
indices stay inside `A` and that `R` lies inside `out`.

Returns `out`.
"""
function old2d!(
    out::AbstractMatrix,
    A::AbstractMatrix,
    kern,
    R=CartesianIndices(out),
    z=zero(eltype(out)),
)
    krows, kcols = axes(kern)
    rows, cols = R.indices
    for col in cols
        for row in rows
            acc = z
            @inbounds for kc in kcols
                for kr in krows
                    acc += safe_for_prod(A[row + kr, col + kc], acc) * kern[kr, kc]
                end
            end
            @inbounds out[row, col] = acc
        end
    end
    return out
end
"""
    avx2d!(out::AbstractMatrix, A::AbstractMatrix, kern::OffsetArray,
           R=CartesianIndices(out), z=zero(eltype(out)))

Two-dimensional kernel sum whose inner (kernel) loops are wrapped in LoopVectorization's
`@avx`. For every `(i, j)` in `R`, accumulates `A[i+ik, j+jk] * kern[ik, jk]` over the
axes of `kern`, starting from `z`, and stores the result in `out[i, j]`.

The `OffsetArray` kernel is unpacked by hand: the loop indexes the parent array and
subtracts the stored offsets, while still iterating over the offset axes of `kern`.
Unlike the plain-loop versions, no `safe_for_prod` call is applied to the samples.

Returns `out`.
"""
function avx2d!(
    out::AbstractMatrix,
    A::AbstractMatrix,
    kern::OffsetArray,
    R=CartesianIndices(out),
    z=zero(eltype(out)),
)
    krows, kcols = axes(kern)
    rows, cols = R.indices
    # Work on the underlying array and apply the offsets manually inside the loop.
    rawkern = parent(kern)
    off1, off2 = kern.offsets
    for col in cols
        for row in rows
            acc = z
            # Loop header deliberately iterates the axis objects directly.
            @avx for kc in kcols, kr in krows
                acc += A[row + kr, col + kc] * rawkern[kr - off1, kc - off2]
            end
            out[row, col] = acc
        end
    end
    return out
end
"""
    avx2d_unpack!(out::AbstractMatrix, A::AbstractMatrix, kern::OffsetArray,
                  R=CartesianIndices(out), z=zero(eltype(out)))

Same computation as an `@avx` kernel sum over `kern`, but the kernel axes are unpacked
into explicit `first:last` ranges before being handed to the `@avx` loop. For every
`(i, j)` in `R`, accumulates `A[i+ik, j+jk] * kern[ik, jk]` starting from `z` and stores
the result in `out[i, j]`.

The `OffsetArray` kernel is also unpacked by hand: the loop indexes the parent array and
subtracts the stored offsets.

Returns `out`.
"""
function avx2d_unpack!(
    out::AbstractMatrix,
    A::AbstractMatrix,
    kern::OffsetArray,
    R=CartesianIndices(out),
    z=zero(eltype(out)),
)
    krows, kcols = axes(kern)
    # Endpoints of each kernel axis, so the loop can use plain `lo:hi` ranges.
    klo1, khi1 = first(krows), last(krows)
    klo2, khi2 = first(kcols), last(kcols)
    rows, cols = R.indices
    # Work on the underlying array and apply the offsets manually inside the loop.
    rawkern = parent(kern)
    off1, off2 = kern.offsets
    for col in cols
        for row in rows
            acc = z
            @avx for kc in klo2:khi2, kr in klo1:khi1
                acc += A[row + kr, col + kc] * rawkern[kr - off1, kc - off2]
            end
            out[row, col] = acc
        end
    end
    return out
end
# Sample data for exercising the kernels above.
A = rand(Float32, (100, 100))
kern = centered(rand(Float32, (3, 3)))
# Output covers only the interior of A (shifted by one in each dimension), so every
# kernel tap stays away from the edges of A.
out = let interior = similar(A, size(A) .- 2)
    OffsetArray(interior, 1, 1)
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment