Last active
March 4, 2020 23:38
-
-
Save timholy/7b0d9bc2fc95af01fba182e7f0095a8b to your computer and use it in GitHub Desktop.
ImageFiltering and LoopVectorization, perfect together
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using ImageFiltering, ImageCore, OffsetArrays | |
using ImageFiltering: safehead, safetail, safe_for_prod | |
using LoopVectorization | |
"""
    old!(out, A, kern, R=CartesianIndices(out), z=zero(eltype(out)))

Reference N-dimensional kernel loop using plain `CartesianIndex` iteration.

For every position `(i, I)` in `R`, start an accumulator at `z`, add
`safe_for_prod(A[i+j, I+J], acc) * kern[j, J]` for every kernel index `(j, J)` in
`axes(kern)`, and store the result in `out[i, I]`. Returns `out`.

`safehead`/`safetail` (from ImageFiltering) split an index region into the part used
as the leading index (`i`, `j`) and the part used as the trailing index (`I`, `J`);
the leading part is always the innermost loop.

All reads of `A` and `kern` and the write to `out` are under `@inbounds`: the caller
must guarantee that every `i+j, I+J` is a valid index of `A` and that `R` lies inside
`out`.
"""
function old!(out, A, kern, R=CartesianIndices(out), z=zero(eltype(out)))
    Rk = CartesianIndices(axes(kern))
    # Split both the output region and the kernel region once, up front.
    outhead, outtail = safehead(R), safetail(R)
    kernhead, kerntail = safehead(Rk), safetail(Rk)
    for I in outtail
        for i in outhead
            acc = z
            @inbounds for J in kerntail
                for j in kernhead
                    # `safe_for_prod` is ImageFiltering's helper; it receives the
                    # sample and the running accumulator before the multiply.
                    acc = acc + safe_for_prod(A[i+j, I+J], acc) * kern[j, J]
                end
            end
            @inbounds out[i, I] = acc
        end
    end
    return out
end
"""
    old2d!(out::AbstractMatrix, A::AbstractMatrix, kern, R=CartesianIndices(out),
           z=zero(eltype(out)))

Two-dimensional version of the reference kernel loop, written with explicit scalar
indices instead of `CartesianIndex` arithmetic.

For every `(r, c)` in the two ranges of `R.indices`, start an accumulator at `z`, add
`safe_for_prod(A[r+kr, c+kc], acc) * kern[kr, kc]` for every `(kr, kc)` in
`axes(kern)`, and store the result in `out[r, c]`. Returns `out`.

The first index is driven by the innermost loop at both levels. All array accesses
are under `@inbounds`, so the caller must guarantee that every `r+kr, c+kc` is a valid
index of `A` and that `R` lies inside `out`.
"""
function old2d!(out::AbstractMatrix, A::AbstractMatrix, kern,
                R=CartesianIndices(out), z=zero(eltype(out)))
    krows, kcols = axes(kern)
    rows, cols = R.indices
    for c in cols
        for r in rows
            acc = z
            @inbounds for kc in kcols
                for kr in krows
                    # `safe_for_prod` is ImageFiltering's helper; it receives the
                    # sample and the running accumulator before the multiply.
                    acc = acc + safe_for_prod(A[r+kr, c+kc], acc) * kern[kr, kc]
                end
            end
            @inbounds out[r, c] = acc
        end
    end
    return out
end
"""
    avx2d!(out::AbstractMatrix, A::AbstractMatrix, kern::OffsetArray,
           R=CartesianIndices(out), z=zero(eltype(out)))

Two-dimensional kernel loop whose inner (kernel) loops are vectorized with `@avx`.

For every `(i, j)` in the two ranges of `R.indices`, accumulate
`A[i+ik, j+jk] * kern[ik, jk]` over `axes(kern)` starting from `z`, and store the sum
in `out[i, j]`. Returns `out`.

The `OffsetArray` kernel is unpacked by hand: the loop indexes `parent(kern)` at
`ik - o1, jk - o2`, where `(o1, o2) = kern.offsets`. The `@avx` loop is given the
kernel axes as range objects.

# Throws
- `BoundsError` if the index hull `R .+ axes(kern)` is not contained in `A`. `@avx`
  does no bounds checking of its own, so this is validated once before the loops.
  The check is a `@boundscheck` block.
- `BoundsError` from the ordinary, checked `out[i, j] = …` store if `R` is not
  contained in `out`.

# Edge cases
- Empty `R`: `out` is returned untouched.
- Empty kernel: every `out[i, j]` in `R` is set to `z` (the empty sum) without
  entering `@avx`, whose behavior on empty loops is undefined.
"""
function avx2d!(out::AbstractMatrix, A::AbstractMatrix, kern::OffsetArray,
                R=CartesianIndices(out), z=zero(eltype(out)))
    rng1k, rng2k = axes(kern)
    rng1, rng2 = R.indices
    # Nothing to compute; also keeps the hull check below from seeing an empty region.
    (isempty(rng1) || isempty(rng2)) && return out
    if isempty(rng1k) || isempty(rng2k)
        # Never hand `@avx` an empty loop; the empty sum is just `z`.
        for j in rng2, i in rng1
            out[i, j] = z
        end
        return out
    end
    # `@avx` reads `A` unchecked, so validate the full range of accessed indices once.
    @boundscheck checkbounds(
        A,
        (first(rng1) + first(rng1k)):(last(rng1) + last(rng1k)),
        (first(rng2) + first(rng2k)):(last(rng2) + last(rng2k)),
    )
    # Manually unpack the OffsetArray
    kernA = parent(kern)
    o1, o2 = kern.offsets
    for j in rng2, i in rng1
        tmp = z
        @avx for jk in rng2k, ik in rng1k
            tmp += A[i+ik, j+jk] * kernA[ik-o1, jk-o2]
        end
        out[i, j] = tmp
    end
    return out
end
"""
    avx2d_unpack!(out::AbstractMatrix, A::AbstractMatrix, kern::OffsetArray,
                  R=CartesianIndices(out), z=zero(eltype(out)))

Variant of the `@avx` two-dimensional kernel loop in which the kernel axes are also
unpacked into scalar endpoints, so the vectorized loop iterates over
`first:last` ranges built from plain integers rather than over the axis objects.

For every `(i, j)` in the two ranges of `R.indices`, accumulate
`A[i+ik, j+jk] * kern[ik, jk]` over `axes(kern)` starting from `z`, and store the sum
in `out[i, j]`. Returns `out`. The kernel data is read from `parent(kern)` at
`ik - o1, jk - o2`, where `(o1, o2) = kern.offsets`.

# Throws
- `BoundsError` if the index hull `R .+ axes(kern)` is not contained in `A`. `@avx`
  does no bounds checking of its own, so this is validated once before the loops.
  The check is a `@boundscheck` block.
- `BoundsError` from the ordinary, checked `out[i, j] = …` store if `R` is not
  contained in `out`.

# Edge cases
- Empty `R`: `out` is returned untouched.
- Empty kernel: every `out[i, j]` in `R` is set to `z` (the empty sum) without
  entering `@avx`, whose behavior on empty loops is undefined.
"""
function avx2d_unpack!(out::AbstractMatrix, A::AbstractMatrix, kern::OffsetArray,
                       R=CartesianIndices(out), z=zero(eltype(out)))
    rng1k, rng2k = axes(kern)
    rng1kf, rng1kl = first(rng1k), last(rng1k)
    rng2kf, rng2kl = first(rng2k), last(rng2k)
    rng1, rng2 = R.indices
    # Nothing to compute; also keeps the hull check below from seeing an empty region.
    (isempty(rng1) || isempty(rng2)) && return out
    if isempty(rng1k) || isempty(rng2k)
        # Never hand `@avx` an empty loop; the empty sum is just `z`.
        for j in rng2, i in rng1
            out[i, j] = z
        end
        return out
    end
    # `@avx` reads `A` unchecked, so validate the full range of accessed indices once.
    @boundscheck checkbounds(
        A,
        (first(rng1) + rng1kf):(last(rng1) + rng1kl),
        (first(rng2) + rng2kf):(last(rng2) + rng2kl),
    )
    # Manually unpack the OffsetArray
    kernA = parent(kern)
    o1, o2 = kern.offsets
    for j in rng2, i in rng1
        tmp = z
        @avx for jk in rng2kf:rng2kl, ik in rng1kf:rng1kl
            tmp += A[i+ik, j+jk] * kernA[ik-o1, jk-o2]
        end
        out[i, j] = tmp
    end
    return out
end
# Benchmark fixtures: a 100×100 Float32 input and a 3×3 Float32 kernel.
A = rand(Float32, 100, 100)
# `centered` (ImageCore/ImageFiltering) wraps the kernel as an offset array;
# presumably its axes become -1:1 in each dimension — confirm against the package.
kern = centered(rand(Float32, 3, 3))
# Output is two elements smaller than `A` in each dimension and its indices are
# shifted by (1, 1), so the computation stays away from the edges of `A`.
out = OffsetArray(similar(A, map(n -> n - 2, size(A))), 1, 1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment