Created
December 2, 2018 22:20
-
-
Save perrutquist/6a70cb37cc1dedc2ab3ffadac7a0688f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A small example of how to write Julia code that results in masked AVX instructions
"""
    unsafe_max4!(C, A, B)

Compute the element-wise maximum of the first 4 elements of vectors `A` and `B`,
store the result in the first 4 elements of `C`, and return the four maxima as a tuple.

The indexing is wrapped in `@inbounds`, so no bounds checks are requested: if any of
the vectors has fewer than 4 elements, the program might crash, or worse.
"""
function unsafe_max4!(C, A, B)
    @inbounds begin
        # Read both operands completely before writing anything to `C`, so the result is
        # well-defined even when `C` aliases `A` or `B`.
        maxima = max.((A[1], A[2], A[3], A[4]), (B[1], B[2], B[3], B[4]))
        C[1], C[2], C[3], C[4] = maxima
    end
    return maxima
end
# `@code_native` is provided by InteractiveUtils, which only the REPL loads automatically;
# load it explicitly so that this file also works when run as a script.
using InteractiveUtils

# Two random 4-element `Int` inputs and a zero-initialised 4-element output vector.
A = rand(Int, 4)
B = rand(Int, 4)
C = zeros(Int, 4)

# Print the machine code that Julia generates for these argument types.
@code_native unsafe_max4!(C, A, B)
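# Results in the following on my machine.
# (NOTE: the listing looks like 64-bit code that was disassembled in 32-bit mode. Each
# `decl %eax` is most likely a REX.W prefix byte, so e.g. `decl %eax` followed by
# `movl (%edx), %eax` would really be the single instruction `movq (%rdx), %rax`.)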
# .section __TEXT,__text,regular,pure_instructions | |
# decl %eax | |
# movl (%edx), %eax | |
# vmovdqu (%eax), %ymm0 | |
# decl %eax | |
# movl (%ecx), %eax | |
# vmovdqu (%eax), %ymm1 | |
# vpcmpgtq %ymm1, %ymm0, %ymm2 | |
# vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 | |
# decl %eax | |
# movl (%esi), %eax | |
# vmovupd %ymm0, (%eax) | |
# vmovupd %ymm0, (%edi) | |
# decl %eax | |
# movl %edi, %eax | |
# vzeroupper | |
# retl | |
# nopl (%eax,%eax) | |
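# The vpcmpgtq instruction compares the two inputs element by element and produces a mask;
# the vblendvpd instruction in the middle is the masked AVX instruction, which uses that
# mask to pick out the element-wise maximum.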
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment