Created November 9, 2021 08:57
-
-
Save nicolasvasilache/2c773b86fcda01cc28711828a0a9ce0a to your computer and use it in GitHub Desktop.
Compiled to AVX2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ---------------------------------------------------------------------------
# Syntax : GNU as, AT&T operand order (op src, dst), '#' comments.
# Target : x86-64 ELF object; uses AVX 256-bit (ymm) instructions.
# ---------------------------------------------------------------------------
        .text
        .file   "LLVMDialectModule"
        .globl  transpose_2d_on_tensors         # -- Begin function transpose_2d_on_tensors
        .p2align 4, 0x90
        .type   transpose_2d_on_tensors,@function
# ---------------------------------------------------------------------------
# transpose_2d_on_tensors
#
# Transposes one 8x8 tile of 32-bit elements: the 32 bytes stored at
# dst + 32*k are column k of the tile read from src (rows 32 bytes apart).
#
# In:    %rsi      = src base; 256 bytes are read (eight 32-byte rows)
#        24(%rsp)  = dst base (stack argument); 256 bytes are written
#        No other argument register or stack slot is read.
#        NOTE(review): presumably the arguments are two MLIR memref
#        descriptors expanded to scalars, with %rsi / 24(%rsp) being their
#        aligned data pointers -- confirm against the caller's signature.
# Out:   nothing is produced explicitly; %rax still holds dst on return.
# Clobb: %rax, %ymm0-%ymm11. vzeroupper additionally clears bits 128 and up
#        of every ymm register before returning.
# Stack: leaf function; %rsp is not adjusted and nothing is spilled.
# Align: all memory accesses use vmovups, so neither buffer needs 32-byte
#        alignment.
# Alias: all eight rows are loaded before the first store, so src and dst
#        may overlap (in-place use is safe).
# ABI:   NOTE(review): %ymm8-%ymm11 are used without being saved, which
#        matches System V AMD64 (all vector registers volatile) -- confirm.
#
# Notation below: "rA/rB" = elements of source rows A and B interleaved;
# "x | y" = contents of the low 128-bit lane | high 128-bit lane.
# ---------------------------------------------------------------------------
transpose_2d_on_tensors:                        # @transpose_2d_on_tensors
# %bb.0:
        movq    24(%rsp), %rax                  # rax = dst base

        # Load the eight source rows: ymmN = row N (8 x 32-bit).
        vmovups (%rsi), %ymm0
        vmovups 32(%rsi), %ymm1
        vmovups 64(%rsi), %ymm2
        vmovups 96(%rsi), %ymm3
        vmovups 128(%rsi), %ymm4
        vmovups 160(%rsi), %ymm5
        vmovups 192(%rsi), %ymm6
        vmovups 224(%rsi), %ymm7

        # Stage 1: interleave 32-bit elements of adjacent row pairs.
        vunpcklps %ymm1, %ymm0, %ymm8           # ymm8 = r0/r1, cols 0,1 | 4,5
        vunpckhps %ymm1, %ymm0, %ymm0           # ymm0 = r0/r1, cols 2,3 | 6,7
        vunpcklps %ymm3, %ymm2, %ymm1           # ymm1 = r2/r3, cols 0,1 | 4,5
        vunpckhps %ymm3, %ymm2, %ymm2           # ymm2 = r2/r3, cols 2,3 | 6,7
        vunpcklps %ymm5, %ymm4, %ymm3           # ymm3 = r4/r5, cols 0,1 | 4,5
        vunpckhps %ymm5, %ymm4, %ymm4           # ymm4 = r4/r5, cols 2,3 | 6,7
        vunpcklps %ymm7, %ymm6, %ymm5           # ymm5 = r6/r7, cols 0,1 | 4,5
        vunpckhps %ymm7, %ymm6, %ymm6           # ymm6 = r6/r7, cols 2,3 | 6,7

        # Stage 2: interleave 64-bit pairs, giving four consecutive rows of
        # a single column in each 128-bit lane.
        vunpcklpd %ymm1, %ymm8, %ymm7           # ymm7 = rows 0-3 of col 0 | col 4
        vunpckhpd %ymm1, %ymm8, %ymm1           # ymm1 = rows 0-3 of col 1 | col 5
        vunpcklpd %ymm2, %ymm0, %ymm8           # ymm8 = rows 0-3 of col 2 | col 6
        vunpckhpd %ymm2, %ymm0, %ymm0           # ymm0 = rows 0-3 of col 3 | col 7
        vunpcklpd %ymm5, %ymm3, %ymm2           # ymm2 = rows 4-7 of col 0 | col 4
        vunpckhpd %ymm5, %ymm3, %ymm3           # ymm3 = rows 4-7 of col 1 | col 5
        vunpcklpd %ymm6, %ymm4, %ymm5           # ymm5 = rows 4-7 of col 2 | col 6
        vunpckhpd %ymm6, %ymm4, %ymm4           # ymm4 = rows 4-7 of col 3 | col 7

        # Stage 3a: glue the low lanes together -> full columns 0..3.
        # These must precede stage 3b, which overwrites ymm2/ymm1/ymm3/ymm0.
        vinsertf128 $1, %xmm2, %ymm7, %ymm6     # ymm6  = col 0 (rows 0-7)
        vinsertf128 $1, %xmm3, %ymm1, %ymm9     # ymm9  = col 1
        vinsertf128 $1, %xmm5, %ymm8, %ymm10    # ymm10 = col 2
        vinsertf128 $1, %xmm4, %ymm0, %ymm11    # ymm11 = col 3

        # Stage 3b: glue the high lanes together -> full columns 4..7.
        # Immediate 49 = 0x31: result low lane = high lane of the second
        # source operand listed, result high lane = high lane of the first.
        vperm2f128 $49, %ymm2, %ymm7, %ymm2     # ymm2 = col 4 = ymm7[2,3],ymm2[2,3]
        vperm2f128 $49, %ymm3, %ymm1, %ymm1     # ymm1 = col 5 = ymm1[2,3],ymm3[2,3]
        vperm2f128 $49, %ymm5, %ymm8, %ymm3     # ymm3 = col 6 = ymm8[2,3],ymm5[2,3]
        vperm2f128 $49, %ymm4, %ymm0, %ymm0     # ymm0 = col 7 = ymm0[2,3],ymm4[2,3]

        # Store source column k as destination row k.
        vmovups %ymm6, (%rax)
        vmovups %ymm9, 32(%rax)
        vmovups %ymm10, 64(%rax)
        vmovups %ymm11, 96(%rax)
        vmovups %ymm2, 128(%rax)
        vmovups %ymm1, 160(%rax)
        vmovups %ymm3, 192(%rax)
        vmovups %ymm0, 224(%rax)

        vzeroupper                              # avoid AVX->SSE transition penalty in the caller
        retq
.Lfunc_end0:
        .size   transpose_2d_on_tensors, .Lfunc_end0-transpose_2d_on_tensors
                                                # -- End function
        .section        ".note.GNU-stack","",@progbits
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.