Skip to content

Instantly share code, notes, and snippets.

@nicolasvasilache
Created November 9, 2021 08:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nicolasvasilache/2c773b86fcda01cc28711828a0a9ce0a to your computer and use it in GitHub Desktop.
Save nicolasvasilache/2c773b86fcda01cc28711828a0a9ce0a to your computer and use it in GitHub Desktop.
Compiled to avx2
# ---------------------------------------------------------------------------
# transpose_2d_on_tensors
#
# Straight-line AVX transpose of one tile of eight rows by eight 32-bit
# elements. Compiler output from LLVM (module "LLVMDialectModule").
#
# Syntax:    GAS, AT&T operand order (sources first, destination last).
# In:        %rsi = source tile; eight rows of 32 bytes each are read at
#                   byte offsets 0, 32, ..., 224.
#            Destination pointer is read from the caller's stack-argument
#            area at 24(%rsp).
#            NOTE(review): this looks like the aligned pointer of a second
#            MLIR memref descriptor passed under the SysV AMD64 ABI --
#            confirm against the caller. No other argument is read.
# Out:       eight rows of 32 bytes written at byte offsets 0, 32, ..., 224
#            from the destination; output row j holds element j of input
#            rows 0..7.
# Clobbers:  %rax, %ymm0-%ymm11 (vzeroupper clears the upper halves).
# Stack:     no stack space used; leaf function, no calls.
# Alignment: every access is vmovups, so no alignment is required.
# Ordering:  all eight loads are issued before the first store.
#
# Comment notation: rN = input row N, cK = element K of a row,
#                   "|" separates the low and high 128-bit lanes.
# ---------------------------------------------------------------------------
        .text
        .file   "LLVMDialectModule"
        .globl  transpose_2d_on_tensors
        .p2align 4, 0x90
        .type   transpose_2d_on_tensors,@function
transpose_2d_on_tensors:
        movq        24(%rsp), %rax                  # rax = destination base

        # Load the eight input rows.
        vmovups     (%rsi), %ymm0                   # r0
        vmovups     32(%rsi), %ymm1                 # r1
        vmovups     64(%rsi), %ymm2                 # r2
        vmovups     96(%rsi), %ymm3                 # r3
        vmovups     128(%rsi), %ymm4                # r4
        vmovups     160(%rsi), %ymm5                # r5
        vmovups     192(%rsi), %ymm6                # r6
        vmovups     224(%rsi), %ymm7                # r7

        # Stage one: interleave 32-bit elements of adjacent row pairs,
        # independently inside each 128-bit lane.
        vunpcklps   %ymm1, %ymm0, %ymm8             # r0/r1: c0,c1 | c4,c5
        vunpckhps   %ymm1, %ymm0, %ymm0             # r0/r1: c2,c3 | c6,c7
        vunpcklps   %ymm3, %ymm2, %ymm1             # r2/r3: c0,c1 | c4,c5
        vunpckhps   %ymm3, %ymm2, %ymm2             # r2/r3: c2,c3 | c6,c7
        vunpcklps   %ymm5, %ymm4, %ymm3             # r4/r5: c0,c1 | c4,c5
        vunpckhps   %ymm5, %ymm4, %ymm4             # r4/r5: c2,c3 | c6,c7
        vunpcklps   %ymm7, %ymm6, %ymm5             # r6/r7: c0,c1 | c4,c5
        vunpckhps   %ymm7, %ymm6, %ymm6             # r6/r7: c2,c3 | c6,c7

        # Stage two: interleave 64-bit pairs, giving one column of four
        # consecutive rows in each 128-bit lane.
        vunpcklpd   %ymm1, %ymm8, %ymm7             # r0..r3: c0 | c4
        vunpckhpd   %ymm1, %ymm8, %ymm1             # r0..r3: c1 | c5
        vunpcklpd   %ymm2, %ymm0, %ymm8             # r0..r3: c2 | c6
        vunpckhpd   %ymm2, %ymm0, %ymm0             # r0..r3: c3 | c7
        vunpcklpd   %ymm5, %ymm3, %ymm2             # r4..r7: c0 | c4
        vunpckhpd   %ymm5, %ymm3, %ymm3             # r4..r7: c1 | c5
        vunpcklpd   %ymm6, %ymm4, %ymm5             # r4..r7: c2 | c6
        vunpckhpd   %ymm6, %ymm4, %ymm4             # r4..r7: c3 | c7

        # Stage three (a): join the low lanes of the r0..r3 and r4..r7
        # halves to form complete columns c0..c3.
        vinsertf128 $1, %xmm2, %ymm7, %ymm6         # c0 of r0..r7
        vinsertf128 $1, %xmm3, %ymm1, %ymm9         # c1 of r0..r7
        vinsertf128 $1, %xmm5, %ymm8, %ymm10        # c2 of r0..r7
        vinsertf128 $1, %xmm4, %ymm0, %ymm11        # c3 of r0..r7

        # Stage three (b): join the high lanes to form columns c4..c7.
        # Immediate 0x31 picks the high lane of the first source for the
        # low half and the high lane of the second source for the high half.
        vperm2f128  $0x31, %ymm2, %ymm7, %ymm2      # c4 of r0..r7
        vperm2f128  $0x31, %ymm3, %ymm1, %ymm1      # c5 of r0..r7
        vperm2f128  $0x31, %ymm5, %ymm8, %ymm3      # c6 of r0..r7
        vperm2f128  $0x31, %ymm4, %ymm0, %ymm0      # c7 of r0..r7

        # Store the columns as consecutive output rows.
        vmovups     %ymm6, (%rax)                   # out row 0 = c0
        vmovups     %ymm9, 32(%rax)                 # out row 1 = c1
        vmovups     %ymm10, 64(%rax)                # out row 2 = c2
        vmovups     %ymm11, 96(%rax)                # out row 3 = c3
        vmovups     %ymm2, 128(%rax)                # out row 4 = c4
        vmovups     %ymm1, 160(%rax)                # out row 5 = c5
        vmovups     %ymm3, 192(%rax)                # out row 6 = c6
        vmovups     %ymm0, 224(%rax)                # out row 7 = c7

        vzeroupper                                  # leave no dirty upper ymm state for SSE code
        retq
.Lfunc_end0:
        .size   transpose_2d_on_tensors, .Lfunc_end0-transpose_2d_on_tensors

        .section ".note.GNU-stack","",@progbits
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment