Skip to content

Instantly share code, notes, and snippets.

@9il
Created March 31, 2017 05:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save 9il/e678e40f4432f9a8f52b6efbf1b79c74 to your computer and use it in GitHub Desktop.
Save 9il/e678e40f4432f9a8f52b6efbf1b79c74 to your computer and use it in GitHub Desktop.
.section __TEXT,__text,regular,pure_instructions
## ---------------------------------------------------------------------------
## _dot_reg_basic_generic
##
## Syntax / target: AT&T syntax, x86-64 Mach-O, System V AMD64 calling
## convention. Uses AVX (ymm registers) and FMA3 (vfmadd231ps).
##
## Single-precision register-blocked kernel. For every step k it loads 16
## floats from `a` and 6 floats from `b`, and accumulates the 6 x 16 tile
##
##     c[j][i] = sum over k in [0, n) of a[k][i] * b[k][j]
##               (0 <= j < 6, 0 <= i < 16)
##
## using one fused multiply-add per element per step.
##
## In:
##   rdi = a   n steps of 16 floats (64 bytes per step). Read with vmovaps,
##             so it must be 32-byte aligned.
##   rsi = b   n steps of 6 floats (24 bytes per step). No alignment needed.
##   rdx = n   number of steps. n == 0 is allowed: the tile is stored as all
##             zeros and `a` is returned unchanged.
##   rcx = c   output tile, 96 floats. Written with vmovaps, so it must be
##             32-byte aligned. Row j starts at byte offset 64*j.
## Out:
##   rax = a + 64*n bytes (the position just past the consumed part of `a`).
## Clobbers: rax, rdx, rsi, ymm0-ymm15, flags.
## Stack:    not used (leaf function, no red-zone spills).
##
## Register roles inside the loop:
##   rax         cursor into a
##   rsi         cursor into b
##   rdx         steps remaining (> 0 at the loop head)
##   ymm0, ymm1  a[k][0..7], a[k][8..15]
##   ymm2, ymm3  alternating broadcast of b[k][j]
##   ymm(4+2j)   accumulator for c[j][0..7]
##   ymm(5+2j)   accumulator for c[j][8..15]
## ---------------------------------------------------------------------------
.globl _dot_reg_basic_generic
.p2align 4, 0x90
_dot_reg_basic_generic:
.cfi_startproc
        ## Zero all twelve accumulators.
        vxorps  %ymm4, %ymm4, %ymm4
        vxorps  %ymm5, %ymm5, %ymm5
        vxorps  %ymm6, %ymm6, %ymm6
        vxorps  %ymm7, %ymm7, %ymm7
        vxorps  %ymm8, %ymm8, %ymm8
        vxorps  %ymm9, %ymm9, %ymm9
        vxorps  %ymm10, %ymm10, %ymm10
        vxorps  %ymm11, %ymm11, %ymm11
        vxorps  %ymm12, %ymm12, %ymm12
        vxorps  %ymm13, %ymm13, %ymm13
        vxorps  %ymm14, %ymm14, %ymm14
        vxorps  %ymm15, %ymm15, %ymm15

        movq    %rdi, %rax                      ## rax walks a and is also the return value
        testq   %rdx, %rdx
        je      Ldot_reg_basic_generic_store    ## n == 0: nothing to accumulate

.p2align 4, 0x90
Ldot_reg_basic_generic_loop:
        prefetcht0 512(%rax)                    ## pull in a[] eight steps (8 * 64 bytes) ahead
        vmovaps (%rax), %ymm0                   ## a[k][0..7]
        vmovaps 32(%rax), %ymm1                 ## a[k][8..15]

        vbroadcastss (%rsi), %ymm2              ## b[k][0]
        vfmadd231ps %ymm0, %ymm2, %ymm4         ## c[0][0..7]  += a_lo * b0
        vfmadd231ps %ymm1, %ymm2, %ymm5         ## c[0][8..15] += a_hi * b0

        vbroadcastss 4(%rsi), %ymm3             ## b[k][1]
        vfmadd231ps %ymm0, %ymm3, %ymm6
        vfmadd231ps %ymm1, %ymm3, %ymm7

        vbroadcastss 8(%rsi), %ymm2             ## b[k][2]
        vfmadd231ps %ymm0, %ymm2, %ymm8
        vfmadd231ps %ymm1, %ymm2, %ymm9

        vbroadcastss 12(%rsi), %ymm3            ## b[k][3]
        vfmadd231ps %ymm0, %ymm3, %ymm10
        vfmadd231ps %ymm1, %ymm3, %ymm11

        vbroadcastss 16(%rsi), %ymm2            ## b[k][4]
        vfmadd231ps %ymm0, %ymm2, %ymm12
        vfmadd231ps %ymm1, %ymm2, %ymm13

        vbroadcastss 20(%rsi), %ymm3            ## b[k][5]
        vfmadd231ps %ymm0, %ymm3, %ymm14
        vfmadd231ps %ymm1, %ymm3, %ymm15

        addq    $64, %rax                       ## next 16 floats of a
        addq    $24, %rsi                       ## next 6 floats of b
        decq    %rdx
        jne     Ldot_reg_basic_generic_loop

Ldot_reg_basic_generic_store:
        ## Write the tile: row j occupies bytes [64*j, 64*j + 64) of c.
        vmovaps %ymm4, (%rcx)
        vmovaps %ymm5, 32(%rcx)
        vmovaps %ymm6, 64(%rcx)
        vmovaps %ymm7, 96(%rcx)
        vmovaps %ymm8, 128(%rcx)
        vmovaps %ymm9, 160(%rcx)
        vmovaps %ymm10, 192(%rcx)
        vmovaps %ymm11, 224(%rcx)
        vmovaps %ymm12, 256(%rcx)
        vmovaps %ymm13, 288(%rcx)
        vmovaps %ymm14, 320(%rcx)
        vmovaps %ymm15, 352(%rcx)
        vzeroupper                              ## avoid AVX->SSE transition penalties in the caller
        retq                                    ## rax = a + 64*n
.cfi_endproc
.subsections_via_symbols
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment