Skip to content

Instantly share code, notes, and snippets.

@9il
Created May 10, 2016 07:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save 9il/58c1b80110de2db5f2eff6999346a928 to your computer and use it in GitHub Desktop.
Save 9il/58c1b80110de2db5f2eff6999346a928 to your computer and use it in GitHub Desktop.
gemmMicroKernel!(Conj.none, No.add, 1, 3, 3, __vector(double[4]), double);
## Compiler-emitted x86-64 AVX kernel body, AT&T syntax (source, destination operand order).
## No entry label appears in this listing; the code runs from .cfi_startproc to .cfi_endproc.
##
## What the code does:
##   Outer loop (LBB0_1), %rcx iterations:
##     - zero nine 256-bit accumulators;
##     - inner loop (LBB0_2), %r8 iterations: load three 4-double vectors from the %rdx
##       stream and three scalars from the %rsi stream, then add each of the nine
##       vector*scalar products into its own accumulator;
##     - store the nine accumulators to (%rdi) .. 256(%rdi), advance %rdi by 288 bytes and
##       %rdx by 96 * %r8 bytes. %rsi ends each outer iteration with the value it had on entry.
##
## Register usage as read from the code:
##   %rdi  store destination (written only, never read as data)
##   %rsi  base of the broadcast scalars, 3 doubles (24 bytes) per inner iteration
##   %rdx  base of the vector operands, 3 x 32 bytes (96 bytes) per inner iteration
##   %rcx  outer-loop trip count
##   %r8   inner-loop trip count
##   %rax  inner-loop down-counter;  %rbx  byte offset into the %rsi stream
##   %r9   96 * %r8 (per-outer-iteration stride for %rdx)
##   %r10, %r11, %r14  temporaries for the %rsi round trip (see the note at LBB0_1)
##   %ymm0-%ymm8 accumulators, %ymm9-%ymm11 loaded vectors, %ymm12-%ymm14 broadcast scalars,
##   %ymm15 product temporary
## NOTE(review): %rdi, %rsi, %rdx, %rcx, %r8 look like the first five System V AMD64 integer
## arguments; confirm the mapping against the D declaration of gemmMicroKernel.
##
## NOTE(review): both loops decrement and test their counter only after the body has run.
## Nothing here checks for %r8 == 0 or %rcx == 0 on entry; in that case the counter wraps
## instead of terminating. Presumably the caller guarantees non-zero counts -- confirm.
## NOTE(review): every vector load and store uses vmovapd (the aligned form), so the
## addresses derived from %rdx and %rdi must be 32-byte aligned or the instruction faults.
.cfi_startproc
## Save the two callee-saved registers this code overwrites (%r14, then %rbx).
pushq %r14
Ltmp0:
## Unwind info: the CFA is now 16 bytes above %rsp (return address + saved %r14).
.cfi_def_cfa_offset 16
pushq %rbx
Ltmp1:
## Unwind info: the CFA is now 24 bytes above %rsp (return address + %r14 + %rbx).
.cfi_def_cfa_offset 24
Ltmp2:
## Unwind info: locations of the saved registers relative to the CFA.
.cfi_offset %rbx, -24
Ltmp3:
.cfi_offset %r14, -16
## Loop-invariant setup.
## %rax = -%r8
movq %r8, %rax
negq %rax
## %rbx = 32 * %r8
movq %r8, %rbx
shlq $5, %rbx
## %r9 = 3 * %rbx = 96 * %r8: bytes of the %rdx stream consumed by one full inner loop.
leaq (%rbx,%rbx,2), %r9
## %r10 = 3 * %r8
leaq (%r8,%r8,2), %r10
## %r11 = 3 * %rax = -3 * %r8
leaq (%rax,%rax,2), %r11
## Pad with 0x90 (NOP) bytes up to the alignment boundary before the outer-loop head.
## Presumably 2^4 = 16-byte alignment (Darwin-style .align takes a power of two) -- confirm.
.align 4, 0x90
## ---- Outer loop head: one 288-byte output tile per iteration --------------------------
LBB0_1:
## %r14 = %rsi + 8 * %r10 = %rsi + 24 * %r8 (address just past the scalars read below).
## NOTE(review): the only use of %r14 is the leaq after the inner loop, which subtracts the
## same 24 * %r8 again, so %rsi is restored to its current value. The %r10/%r11/%r14
## arithmetic has no net effect and is the only reason %r14 is saved and restored.
leaq (%rsi,%r10,8), %r14
## Zero the nine accumulators (%ymm0-%ymm8) and reset the inner-loop state.
vxorpd %ymm0, %ymm0, %ymm0
## %rbx = 0: byte offset into the %rsi stream (a 32-bit write also clears the upper half).
xorl %ebx, %ebx
vxorpd %ymm1, %ymm1, %ymm1
vxorpd %ymm2, %ymm2, %ymm2
vxorpd %ymm3, %ymm3, %ymm3
vxorpd %ymm6, %ymm6, %ymm6
vxorpd %ymm7, %ymm7, %ymm7
vxorpd %ymm8, %ymm8, %ymm8
## %rax = %r8: inner-loop down-counter.
movq %r8, %rax
vxorpd %ymm4, %ymm4, %ymm4
vxorpd %ymm5, %ymm5, %ymm5
## Same NOP-filled alignment padding before the inner-loop head.
.align 4, 0x90
## ---- Inner loop: 3 vectors x 3 scalars -> 9 multiply/add pairs per iteration -----------
LBB0_2:
## Load three consecutive 4-double vectors v0, v1, v2. The index is scaled by 4 because
## %rbx advances 24 bytes per iteration while this stream advances 96 bytes.
vmovapd (%rdx,%rbx,4), %ymm9
vmovapd 32(%rdx,%rbx,4), %ymm10
vmovapd 64(%rdx,%rbx,4), %ymm11
## Broadcast three consecutive doubles s0, s1, s2 to all four lanes.
vbroadcastsd (%rsi,%rbx), %ymm12
vbroadcastsd 8(%rsi,%rbx), %ymm13
vbroadcastsd 16(%rsi,%rbx), %ymm14
## Each product is formed with vmulpd and then accumulated with a separate vaddpd
## (no fused multiply-add instruction is used).
## %ymm8 += v0 * s0
vmulpd %ymm12, %ymm9, %ymm15
vaddpd %ymm15, %ymm8, %ymm8
## %ymm7 += v1 * s0
vmulpd %ymm12, %ymm10, %ymm15
vaddpd %ymm15, %ymm7, %ymm7
## %ymm6 += v2 * s0 (s0 is dead afterwards, so %ymm12 is reused as the product temporary)
vmulpd %ymm12, %ymm11, %ymm12
vaddpd %ymm12, %ymm6, %ymm6
## %ymm3 += v0 * s1
vmulpd %ymm13, %ymm9, %ymm12
vaddpd %ymm12, %ymm3, %ymm3
## %ymm2 += v1 * s1
vmulpd %ymm13, %ymm10, %ymm12
vaddpd %ymm12, %ymm2, %ymm2
## %ymm1 += v2 * s1
vmulpd %ymm13, %ymm11, %ymm12
vaddpd %ymm12, %ymm1, %ymm1
## %ymm0 += v0 * s2 (v0 is dead afterwards, so %ymm9 is reused as the product temporary)
vmulpd %ymm14, %ymm9, %ymm9
vaddpd %ymm9, %ymm0, %ymm0
## %ymm4 += v1 * s2
vmulpd %ymm14, %ymm10, %ymm9
vaddpd %ymm9, %ymm4, %ymm4
## %ymm5 += v2 * s2
vmulpd %ymm14, %ymm11, %ymm9
vaddpd %ymm9, %ymm5, %ymm5
## Advance the scalar-stream offset by 3 doubles, count down, repeat while %rax != 0.
addq $24, %rbx
addq $-1, %rax
jne LBB0_2
## %rsi = %r14 + 8 * %r11 = (%rsi + 24 * %r8) - 24 * %r8, i.e. %rsi is unchanged.
leaq (%r14,%r11,8), %rsi
## Write the nine accumulators to the destination. These are plain stores: the previous
## contents of the destination are not read. Layout is scalar-major: the three s0 products
## come first, then the three s1 products, then the three s2 products.
vmovapd %ymm8, (%rdi)
vmovapd %ymm7, 32(%rdi)
vmovapd %ymm6, 64(%rdi)
vmovapd %ymm3, 96(%rdi)
vmovapd %ymm2, 128(%rdi)
vmovapd %ymm1, 160(%rdi)
vmovapd %ymm0, 192(%rdi)
vmovapd %ymm4, 224(%rdi)
vmovapd %ymm5, 256(%rdi)
## Step to the next output tile (9 vectors * 32 bytes = 288 bytes).
addq $288, %rdi
## Step the vector stream past the 96 * %r8 bytes the inner loop just consumed.
addq %r9, %rdx
## Count down the outer loop, repeat while %rcx != 0.
addq $-1, %rcx
jne LBB0_1
## Epilogue: restore the callee-saved registers in reverse push order.
popq %rbx
popq %r14
## Clear the upper halves of the ymm registers before returning, so later legacy-SSE code
## does not pay an AVX/SSE state-transition penalty.
vzeroupper
retq
.cfi_endproc
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment