Created
May 10, 2016 07:27
-
-
Save 9il/58c1b80110de2db5f2eff6999346a928 to your computer and use it in GitHub Desktop.
gemmMicroKernel!(Conj.none, No.add, 1, 3, 3, __vector(double[4]), double);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -----------------------------------------------------------------------
# Compiler-generated x86-64 AVX listing, AT&T operand order (src..., dst).
#
# For each of %rcx outer steps this code builds nine 4-double (ymm)
# accumulators -- 3 packed vectors loaded via %rdx times 3 scalars
# broadcast from %rsi, summed over %r8 inner steps -- and stores them as
# 288 contiguous bytes at %rdi (overwriting, not adding to, that memory).
#
# Registers read before being written (i.e. inputs):
#   %rdi  store pointer; advanced by 288 bytes per outer step
#   %rsi  base of the scalar doubles; 3 doubles consumed per inner step,
#         and the same base is reused on every outer step
#   %rdx  base of the packed vectors; 3 x 32 bytes consumed per inner step
#   %rcx  outer trip count
#   %r8   inner trip count
# NOTE(review): these are the first five SysV AMD64 integer argument
#   registers, but no symbol or prototype is visible here -- confirm.
# Modified: %rax, %rcx, %rdx, %rdi, %r9, %r10, %r11, %ymm0-%ymm15, flags.
#   %rbx and %r14 are pushed on entry and popped on exit.
# NOTE(review): both loops test their counter at the bottom, so a count of
#   zero in %r8 or %rcx would not exit on the first pass -- presumably the
#   caller guarantees non-zero counts; confirm.
# NOTE(review): all vector loads/stores use vmovapd, which requires
#   32-byte-aligned addresses for ymm operands -- confirm caller alignment.
# NOTE(review): the trailing "| |" on each line looks like table-extraction
#   residue from the web page rather than assembler syntax.
# -----------------------------------------------------------------------
.cfi_startproc | |
# Prologue: save the two callee-saved registers used as scratch below.
# The .cfi_* directives record the resulting stack layout for unwinding.
pushq %r14 | |
Ltmp0: | |
.cfi_def_cfa_offset 16 | |
pushq %rbx | |
Ltmp1: | |
.cfi_def_cfa_offset 24 | |
Ltmp2: | |
.cfi_offset %rbx, -24 | |
Ltmp3: | |
.cfi_offset %r14, -16 | |
# Loop-invariant strides derived from the inner trip count in %r8:
#   %rax = -%r8
#   %rbx = 32 * %r8
#   %r9  = 3 * %rbx = 96 * %r8   (bytes of vector data read per outer step)
#   %r10 = 3 * %r8               (doubles of scalar data read per outer step)
#   %r11 = 3 * %rax = -3 * %r8
movq %r8, %rax | |
negq %rax | |
movq %r8, %rbx | |
shlq $5, %rbx | |
leaq (%rbx,%rbx,2), %r9 | |
leaq (%r8,%r8,2), %r10 | |
leaq (%rax,%rax,2), %r11 | |
# Pad with 0x90 (NOP) bytes so the outer-loop head is aligned.
.align 4, 0x90 | |
# ---- Outer loop: one 288-byte output tile per iteration ----------------
LBB0_1: | |
# %r14 = %rsi + 24 * %r8 bytes, i.e. one past the scalars read below.
leaq (%rsi,%r10,8), %r14 | |
# Clear the nine accumulators (%ymm0-%ymm8), the inner byte offset %rbx,
# and reload the inner counter %rax from %r8.
vxorpd %ymm0, %ymm0, %ymm0 | |
xorl %ebx, %ebx | |
vxorpd %ymm1, %ymm1, %ymm1 | |
vxorpd %ymm2, %ymm2, %ymm2 | |
vxorpd %ymm3, %ymm3, %ymm3 | |
vxorpd %ymm6, %ymm6, %ymm6 | |
vxorpd %ymm7, %ymm7, %ymm7 | |
vxorpd %ymm8, %ymm8, %ymm8 | |
movq %r8, %rax | |
vxorpd %ymm4, %ymm4, %ymm4 | |
vxorpd %ymm5, %ymm5, %ymm5 | |
.align 4, 0x90 | |
# ---- Inner loop: accumulate a 3x3 grid of vector-by-scalar products ----
# %rbx is a byte offset into the scalars; it grows by 24 (3 doubles) per
# iteration, so %rbx*4 grows by 96 (3 ymm vectors) into the vector data.
LBB0_2: | |
# v0, v1, v2 = three consecutive 4-double vectors -> %ymm9, %ymm10, %ymm11
vmovapd (%rdx,%rbx,4), %ymm9 | |
vmovapd 32(%rdx,%rbx,4), %ymm10 | |
vmovapd 64(%rdx,%rbx,4), %ymm11 | |
# s0, s1, s2 = three consecutive doubles, each broadcast to all 4 lanes
# -> %ymm12, %ymm13, %ymm14
vbroadcastsd (%rsi,%rbx), %ymm12 | |
vbroadcastsd 8(%rsi,%rbx), %ymm13 | |
vbroadcastsd 16(%rsi,%rbx), %ymm14 | |
# Row s0: %ymm8 += v0*s0, %ymm7 += v1*s0, %ymm6 += v2*s0
# (%ymm15 is a temporary; the last product reuses %ymm12 since s0 is dead.)
vmulpd %ymm12, %ymm9, %ymm15 | |
vaddpd %ymm15, %ymm8, %ymm8 | |
vmulpd %ymm12, %ymm10, %ymm15 | |
vaddpd %ymm15, %ymm7, %ymm7 | |
vmulpd %ymm12, %ymm11, %ymm12 | |
vaddpd %ymm12, %ymm6, %ymm6 | |
# Row s1: %ymm3 += v0*s1, %ymm2 += v1*s1, %ymm1 += v2*s1
vmulpd %ymm13, %ymm9, %ymm12 | |
vaddpd %ymm12, %ymm3, %ymm3 | |
vmulpd %ymm13, %ymm10, %ymm12 | |
vaddpd %ymm12, %ymm2, %ymm2 | |
vmulpd %ymm13, %ymm11, %ymm12 | |
vaddpd %ymm12, %ymm1, %ymm1 | |
# Row s2: %ymm0 += v0*s2, %ymm4 += v1*s2, %ymm5 += v2*s2
# (%ymm9 is overwritten as a temporary once v0 has been consumed.)
vmulpd %ymm14, %ymm9, %ymm9 | |
vaddpd %ymm9, %ymm0, %ymm0 | |
vmulpd %ymm14, %ymm10, %ymm9 | |
vaddpd %ymm9, %ymm4, %ymm4 | |
vmulpd %ymm14, %ymm11, %ymm9 | |
vaddpd %ymm9, %ymm5, %ymm5 | |
# Advance the offset by 3 doubles and count down; loop while %rax != 0.
addq $24, %rbx | |
addq $-1, %rax | |
jne LBB0_2 | |
# %rsi = %r14 - 24 * %r8, which is the value %rsi already held at
# LBB0_1: the scalar base is the same for every outer iteration.
leaq (%r14,%r11,8), %rsi | |
# Write the nine accumulators to consecutive 32-byte slots at %rdi, in the
# order s0:(v0,v1,v2), s1:(v0,v1,v2), s2:(v0,v1,v2).
vmovapd %ymm8, (%rdi) | |
vmovapd %ymm7, 32(%rdi) | |
vmovapd %ymm6, 64(%rdi) | |
vmovapd %ymm3, 96(%rdi) | |
vmovapd %ymm2, 128(%rdi) | |
vmovapd %ymm1, 160(%rdi) | |
vmovapd %ymm0, 192(%rdi) | |
vmovapd %ymm4, 224(%rdi) | |
vmovapd %ymm5, 256(%rdi) | |
# Step to the next output tile (9 * 32 = 288 bytes) and to the next block
# of vector data (%r9 = 96 * %r8 bytes, exactly what the inner loop read).
addq $288, %rdi | |
addq %r9, %rdx | |
# Count down the outer loop; repeat while %rcx != 0.
addq $-1, %rcx | |
jne LBB0_1 | |
# Epilogue: restore callee-saved registers in reverse push order, then
# clear the upper halves of the ymm registers before returning.
popq %rbx | |
popq %r14 | |
vzeroupper | |
retq | |
.cfi_endproc |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment