Created
July 4, 2016 05:45
-
-
Save 9il/a167e56d7923185f6ce253ee14969b7f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Straight-line x86-64 kernel body in AT&T syntax using AVX, AVX2 (register-source
# vbroadcastsd) and FMA instructions. No entry label is visible in this listing.
#
# Register roles as used by the code below:
#   %xmm0 : scalar double; broadcast and used to scale the 8 vectors read from (%rcx)
#   %rcx  : 256-byte block (8 vectors of 4 doubles); read at entry, overwritten at exit
#   %rdx  : streamed input; 64 bytes (2 vectors) are consumed per loop iteration
#   %rsi  : streamed input; 32 bytes (4 doubles) are consumed per loop iteration,
#           and 10 more doubles are read right after the streamed part
#   %rdi  : loop trip count
#   %rax, %r8 : working copies of %rsi and %rdi; %ymm0-%ymm13 are all overwritten
#
# NOTE(review): rdi/rsi/rdx/rcx/xmm0 matches System V AMD64 argument order —
# confirm against the caller's prototype.
# NOTE(review): the trailing "| |" on each line looks like table residue from the
# web page this listing was copied from; it is not assembler syntax.
#
# acc[k] = scalar * C[k] for the 8 vectors at (%rcx); acc[0..7] live in %ymm7..%ymm0.
vbroadcastsd %xmm0, %ymm0 | |
vmulpd (%rcx), %ymm0, %ymm7 | |
vmulpd 32(%rcx), %ymm0, %ymm6 | |
vmulpd 64(%rcx), %ymm0, %ymm5 | |
vmulpd 96(%rcx), %ymm0, %ymm4 | |
vmulpd 128(%rcx), %ymm0, %ymm3 | |
vmulpd 160(%rcx), %ymm0, %ymm2 | |
vmulpd 192(%rcx), %ymm0, %ymm1 | |
# Last use of the broadcast scalar: %ymm0 becomes acc[7].
vmulpd 224(%rcx), %ymm0, %ymm0 | |
# Work on copies so %rsi and %rdi keep their entry values for the code after the loop.
movq %rsi, %rax | |
movq %rdi, %r8 | |
# Pad with NOP bytes (0x90) so the loop head is aligned.
# NOTE(review): ".align 4" means 2^4 = 16 bytes on Mach-O targets but 4 bytes on
# x86 ELF targets — confirm which object format this was generated for.
.align 4, 0x90 | |
# Loop: per iteration load two vectors b0, b1 from (%rdx) and broadcast four
# scalars a0..a3 from (%rax), then update acc[2*j + h] -= a_j * b_h.
# vmovapd faults unless %rdx is 32-byte aligned.
LBB0_1: | |
vmovapd (%rdx), %ymm8 | |
vmovapd 32(%rdx), %ymm9 | |
vbroadcastsd (%rax), %ymm10 | |
vbroadcastsd 8(%rax), %ymm11 | |
vbroadcastsd 16(%rax), %ymm12 | |
vbroadcastsd 24(%rax), %ymm13 | |
# AT&T operand order: vfnmadd231pd s3, s2, dst computes dst = dst - s2 * s3.
vfnmadd231pd %ymm10, %ymm8, %ymm7 | |
vfnmadd231pd %ymm10, %ymm9, %ymm6 | |
vfnmadd231pd %ymm11, %ymm8, %ymm5 | |
vfnmadd231pd %ymm11, %ymm9, %ymm4 | |
vfnmadd231pd %ymm12, %ymm8, %ymm3 | |
vfnmadd231pd %ymm12, %ymm9, %ymm2 | |
vfnmadd231pd %ymm13, %ymm8, %ymm1 | |
vfnmadd231pd %ymm13, %ymm9, %ymm0 | |
addq $64, %rdx | |
addq $32, %rax | |
# The counter is decremented before it is tested, so the body always runs at
# least once. NOTE(review): a trip count of 0 would wrap around — presumably the
# caller guarantees count >= 1; confirm.
addq $-1, %r8 | |
jne LBB0_1 | |
# %rdi = count * 32: byte offset of the first double after the scalars the loop
# consumed from %rsi. Below, t[i] denotes the double at 8*i(%rsi,%rdi).
# NOTE(review): the scalars used are t[0..3], t[5..7], t[10], t[11], t[15], with
# t[0]/t[5]/t[10]/t[15] multiplied in as final scale factors — this looks like
# substitution against a 4x4 triangular block with a pre-inverted diagonal; confirm.
shlq $5, %rdi | |
# row0 (%ymm7, %ymm6) *= t[0]
vbroadcastsd (%rsi,%rdi), %ymm8 | |
vmulpd %ymm8, %ymm7, %ymm7 | |
vmulpd %ymm8, %ymm6, %ymm6 | |
# row1 = (row1 - t[1] * row0) * t[5]; the result lands in %ymm4 (first half)
# and %ymm5 (second half), the reverse of where row1 was held during the loop.
# vfnmadd213pd s3, s2, dst computes dst = s3 - s2 * dst.
vbroadcastsd 8(%rsi,%rdi), %ymm8 | |
vmovapd %ymm7, %ymm9 | |
vfnmadd213pd %ymm5, %ymm8, %ymm9 | |
vfnmadd213pd %ymm4, %ymm6, %ymm8 | |
vbroadcastsd 40(%rsi,%rdi), %ymm5 | |
vmulpd %ymm9, %ymm5, %ymm4 | |
vmulpd %ymm8, %ymm5, %ymm5 | |
# row2 (%ymm3, %ymm2) = (row2 - t[2] * row0 - t[6] * row1) * t[10]
vbroadcastsd 16(%rsi,%rdi), %ymm8 | |
vmovapd %ymm8, %ymm9 | |
vfnmadd213pd %ymm3, %ymm7, %ymm9 | |
vfnmadd213pd %ymm2, %ymm6, %ymm8 | |
vbroadcastsd 48(%rsi,%rdi), %ymm2 | |
vmovapd %ymm4, %ymm3 | |
vfnmadd213pd %ymm9, %ymm2, %ymm3 | |
vfnmadd213pd %ymm8, %ymm5, %ymm2 | |
vbroadcastsd 80(%rsi,%rdi), %ymm8 | |
vmulpd %ymm3, %ymm8, %ymm3 | |
vmulpd %ymm2, %ymm8, %ymm2 | |
# row3 (%ymm1, %ymm0) = (row3 - t[3] * row0 - t[7] * row1 - t[11] * row2) * t[15]
vbroadcastsd 24(%rsi,%rdi), %ymm8 | |
vmovapd %ymm8, %ymm9 | |
vfnmadd213pd %ymm1, %ymm7, %ymm9 | |
vfnmadd213pd %ymm0, %ymm6, %ymm8 | |
vbroadcastsd 56(%rsi,%rdi), %ymm0 | |
vmovapd %ymm0, %ymm1 | |
vfnmadd213pd %ymm9, %ymm4, %ymm1 | |
vfnmadd213pd %ymm8, %ymm5, %ymm0 | |
vbroadcastsd 88(%rsi,%rdi), %ymm8 | |
vmovapd %ymm8, %ymm9 | |
vfnmadd213pd %ymm1, %ymm3, %ymm9 | |
vfnmadd213pd %ymm0, %ymm2, %ymm8 | |
vbroadcastsd 120(%rsi,%rdi), %ymm0 | |
vmulpd %ymm9, %ymm0, %ymm1 | |
vmulpd %ymm8, %ymm0, %ymm0 | |
# Write the four result rows back over the block at (%rcx). These aligned stores
# fault unless %rcx is 32-byte aligned. Row1 is stored from %ymm4/%ymm5 in that
# order, matching where the substitution step above left it.
vmovapd %ymm7, (%rcx) | |
vmovapd %ymm6, 32(%rcx) | |
vmovapd %ymm4, 64(%rcx) | |
vmovapd %ymm5, 96(%rcx) | |
vmovapd %ymm3, 128(%rcx) | |
vmovapd %ymm2, 160(%rcx) | |
vmovapd %ymm1, 192(%rcx) | |
vmovapd %ymm0, 224(%rcx) | |
# Zero the upper halves of the ymm registers before returning, so later legacy-SSE
# code in the caller does not pay an AVX/SSE transition penalty.
vzeroupper | |
retq |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Straight-line x86-64 kernel body in AT&T syntax using AVX and AVX2 (register-source
# vbroadcastsd). Every multiply-subtract is a separate vmulpd followed by vsubpd,
# so each update rounds twice. No entry label is visible in this listing.
#
# Register roles as used by the code below:
#   %xmm0 : scalar double; broadcast and used to scale the 8 vectors read from (%rcx)
#   %rcx  : 256-byte block (8 vectors of 4 doubles); read at entry, overwritten at exit
#   %rdx  : streamed input; 64 bytes (2 vectors) are consumed per loop iteration
#   %rsi  : streamed input; 32 bytes (4 doubles) are consumed per loop iteration,
#           and 10 more doubles are read right after the streamed part
#   %rdi  : loop trip count
#   %rax, %r8 : working copies of %rsi and %rdi; %ymm0-%ymm14 are all overwritten
#
# NOTE(review): rdi/rsi/rdx/rcx/xmm0 matches System V AMD64 argument order —
# confirm against the caller's prototype.
# NOTE(review): the trailing "| |" on each line looks like table residue from the
# web page this listing was copied from; it is not assembler syntax.
#
# Opens the call-frame-information region for this function; the matching
# .cfi_endproc is not part of this listing.
.cfi_startproc | |
# acc[k] = scalar * C[k] for the 8 vectors at (%rcx); acc[0..7] live in %ymm7..%ymm0.
vbroadcastsd %xmm0, %ymm0 | |
vmulpd (%rcx), %ymm0, %ymm7 | |
vmulpd 32(%rcx), %ymm0, %ymm6 | |
vmulpd 64(%rcx), %ymm0, %ymm5 | |
vmulpd 96(%rcx), %ymm0, %ymm4 | |
vmulpd 128(%rcx), %ymm0, %ymm3 | |
vmulpd 160(%rcx), %ymm0, %ymm2 | |
vmulpd 192(%rcx), %ymm0, %ymm1 | |
# Last use of the broadcast scalar: %ymm0 becomes acc[7].
vmulpd 224(%rcx), %ymm0, %ymm0 | |
# Work on copies so %rsi and %rdi keep their entry values for the code after the loop.
movq %rsi, %rax | |
movq %rdi, %r8 | |
# Pad with NOP bytes (0x90) so the loop head is aligned.
# NOTE(review): ".align 4" means 2^4 = 16 bytes on Mach-O targets but 4 bytes on
# x86 ELF targets — confirm which object format this was generated for.
.align 4, 0x90 | |
# Loop: per iteration load two vectors b0, b1 from (%rdx) and broadcast four
# scalars a0..a3 from (%rax), then update acc[2*j + h] -= a_j * b_h.
# vmovapd faults unless %rdx is 32-byte aligned.
LBB0_1: | |
vmovapd (%rdx), %ymm8 | |
vmovapd 32(%rdx), %ymm9 | |
vbroadcastsd (%rax), %ymm10 | |
vbroadcastsd 8(%rax), %ymm11 | |
vbroadcastsd 16(%rax), %ymm12 | |
vbroadcastsd 24(%rax), %ymm13 | |
# Each pair below forms the product in a temporary, then subtracts it from the
# accumulator. %ymm10 is reused as the temporary once a0 has been consumed, and
# %ymm8 once b0 has been consumed.
vmulpd %ymm10, %ymm8, %ymm14 | |
vsubpd %ymm14, %ymm7, %ymm7 | |
vmulpd %ymm10, %ymm9, %ymm10 | |
vsubpd %ymm10, %ymm6, %ymm6 | |
vmulpd %ymm11, %ymm8, %ymm10 | |
vsubpd %ymm10, %ymm5, %ymm5 | |
vmulpd %ymm11, %ymm9, %ymm10 | |
vsubpd %ymm10, %ymm4, %ymm4 | |
vmulpd %ymm12, %ymm8, %ymm10 | |
vsubpd %ymm10, %ymm3, %ymm3 | |
vmulpd %ymm12, %ymm9, %ymm10 | |
vsubpd %ymm10, %ymm2, %ymm2 | |
vmulpd %ymm13, %ymm8, %ymm8 | |
vsubpd %ymm8, %ymm1, %ymm1 | |
vmulpd %ymm13, %ymm9, %ymm8 | |
vsubpd %ymm8, %ymm0, %ymm0 | |
addq $64, %rdx | |
addq $32, %rax | |
# The counter is decremented before it is tested, so the body always runs at
# least once. NOTE(review): a trip count of 0 would wrap around — presumably the
# caller guarantees count >= 1; confirm.
addq $-1, %r8 | |
jne LBB0_1 | |
# %rdi = count * 32: byte offset of the first double after the scalars the loop
# consumed from %rsi. Below, t[i] denotes the double at 8*i(%rsi,%rdi).
# NOTE(review): the scalars used are t[0..3], t[5..7], t[10], t[11], t[15], with
# t[0]/t[5]/t[10]/t[15] multiplied in as final scale factors — this looks like
# substitution against a 4x4 triangular block with a pre-inverted diagonal; confirm.
shlq $5, %rdi | |
# row0 (%ymm7, %ymm6) *= t[0]
vbroadcastsd (%rsi,%rdi), %ymm8 | |
vmulpd %ymm8, %ymm7, %ymm7 | |
vmulpd %ymm8, %ymm6, %ymm6 | |
# row1 = (row1 - t[1] * row0) * t[5]; the result lands in %ymm4 (first half)
# and %ymm5 (second half), the reverse of where row1 was held during the loop.
vbroadcastsd 8(%rsi,%rdi), %ymm8 | |
vmulpd %ymm7, %ymm8, %ymm9 | |
vsubpd %ymm9, %ymm5, %ymm5 | |
vmulpd %ymm6, %ymm8, %ymm8 | |
vsubpd %ymm8, %ymm4, %ymm8 | |
vbroadcastsd 40(%rsi,%rdi), %ymm9 | |
vmulpd %ymm5, %ymm9, %ymm4 | |
vmulpd %ymm8, %ymm9, %ymm5 | |
# row2 (%ymm3, %ymm2) = (row2 - t[2] * row0 - t[6] * row1) * t[10]
vbroadcastsd 16(%rsi,%rdi), %ymm8 | |
vmulpd %ymm8, %ymm7, %ymm9 | |
vsubpd %ymm9, %ymm3, %ymm3 | |
vmulpd %ymm8, %ymm6, %ymm8 | |
vsubpd %ymm8, %ymm2, %ymm2 | |
vbroadcastsd 48(%rsi,%rdi), %ymm8 | |
vmulpd %ymm4, %ymm8, %ymm9 | |
vsubpd %ymm9, %ymm3, %ymm3 | |
vmulpd %ymm5, %ymm8, %ymm8 | |
vsubpd %ymm8, %ymm2, %ymm2 | |
vbroadcastsd 80(%rsi,%rdi), %ymm8 | |
vmulpd %ymm3, %ymm8, %ymm3 | |
vmulpd %ymm2, %ymm8, %ymm2 | |
# row3 (%ymm1, %ymm0) = (row3 - t[3] * row0 - t[7] * row1 - t[11] * row2) * t[15]
vbroadcastsd 24(%rsi,%rdi), %ymm8 | |
vmulpd %ymm8, %ymm7, %ymm9 | |
vsubpd %ymm9, %ymm1, %ymm1 | |
vmulpd %ymm8, %ymm6, %ymm8 | |
vsubpd %ymm8, %ymm0, %ymm0 | |
vbroadcastsd 56(%rsi,%rdi), %ymm8 | |
vmulpd %ymm8, %ymm4, %ymm9 | |
vsubpd %ymm9, %ymm1, %ymm1 | |
vmulpd %ymm8, %ymm5, %ymm8 | |
vsubpd %ymm8, %ymm0, %ymm0 | |
vbroadcastsd 88(%rsi,%rdi), %ymm8 | |
vmulpd %ymm8, %ymm3, %ymm9 | |
vsubpd %ymm9, %ymm1, %ymm1 | |
vmulpd %ymm8, %ymm2, %ymm8 | |
vsubpd %ymm8, %ymm0, %ymm0 | |
vbroadcastsd 120(%rsi,%rdi), %ymm8 | |
vmulpd %ymm1, %ymm8, %ymm1 | |
vmulpd %ymm0, %ymm8, %ymm0 | |
# Write the four result rows back over the block at (%rcx). These aligned stores
# fault unless %rcx is 32-byte aligned. Row1 is stored from %ymm4/%ymm5 in that
# order, matching where the substitution step above left it.
vmovapd %ymm7, (%rcx) | |
vmovapd %ymm6, 32(%rcx) | |
vmovapd %ymm4, 64(%rcx) | |
vmovapd %ymm5, 96(%rcx) | |
vmovapd %ymm3, 128(%rcx) | |
vmovapd %ymm2, 160(%rcx) | |
vmovapd %ymm1, 192(%rcx) | |
vmovapd %ymm0, 224(%rcx) | |
# Zero the upper halves of the ymm registers before returning, so later legacy-SSE
# code in the caller does not pay an AVX/SSE transition penalty.
vzeroupper | |
retq |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment