Skip to content

Instantly share code, notes, and snippets.

@jmbr
Created March 16, 2018 20:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jmbr/30593fc36f2327ec6b8167c6a97797f0 to your computer and use it in GitHub Desktop.
Save jmbr/30593fc36f2327ec6b8167c6a97797f0 to your computer and use it in GitHub Desktop.
#-----------------------------------------------------------------------
# __Z21matrix_vector_productPKdPd
#   Itanium-mangled name for matrix_vector_product(double const*, double*)
#   with the Mach-O leading underscore.
# Syntax: AT&T (op src, dst). Requires AVX and FMA.
# In:    %rdi = first pointer argument; nine doubles are read at 0..64(%rdi),
#               referred to below as A[0]..A[8]
#        %rsi = second pointer argument; three doubles are written at
#               0..23(%rsi), referred to below as out[0..2]
# Clobb: %rax, %xmm0-%xmm9
# Stack: uses 24 bytes at -40..-17(%rsp) without moving %rsp (red zone).
#
# Computed values (every multiplicand is loaded from %rdi; %rsi is never read):
#   out[0] = A[1]*A[1] + A[0]*A[0] + A[2]*A[2]
#   out[1] = A[1]*A[4] + A[0]*A[3] + A[2]*A[5]
#   out[2] = A[2]*A[8] + (A[0]*A[6] + A[1]*A[7])
# NOTE(review): this is A (read as row-major 3x3) times its own first row;
# presumably the source passed the matrix's first row as the vector - confirm.
#-----------------------------------------------------------------------
.text
.align 4,0x90
.globl __Z21matrix_vector_productPKdPd
__Z21matrix_vector_productPKdPd:
LFB18:
vmovsd 8(%rdi), %xmm2                   # xmm2 = A[1]
vmovsd (%rdi), %xmm1                    # xmm1 = A[0]
vmulsd %xmm2, %xmm2, %xmm3              # xmm3 = A[1]*A[1]
vmovsd 16(%rdi), %xmm0                  # xmm0 = A[2]
vmovsd 32(%rdi), %xmm6                  # xmm6 = A[4]
vmovsd 56(%rdi), %xmm9                  # xmm9 = A[7]
vfmadd231sd %xmm1, %xmm1, %xmm3         # xmm3 += A[0]*A[0]
vfmadd231sd %xmm0, %xmm0, %xmm3         # xmm3 += A[2]*A[2]
vmovsd %xmm3, -40(%rsp)                 # spill out[0]
vmulsd %xmm6, %xmm2, %xmm3              # xmm3 = A[1]*A[4]
vmulsd %xmm9, %xmm2, %xmm2              # xmm2 = A[1]*A[7]
vmovsd 24(%rdi), %xmm5                  # xmm5 = A[3]
vmovsd 40(%rdi), %xmm4                  # xmm4 = A[5]
vmovsd 48(%rdi), %xmm8                  # xmm8 = A[6]
vfmadd231sd %xmm5, %xmm1, %xmm3         # xmm3 += A[0]*A[3]
vmovsd 64(%rdi), %xmm7                  # xmm7 = A[8]
vfmadd231sd %xmm4, %xmm0, %xmm3         # xmm3 += A[2]*A[5]
vmovsd %xmm3, -32(%rsp)                 # spill out[1]
vfmadd132sd %xmm8, %xmm2, %xmm1         # xmm1 = A[0]*A[6] + A[1]*A[7]
vfmadd132sd %xmm7, %xmm1, %xmm0         # xmm0 = A[2]*A[8] + xmm1
vmovsd %xmm0, -24(%rsp)                 # spill out[2]
movq -24(%rsp), %rax                    # rax = bit pattern of out[2]
vmovdqa -40(%rsp), %xmm0                # xmm0 = {out[0], out[1]}; aligned load, relies on rsp % 16 == 8 at entry
movq %rax, 16(%rsi)                     # store out[2] (8 bytes)
vmovups %xmm0, (%rsi)                   # store out[0..1] (16 bytes, unaligned store)
ret
LFE18:
# Unwind information (__eh_frame) for the function between LFB18 and LFE18:
# one CIE (common rules) followed by one FDE (the function's address range).
# Field meanings below follow the DWARF CFI / .eh_frame record layout.
.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
EH_frame1:
.set L$set$0,LECIE1-LSCIE1              # CIE length = bytes from LSCIE1 to LECIE1
.long L$set$0
LSCIE1:
.long 0                                 # CIE id: 0 marks this record as a CIE
.byte 0x1                               # CIE version 1
.ascii "zR\0"                           # augmentation: 'z' = has augmentation data, 'R' = FDE pointer encoding given
.byte 0x1                               # code alignment factor = 1 (ULEB128)
.byte 0x78                              # data alignment factor = -8 (SLEB128)
.byte 0x10                              # return-address column = DWARF register 16
.byte 0x1                               # augmentation data length = 1 byte
.byte 0x10                              # FDE pointer encoding 0x10 = pc-relative
.byte 0xc                               # DW_CFA_def_cfa ...
.byte 0x7                               #   ... register 7 (rsp on x86-64)
.byte 0x8                               #   ... offset 8, i.e. CFA = rsp + 8
.byte 0x90                              # DW_CFA_offset for register 16 (return address) ...
.byte 0x1                               #   ... factored offset 1 * (-8), i.e. saved at CFA - 8
.align 3
LECIE1:
LSFDE1:
.set L$set$1,LEFDE1-LASFDE1             # FDE length = bytes from LASFDE1 to LEFDE1
.long L$set$1
LASFDE1:
.long LASFDE1-EH_frame1                 # distance back to the owning CIE
.quad LFB18-.                           # function start, pc-relative (matches encoding 0x10 above)
.set L$set$2,LFE18-LFB18                # size in bytes of the covered code
.quad L$set$2
.byte 0                                 # FDE augmentation data length = 0; no further CFA instructions follow
.align 3
LEFDE1:
# Darwin assembler directives emitted by the compiler; no data is placed after them here.
.constructor
.destructor
.align 1
.subsections_via_symbols
#-----------------------------------------------------------------------
# __Z21matrix_vector_productPKdPd
#   Itanium-mangled name for matrix_vector_product(double const*, double*)
#   with the Mach-O leading underscore.
# Syntax: AT&T (op src, dst). Requires AVX and FMA.
# In:    %rdi = first pointer argument; nine doubles are read at 0..64(%rdi),
#               referred to below as A[0]..A[8] (row-major 3x3)
#        %rsi = second pointer argument; three doubles x[0..2] are read at
#               0..23(%rsi) and then overwritten in place with the product
# Clobb: %rax, %xmm0-%xmm3
# Stack: uses 24 bytes at -40..-17(%rsp) without moving %rsp (red zone).
#
# Computed values (all loads complete before the first store to %rsi):
#   x'[0] = x[1]*A[1] + x[0]*A[0] + x[2]*A[2]
#   x'[1] = x[1]*A[4] + x[0]*A[3] + x[2]*A[5]
#   x'[2] = x[0]*A[6] + (x[1]*A[7] + x[2]*A[8])
#
# Fix: the third element is now stored as a single 8-byte value. The previous
# tail copied 16 bytes from -24(%rsp) to 16(%rsi), which wrote 8 bytes past
# the three-double vector and filled them with never-initialised stack data.
#-----------------------------------------------------------------------
.text
.align 4,0x90
.globl __Z21matrix_vector_productPKdPd
__Z21matrix_vector_productPKdPd:
LFB18:
vmovsd 8(%rsi), %xmm1                   # xmm1 = x[1]
vmovsd (%rsi), %xmm0                    # xmm0 = x[0]
vmulsd 8(%rdi), %xmm1, %xmm3            # xmm3 = x[1]*A[1]
vmovsd 16(%rsi), %xmm2                  # xmm2 = x[2]
vfmadd231sd (%rdi), %xmm0, %xmm3        # xmm3 += x[0]*A[0]
vfmadd231sd 16(%rdi), %xmm2, %xmm3      # xmm3 += x[2]*A[2]
vmovsd %xmm3, -40(%rsp)                 # spill x'[0]
vmulsd 32(%rdi), %xmm1, %xmm3           # xmm3 = x[1]*A[4]
vfmadd231sd 24(%rdi), %xmm0, %xmm3      # xmm3 += x[0]*A[3]
vfmadd231sd 40(%rdi), %xmm2, %xmm3      # xmm3 += x[2]*A[5]
vmulsd 64(%rdi), %xmm2, %xmm2           # xmm2 = x[2]*A[8]
vmovsd %xmm3, -32(%rsp)                 # spill x'[1]
vfmadd132sd 56(%rdi), %xmm2, %xmm1      # xmm1 = x[1]*A[7] + x[2]*A[8]
vfmadd132sd 48(%rdi), %xmm1, %xmm0      # xmm0 = x[0]*A[6] + xmm1
vmovsd %xmm0, -24(%rsp)                 # spill x'[2]
movq -24(%rsp), %rax                    # rax = bit pattern of x'[2] (8 bytes only)
vmovdqa -40(%rsp), %xmm0                # xmm0 = {x'[0], x'[1]}; aligned load, relies on rsp % 16 == 8 at entry
movq %rax, 16(%rsi)                     # store x'[2]; bytes at 24(%rsi) and beyond stay untouched
vmovups %xmm0, (%rsi)                   # store x'[0..1] (16 bytes, unaligned store)
ret
LFE18:
# Unwind information (__eh_frame) for the function between LFB18 and LFE18:
# one CIE (common rules) followed by one FDE (the function's address range).
# Field meanings below follow the DWARF CFI / .eh_frame record layout.
.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
EH_frame1:
.set L$set$0,LECIE1-LSCIE1              # CIE length = bytes from LSCIE1 to LECIE1
.long L$set$0
LSCIE1:
.long 0                                 # CIE id: 0 marks this record as a CIE
.byte 0x1                               # CIE version 1
.ascii "zR\0"                           # augmentation: 'z' = has augmentation data, 'R' = FDE pointer encoding given
.byte 0x1                               # code alignment factor = 1 (ULEB128)
.byte 0x78                              # data alignment factor = -8 (SLEB128)
.byte 0x10                              # return-address column = DWARF register 16
.byte 0x1                               # augmentation data length = 1 byte
.byte 0x10                              # FDE pointer encoding 0x10 = pc-relative
.byte 0xc                               # DW_CFA_def_cfa ...
.byte 0x7                               #   ... register 7 (rsp on x86-64)
.byte 0x8                               #   ... offset 8, i.e. CFA = rsp + 8
.byte 0x90                              # DW_CFA_offset for register 16 (return address) ...
.byte 0x1                               #   ... factored offset 1 * (-8), i.e. saved at CFA - 8
.align 3
LECIE1:
LSFDE1:
.set L$set$1,LEFDE1-LASFDE1             # FDE length = bytes from LASFDE1 to LEFDE1
.long L$set$1
LASFDE1:
.long LASFDE1-EH_frame1                 # distance back to the owning CIE
.quad LFB18-.                           # function start, pc-relative (matches encoding 0x10 above)
.set L$set$2,LFE18-LFB18                # size in bytes of the covered code
.quad L$set$2
.byte 0                                 # FDE augmentation data length = 0; no further CFA instructions follow
.align 3
LEFDE1:
# Darwin assembler directives emitted by the compiler; no data is placed after them here.
.constructor
.destructor
.align 1
.subsections_via_symbols
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment