Skip to content

Instantly share code, notes, and snippets.

@maleadt
Created June 21, 2021 16:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save maleadt/3663461e8232c1db6695f71c41804525 to your computer and use it in GitHub Desktop.
(gdb) f 0
#0 0x00007fffe06dee9d in zgemm_kernel_n_SANDYBRIDGE () from /home/tbesard/.cache/jl/installs/bin/linux/x64/1.7/julia-latest-linux64/bin/../lib/julia/libopenblas64_.so
(gdb) disassemble
Dump of assembler code for function zgemm_kernel_n_SANDYBRIDGE:
0x00007fffe06dee00 <+0>: sub $0x80,%rsp
0x00007fffe06dee07 <+7>: mov %rbx,(%rsp)
0x00007fffe06dee0b <+11>: mov %rbp,0x8(%rsp)
0x00007fffe06dee10 <+16>: mov %r12,0x10(%rsp)
0x00007fffe06dee15 <+21>: mov %r13,0x18(%rsp)
0x00007fffe06dee1a <+26>: mov %r14,0x20(%rsp)
0x00007fffe06dee1f <+31>: mov %r15,0x28(%rsp)
0x00007fffe06dee24 <+36>: mov 0x88(%rsp),%rbp
0x00007fffe06dee2c <+44>: vmovlps %xmm0,0x30(%rsp)
0x00007fffe06dee32 <+50>: vmovlps %xmm1,0x38(%rsp)
0x00007fffe06dee38 <+56>: sub $0xffffffffffffff80,%rcx
0x00007fffe06dee3c <+60>: sub $0xffffffffffffff80,%r8
0x00007fffe06dee40 <+64>: mov %rdi,%r13
0x00007fffe06dee43 <+67>: mov %rsi,%r14
0x00007fffe06dee46 <+70>: mov %rdx,%r15
0x00007fffe06dee49 <+73>: shl $0x4,%rbp
0x00007fffe06dee4d <+77>: test %r13,%r13
0x00007fffe06dee50 <+80>: jle 0x7fffe06df6e0 <zgemm_kernel_n_SANDYBRIDGE+2272>
0x00007fffe06dee56 <+86>: mov %r14,0x40(%rsp)
0x00007fffe06dee5b <+91>: sarq $0x2,0x40(%rsp)
0x00007fffe06dee61 <+97>: jle,pn 0x7fffe06df2d0 <zgemm_kernel_n_SANDYBRIDGE+1232>
0x00007fffe06dee68 <+104>: nopl 0x0(%rax,%rax,1)
0x00007fffe06dee70 <+112>: mov %r9,%rbx
0x00007fffe06dee73 <+115>: lea (%r9,%rbp,2),%rdx
0x00007fffe06dee77 <+119>: mov %rcx,%rdi
0x00007fffe06dee7a <+122>: mov %r15,%rax
0x00007fffe06dee7d <+125>: shl $0x6,%rax
0x00007fffe06dee81 <+129>: lea (%r8,%rax,1),%r12
0x00007fffe06dee85 <+133>: mov %r13,%r11
0x00007fffe06dee88 <+136>: nopl 0x0(%rax,%rax,1)
0x00007fffe06dee90 <+144>: prefetcht2 -0x80(%r12)
0x00007fffe06dee96 <+150>: sub $0xffffffffffffffc0,%r12
0x00007fffe06dee9a <+154>: mov %r8,%rsi
=> 0x00007fffe06dee9d <+157>: vxorpd %xmm1,%xmm1,%xmm1
0x00007fffe06deea2 <+162>: vxorps %xmm2,%xmm2,%xmm2
0x00007fffe06deea6 <+166>: vxorps %xmm3,%xmm3,%xmm3
0x00007fffe06deeaa <+170>: vxorps %xmm4,%xmm4,%xmm4
0x00007fffe06deeae <+174>: vxorps %xmm8,%xmm8,%xmm8
0x00007fffe06deeb3 <+179>: prefetcht0 0x8(%rbx)
0x00007fffe06deeb7 <+183>: vxorps %xmm9,%xmm9,%xmm9
0x00007fffe06deebc <+188>: vxorps %xmm10,%xmm10,%xmm10
0x00007fffe06deec1 <+193>: prefetcht0 0x18(%rbx,%rbp,1)
0x00007fffe06deec6 <+198>: vxorps %xmm11,%xmm11,%xmm11
0x00007fffe06deecb <+203>: vmovaps -0x80(%rdi),%xmm0
0x00007fffe06deed0 <+208>: vxorps %xmm12,%xmm12,%xmm12
0x00007fffe06deed5 <+213>: vxorps %xmm13,%xmm13,%xmm13
0x00007fffe06deeda <+218>: prefetcht0 0x8(%rdx)
0x00007fffe06deede <+222>: vxorps %xmm14,%xmm14,%xmm14
0x00007fffe06deee3 <+227>: vxorps %xmm15,%xmm15,%xmm15
0x00007fffe06deee8 <+232>: prefetcht0 0x18(%rdx,%rbp,1)
0x00007fffe06deeed <+237>: mov %r15,%rax
--Type <RET> for more, q to quit, c to continue without paging--
0x00007fffe06deef0 <+240>: sar $0x2,%rax
0x00007fffe06deef4 <+244>: jle,pn 0x7fffe06df0d0 <zgemm_kernel_n_SANDYBRIDGE+720>
0x00007fffe06deefb <+251>: nopl 0x0(%rax,%rax,1)
0x00007fffe06def00 <+256>: prefetcht0 0x20(%rdi)
0x00007fffe06def04 <+260>: vaddpd %xmm1,%xmm12,%xmm12
0x00007fffe06def08 <+264>: vmovaps -0x80(%rsi),%xmm1
0x00007fffe06def0d <+269>: vaddpd %xmm2,%xmm13,%xmm13
0x00007fffe06def11 <+273>: vpshufd $0x4e,%xmm1,%xmm2
0x00007fffe06def16 <+278>: vmulpd %xmm0,%xmm1,%xmm1
0x00007fffe06def1a <+282>: vmulpd %xmm0,%xmm2,%xmm2
0x00007fffe06def1e <+286>: vaddpd %xmm3,%xmm14,%xmm14
0x00007fffe06def22 <+290>: vmovaps -0x70(%rsi),%xmm3
0x00007fffe06def27 <+295>: vaddpd %xmm4,%xmm15,%xmm15
0x00007fffe06def2b <+299>: vpshufd $0x4e,%xmm3,%xmm4
0x00007fffe06def30 <+304>: vmulpd %xmm0,%xmm3,%xmm3
0x00007fffe06def34 <+308>: vmulpd %xmm0,%xmm4,%xmm4
0x00007fffe06def38 <+312>: vaddpd %xmm1,%xmm8,%xmm8
0x00007fffe06def3c <+316>: vmovaps -0x60(%rsi),%xmm1
0x00007fffe06def41 <+321>: vaddpd %xmm2,%xmm9,%xmm9
0x00007fffe06def45 <+325>: vpshufd $0x4e,%xmm1,%xmm2
0x00007fffe06def4a <+330>: vmulpd %xmm0,%xmm1,%xmm1
0x00007fffe06def4e <+334>: vmulpd %xmm0,%xmm2,%xmm2
0x00007fffe06def52 <+338>: vaddpd %xmm3,%xmm10,%xmm10
0x00007fffe06def56 <+342>: vmovaps -0x50(%rsi),%xmm3
0x00007fffe06def5b <+347>: vaddpd %xmm4,%xmm11,%xmm11
0x00007fffe06def5f <+351>: vpshufd $0x4e,%xmm3,%xmm4
0x00007fffe06def64 <+356>: vmovaps -0x70(%rdi),%xmm5
0x00007fffe06def69 <+361>: vmulpd %xmm0,%xmm3,%xmm3
0x00007fffe06def6d <+365>: vmulpd %xmm0,%xmm4,%xmm4
0x00007fffe06def71 <+369>: vaddpd %xmm1,%xmm12,%xmm12
0x00007fffe06def75 <+373>: vmovaps -0x40(%rsi),%xmm1
0x00007fffe06def7a <+378>: vaddpd %xmm2,%xmm13,%xmm13
0x00007fffe06def7e <+382>: vpshufd $0x4e,%xmm1,%xmm2
0x00007fffe06def83 <+387>: vmulpd %xmm5,%xmm1,%xmm1
0x00007fffe06def87 <+391>: vmulpd %xmm5,%xmm2,%xmm2
0x00007fffe06def8b <+395>: vaddpd %xmm3,%xmm14,%xmm14
0x00007fffe06def8f <+399>: vmovaps -0x30(%rsi),%xmm3
0x00007fffe06def94 <+404>: vaddpd %xmm4,%xmm15,%xmm15
0x00007fffe06def98 <+408>: vpshufd $0x4e,%xmm3,%xmm4
0x00007fffe06def9d <+413>: vmulpd %xmm5,%xmm3,%xmm3
0x00007fffe06defa1 <+417>: vmulpd %xmm5,%xmm4,%xmm4
0x00007fffe06defa5 <+421>: vaddpd %xmm1,%xmm8,%xmm8
0x00007fffe06defa9 <+425>: vmovaps -0x20(%rsi),%xmm1
0x00007fffe06defae <+430>: vaddpd %xmm2,%xmm9,%xmm9
0x00007fffe06defb2 <+434>: vpshufd $0x4e,%xmm1,%xmm2
0x00007fffe06defb7 <+439>: vmulpd %xmm5,%xmm1,%xmm1
0x00007fffe06defbb <+443>: vmulpd %xmm5,%xmm2,%xmm2
0x00007fffe06defbf <+447>: vaddpd %xmm3,%xmm10,%xmm10
0x00007fffe06defc3 <+451>: vmovaps -0x10(%rsi),%xmm3
0x00007fffe06defc8 <+456>: vaddpd %xmm4,%xmm11,%xmm11
0x00007fffe06defcc <+460>: vpshufd $0x4e,%xmm3,%xmm4
0x00007fffe06defd1 <+465>: vmovaps -0x60(%rdi),%xmm0
--Type <RET> for more, q to quit, c to continue without paging--
0x00007fffe06defd6 <+470>: vmulpd %xmm5,%xmm3,%xmm3
0x00007fffe06defda <+474>: vmulpd %xmm5,%xmm4,%xmm4
0x00007fffe06defde <+478>: vaddpd %xmm1,%xmm12,%xmm12
0x00007fffe06defe2 <+482>: vmovaps (%rsi),%xmm1
0x00007fffe06defe6 <+486>: vaddpd %xmm2,%xmm13,%xmm13
0x00007fffe06defea <+490>: vpshufd $0x4e,%xmm1,%xmm2
0x00007fffe06defef <+495>: vmulpd %xmm0,%xmm1,%xmm1
0x00007fffe06deff3 <+499>: vmulpd %xmm0,%xmm2,%xmm2
0x00007fffe06deff7 <+503>: vaddpd %xmm3,%xmm14,%xmm14
0x00007fffe06deffb <+507>: vmovaps 0x10(%rsi),%xmm3
0x00007fffe06df000 <+512>: vaddpd %xmm4,%xmm15,%xmm15
0x00007fffe06df004 <+516>: vpshufd $0x4e,%xmm3,%xmm4
0x00007fffe06df009 <+521>: vmulpd %xmm0,%xmm3,%xmm3
0x00007fffe06df00d <+525>: vmulpd %xmm0,%xmm4,%xmm4
0x00007fffe06df011 <+529>: vaddpd %xmm1,%xmm8,%xmm8
0x00007fffe06df015 <+533>: vmovaps 0x20(%rsi),%xmm1
0x00007fffe06df01a <+538>: vaddpd %xmm2,%xmm9,%xmm9
0x00007fffe06df01e <+542>: vpshufd $0x4e,%xmm1,%xmm2
0x00007fffe06df023 <+547>: vmulpd %xmm0,%xmm1,%xmm1
0x00007fffe06df027 <+551>: vmulpd %xmm0,%xmm2,%xmm2
0x00007fffe06df02b <+555>: vaddpd %xmm3,%xmm10,%xmm10
0x00007fffe06df02f <+559>: vmovaps 0x30(%rsi),%xmm3
0x00007fffe06df034 <+564>: vaddpd %xmm4,%xmm11,%xmm11
0x00007fffe06df038 <+568>: vpshufd $0x4e,%xmm3,%xmm4
0x00007fffe06df03d <+573>: vmulpd %xmm0,%xmm3,%xmm3
0x00007fffe06df041 <+577>: vmovaps -0x50(%rdi),%xmm5
0x00007fffe06df046 <+582>: vmulpd %xmm0,%xmm4,%xmm4
0x00007fffe06df04a <+586>: vaddpd %xmm1,%xmm12,%xmm12
0x00007fffe06df04e <+590>: vmovaps 0x40(%rsi),%xmm1
0x00007fffe06df053 <+595>: vaddpd %xmm2,%xmm13,%xmm13
0x00007fffe06df057 <+599>: vpshufd $0x4e,%xmm1,%xmm2
0x00007fffe06df05c <+604>: vmulpd %xmm5,%xmm1,%xmm1
0x00007fffe06df060 <+608>: vmulpd %xmm5,%xmm2,%xmm2
0x00007fffe06df064 <+612>: vaddpd %xmm3,%xmm14,%xmm14
0x00007fffe06df068 <+616>: vmovaps 0x50(%rsi),%xmm3
0x00007fffe06df06d <+621>: vaddpd %xmm4,%xmm15,%xmm15
0x00007fffe06df071 <+625>: vpshufd $0x4e,%xmm3,%xmm4
0x00007fffe06df076 <+630>: vmulpd %xmm5,%xmm3,%xmm3
0x00007fffe06df07a <+634>: data16 vmulpd %xmm5,%xmm4,%xmm4
0x00007fffe06df07f <+639>: vaddpd %xmm1,%xmm8,%xmm8
0x00007fffe06df083 <+643>: vmovaps 0x60(%rsi),%xmm1
0x00007fffe06df088 <+648>: vaddpd %xmm2,%xmm9,%xmm9
0x00007fffe06df08c <+652>: vpshufd $0x4e,%xmm1,%xmm2
0x00007fffe06df091 <+657>: vmulpd %xmm5,%xmm1,%xmm1
0x00007fffe06df095 <+661>: data16 vmulpd %xmm5,%xmm2,%xmm2
0x00007fffe06df09a <+666>: vaddpd %xmm3,%xmm10,%xmm10
0x00007fffe06df09e <+670>: vmovaps 0x70(%rsi),%xmm3
0x00007fffe06df0a3 <+675>: vaddpd %xmm4,%xmm11,%xmm11
0x00007fffe06df0a7 <+679>: vpshufd $0x4e,%xmm3,%xmm4
0x00007fffe06df0ac <+684>: vmulpd %xmm5,%xmm3,%xmm3
0x00007fffe06df0b0 <+688>: vmovaps -0x40(%rdi),%xmm0
0x00007fffe06df0b5 <+693>: vmulpd %xmm5,%xmm4,%xmm4
--Type <RET> for more, q to quit, c to continue without paging--
0x00007fffe06df0b9 <+697>: sub $0xffffffffffffff00,%rsi
0x00007fffe06df0c0 <+704>: sub $0xffffffffffffffc0,%rdi
0x00007fffe06df0c4 <+708>: sub $0x1,%rax
0x00007fffe06df0c8 <+712>: jg,pt 0x7fffe06def00 <zgemm_kernel_n_SANDYBRIDGE+256>
0x00007fffe06df0cf <+719>: nop
0x00007fffe06df0d0 <+720>: vmovddup 0x30(%rsp),%xmm6
0x00007fffe06df0d6 <+726>: vmovddup 0x38(%rsp),%xmm7
0x00007fffe06df0dc <+732>: mov %r15,%rax
0x00007fffe06df0df <+735>: and $0x3,%rax
0x00007fffe06df0e3 <+739>: je,pt 0x7fffe06df170 <zgemm_kernel_n_SANDYBRIDGE+880>
0x00007fffe06df0ea <+746>: nopw 0x0(%rax,%rax,1)
0x00007fffe06df0f0 <+752>: vaddpd %xmm1,%xmm12,%xmm12
0x00007fffe06df0f4 <+756>: vmovaps -0x80(%rsi),%xmm1
0x00007fffe06df0f9 <+761>: vaddpd %xmm2,%xmm13,%xmm13
0x00007fffe06df0fd <+765>: vpshufd $0x4e,%xmm1,%xmm2
0x00007fffe06df102 <+770>: vmulpd %xmm0,%xmm1,%xmm1
0x00007fffe06df106 <+774>: vmulpd %xmm0,%xmm2,%xmm2
0x00007fffe06df10a <+778>: vaddpd %xmm3,%xmm14,%xmm14
0x00007fffe06df10e <+782>: vmovaps -0x70(%rsi),%xmm3
0x00007fffe06df113 <+787>: vaddpd %xmm4,%xmm15,%xmm15
0x00007fffe06df117 <+791>: vpshufd $0x4e,%xmm3,%xmm4
0x00007fffe06df11c <+796>: vmulpd %xmm0,%xmm3,%xmm3
0x00007fffe06df120 <+800>: vmulpd %xmm0,%xmm4,%xmm4
0x00007fffe06df124 <+804>: vaddpd %xmm1,%xmm8,%xmm8
0x00007fffe06df128 <+808>: vmovaps -0x60(%rsi),%xmm1
0x00007fffe06df12d <+813>: vaddpd %xmm2,%xmm9,%xmm9
0x00007fffe06df131 <+817>: vpshufd $0x4e,%xmm1,%xmm2
0x00007fffe06df136 <+822>: vmulpd %xmm0,%xmm1,%xmm1
0x00007fffe06df13a <+826>: vmulpd %xmm0,%xmm2,%xmm2
0x00007fffe06df13e <+830>: vaddpd %xmm3,%xmm10,%xmm10
0x00007fffe06df142 <+834>: vmovaps -0x50(%rsi),%xmm3
0x00007fffe06df147 <+839>: vaddpd %xmm4,%xmm11,%xmm11
0x00007fffe06df14b <+843>: vpshufd $0x4e,%xmm3,%xmm4
0x00007fffe06df150 <+848>: vmulpd %xmm0,%xmm3,%xmm3
0x00007fffe06df154 <+852>: vmulpd %xmm0,%xmm4,%xmm4
0x00007fffe06df158 <+856>: vmovaps -0x70(%rdi),%xmm0
0x00007fffe06df15d <+861>: add $0x10,%rdi
0x00007fffe06df161 <+865>: add $0x40,%rsi
0x00007fffe06df165 <+869>: sub $0x1,%rax
0x00007fffe06df169 <+873>: jg,pt 0x7fffe06df0f0 <zgemm_kernel_n_SANDYBRIDGE+752>
0x00007fffe06df16c <+876>: nopl 0x0(%rax)
0x00007fffe06df170 <+880>: vaddpd %xmm1,%xmm12,%xmm12
0x00007fffe06df174 <+884>: vaddpd %xmm2,%xmm13,%xmm13
0x00007fffe06df178 <+888>: vaddpd %xmm3,%xmm14,%xmm14
0x00007fffe06df17c <+892>: vaddpd %xmm4,%xmm15,%xmm15
0x00007fffe06df180 <+896>: vpcmpeqb %xmm0,%xmm0,%xmm0
0x00007fffe06df184 <+900>: vpsllq $0x3f,%xmm0,%xmm0
0x00007fffe06df189 <+905>: vshufps $0x40,%xmm0,%xmm0,%xmm0
0x00007fffe06df18e <+910>: vxorps %xmm0,%xmm8,%xmm8
0x00007fffe06df192 <+914>: vxorps %xmm0,%xmm10,%xmm10
0x00007fffe06df196 <+918>: vxorps %xmm0,%xmm12,%xmm12
0x00007fffe06df19a <+922>: vxorps %xmm0,%xmm14,%xmm14
--Type <RET> for more, q to quit, c to continue without paging--
0x00007fffe06df19e <+926>: vhaddpd %xmm9,%xmm8,%xmm8
0x00007fffe06df1a3 <+931>: vhaddpd %xmm11,%xmm10,%xmm10
0x00007fffe06df1a8 <+936>: vhaddpd %xmm13,%xmm12,%xmm12
0x00007fffe06df1ad <+941>: vhaddpd %xmm15,%xmm14,%xmm14
0x00007fffe06df1b2 <+946>: vpshufd $0x4e,%xmm8,%xmm9
0x00007fffe06df1b8 <+952>: vpshufd $0x4e,%xmm10,%xmm11
0x00007fffe06df1be <+958>: vpshufd $0x4e,%xmm12,%xmm13
0x00007fffe06df1c4 <+964>: vpshufd $0x4e,%xmm14,%xmm15
0x00007fffe06df1ca <+970>: vmulpd %xmm6,%xmm8,%xmm8
0x00007fffe06df1ce <+974>: vmulpd %xmm7,%xmm9,%xmm9
0x00007fffe06df1d2 <+978>: vmulpd %xmm6,%xmm10,%xmm10
0x00007fffe06df1d6 <+982>: vmulpd %xmm7,%xmm11,%xmm11
0x00007fffe06df1da <+986>: vmulpd %xmm6,%xmm12,%xmm12
0x00007fffe06df1de <+990>: vmulpd %xmm7,%xmm13,%xmm13
0x00007fffe06df1e2 <+994>: vmulpd %xmm6,%xmm14,%xmm14
0x00007fffe06df1e6 <+998>: vmulpd %xmm7,%xmm15,%xmm15
0x00007fffe06df1ea <+1002>: vaddsubpd %xmm9,%xmm8,%xmm8
0x00007fffe06df1ef <+1007>: vaddsubpd %xmm11,%xmm10,%xmm10
0x00007fffe06df1f4 <+1012>: vaddsubpd %xmm13,%xmm12,%xmm12
0x00007fffe06df1f9 <+1017>: vaddsubpd %xmm15,%xmm14,%xmm14
0x00007fffe06df1fe <+1022>: test $0xf,%rbx
0x00007fffe06df205 <+1029>: jne,pn 0x7fffe06df270 <zgemm_kernel_n_SANDYBRIDGE+1136>
0x00007fffe06df208 <+1032>: vmovaps (%rbx),%xmm0
0x00007fffe06df20c <+1036>: vmovaps (%rbx,%rbp,1),%xmm1
0x00007fffe06df211 <+1041>: vmovaps (%rdx),%xmm2
0x00007fffe06df215 <+1045>: vmovaps (%rdx,%rbp,1),%xmm3
0x00007fffe06df21a <+1050>: vaddpd %xmm0,%xmm8,%xmm8
0x00007fffe06df21e <+1054>: vaddpd %xmm1,%xmm10,%xmm10
0x00007fffe06df222 <+1058>: vaddpd %xmm2,%xmm12,%xmm12
0x00007fffe06df226 <+1062>: vaddpd %xmm3,%xmm14,%xmm14
0x00007fffe06df22a <+1066>: vmovaps %xmm8,(%rbx)
0x00007fffe06df22e <+1070>: vmovaps %xmm10,(%rbx,%rbp,1)
0x00007fffe06df233 <+1075>: vmovaps %xmm12,(%rdx)
0x00007fffe06df237 <+1079>: vmovaps %xmm14,(%rdx,%rbp,1)
0x00007fffe06df23c <+1084>: add $0x10,%rbx
0x00007fffe06df240 <+1088>: add $0x10,%rdx
0x00007fffe06df244 <+1092>: dec %r11
0x00007fffe06df247 <+1095>: jg,pt 0x7fffe06dee90 <zgemm_kernel_n_SANDYBRIDGE+144>
0x00007fffe06df24e <+1102>: lea (%r9,%rbp,4),%r9
0x00007fffe06df252 <+1106>: mov %rsi,%r8
0x00007fffe06df255 <+1109>: subq $0x1,0x40(%rsp)
0x00007fffe06df25b <+1115>: jg,pt 0x7fffe06dee70 <zgemm_kernel_n_SANDYBRIDGE+112>
0x00007fffe06df262 <+1122>: jmp 0x7fffe06df2d0 <zgemm_kernel_n_SANDYBRIDGE+1232>
0x00007fffe06df264 <+1124>: data16 nopw %cs:0x0(%rax,%rax,1)
0x00007fffe06df26f <+1135>: nop
0x00007fffe06df270 <+1136>: vmovups (%rbx),%xmm0
0x00007fffe06df274 <+1140>: vmovups (%rbx,%rbp,1),%xmm1
0x00007fffe06df279 <+1145>: vmovups (%rdx),%xmm2
0x00007fffe06df27d <+1149>: vmovups (%rdx,%rbp,1),%xmm3
0x00007fffe06df282 <+1154>: vaddpd %xmm0,%xmm8,%xmm8
0x00007fffe06df286 <+1158>: vaddpd %xmm1,%xmm10,%xmm10
0x00007fffe06df28a <+1162>: vaddpd %xmm2,%xmm12,%xmm12
--Type <RET> for more, q to quit, c to continue without paging--
0x00007fffe06df28e <+1166>: vaddpd %xmm3,%xmm14,%xmm14
0x00007fffe06df292 <+1170>: vmovups %xmm8,(%rbx)
0x00007fffe06df296 <+1174>: vmovups %xmm10,(%rbx,%rbp,1)
0x00007fffe06df29b <+1179>: vmovups %xmm12,(%rdx)
0x00007fffe06df29f <+1183>: vmovups %xmm14,(%rdx,%rbp,1)
0x00007fffe06df2a4 <+1188>: add $0x10,%rbx
0x00007fffe06df2a8 <+1192>: add $0x10,%rdx
0x00007fffe06df2ac <+1196>: dec %r11
0x00007fffe06df2af <+1199>: jg,pt 0x7fffe06dee90 <zgemm_kernel_n_SANDYBRIDGE+144>
0x00007fffe06df2b6 <+1206>: lea (%r9,%rbp,4),%r9
0x00007fffe06df2ba <+1210>: mov %rsi,%r8
0x00007fffe06df2bd <+1213>: subq $0x1,0x40(%rsp)
0x00007fffe06df2c3 <+1219>: jg,pt 0x7fffe06dee70 <zgemm_kernel_n_SANDYBRIDGE+112>
0x00007fffe06df2ca <+1226>: nopw 0x0(%rax,%rax,1)
0x00007fffe06df2d0 <+1232>: test $0x2,%r14
0x00007fffe06df2d7 <+1239>: jle,pt 0x7fffe06df540 <zgemm_kernel_n_SANDYBRIDGE+1856>
0x00007fffe06df2de <+1246>: mov %r9,%rbx
0x00007fffe06df2e1 <+1249>: lea (%r9,%rbp,1),%rdx
0x00007fffe06df2e5 <+1253>: mov %rcx,%rdi
0x00007fffe06df2e8 <+1256>: mov %r13,%r11
0x00007fffe06df2eb <+1259>: nopl 0x0(%rax,%rax,1)
0x00007fffe06df2f0 <+1264>: mov %r8,%rsi
0x00007fffe06df2f3 <+1267>: vxorps %xmm1,%xmm1,%xmm1
0x00007fffe06df2f7 <+1271>: vmovaps -0x80(%rdi),%xmm0
0x00007fffe06df2fc <+1276>: vxorps %xmm2,%xmm2,%xmm2
0x00007fffe06df300 <+1280>: vxorps %xmm3,%xmm3,%xmm3
0x00007fffe06df304 <+1284>: vxorps %xmm4,%xmm4,%xmm4
0x00007fffe06df308 <+1288>: vxorps %xmm8,%xmm8,%xmm8
0x00007fffe06df30d <+1293>: prefetcht0 0x8(%rbx)
0x00007fffe06df311 <+1297>: vxorps %xmm9,%xmm9,%xmm9
0x00007fffe06df316 <+1302>: prefetcht0 0x10(%rdx)
0x00007fffe06df31a <+1306>: vxorps %xmm10,%xmm10,%xmm10
0x00007fffe06df31f <+1311>: vxorps %xmm11,%xmm11,%xmm11
0x00007fffe06df324 <+1316>: mov %r15,%rax
0x00007fffe06df327 <+1319>: sar $0x2,%rax
0x00007fffe06df32b <+1323>: jle,pn 0x7fffe06df438 <zgemm_kernel_n_SANDYBRIDGE+1592>
0x00007fffe06df332 <+1330>: nopw 0x0(%rax,%rax,1)
0x00007fffe06df338 <+1336>: prefetcht0 0x20(%rdi)
0x00007fffe06df33c <+1340>: vaddpd %xmm1,%xmm8,%xmm8
0x00007fffe06df340 <+1344>: vmovaps -0x80(%rsi),%xmm1
0x00007fffe06df345 <+1349>: vaddpd %xmm2,%xmm9,%xmm9
0x00007fffe06df349 <+1353>: vpshufd $0x4e,%xmm1,%xmm2
0x00007fffe06df34e <+1358>: vmulpd %xmm0,%xmm1,%xmm1
0x00007fffe06df352 <+1362>: vmulpd %xmm0,%xmm2,%xmm2
0x00007fffe06df356 <+1366>: vaddpd %xmm3,%xmm10,%xmm10
0x00007fffe06df35a <+1370>: vmovaps -0x70(%rsi),%xmm3
0x00007fffe06df35f <+1375>: vaddpd %xmm4,%xmm11,%xmm11
0x00007fffe06df363 <+1379>: vpshufd $0x4e,%xmm3,%xmm4
0x00007fffe06df368 <+1384>: vmulpd %xmm0,%xmm3,%xmm3
0x00007fffe06df36c <+1388>: vmulpd %xmm0,%xmm4,%xmm4
0x00007fffe06df370 <+1392>: vmovaps -0x70(%rdi),%xmm0
0x00007fffe06df375 <+1397>: vaddpd %xmm1,%xmm8,%xmm8
--Type <RET> for more, q to quit, c to continue without paging--
0x00007fffe06df379 <+1401>: vmovaps -0x60(%rsi),%xmm1
0x00007fffe06df37e <+1406>: vaddpd %xmm2,%xmm9,%xmm9
0x00007fffe06df382 <+1410>: vpshufd $0x4e,%xmm1,%xmm2
0x00007fffe06df387 <+1415>: vmulpd %xmm0,%xmm1,%xmm1
0x00007fffe06df38b <+1419>: vmulpd %xmm0,%xmm2,%xmm2
0x00007fffe06df38f <+1423>: vaddpd %xmm3,%xmm10,%xmm10
0x00007fffe06df393 <+1427>: vmovaps -0x50(%rsi),%xmm3
0x00007fffe06df398 <+1432>: vaddpd %xmm4,%xmm11,%xmm11
0x00007fffe06df39c <+1436>: vpshufd $0x4e,%xmm3,%xmm4
0x00007fffe06df3a1 <+1441>: vmulpd %xmm0,%xmm3,%xmm3
0x00007fffe06df3a5 <+1445>: vmulpd %xmm0,%xmm4,%xmm4
0x00007fffe06df3a9 <+1449>: vmovaps -0x60(%rdi),%xmm0
0x00007fffe06df3ae <+1454>: vaddpd %xmm1,%xmm8,%xmm8
0x00007fffe06df3b2 <+1458>: vmovaps -0x40(%rsi),%xmm1
0x00007fffe06df3b7 <+1463>: vaddpd %xmm2,%xmm9,%xmm9
0x00007fffe06df3bb <+1467>: vpshufd $0x4e,%xmm1,%xmm2
0x00007fffe06df3c0 <+1472>: vmulpd %xmm0,%xmm1,%xmm1
0x00007fffe06df3c4 <+1476>: vmulpd %xmm0,%xmm2,%xmm2
0x00007fffe06df3c8 <+1480>: vaddpd %xmm3,%xmm10,%xmm10
0x00007fffe06df3cc <+1484>: vmovaps -0x30(%rsi),%xmm3
0x00007fffe06df3d1 <+1489>: vaddpd %xmm4,%xmm11,%xmm11
0x00007fffe06df3d5 <+1493>: vpshufd $0x4e,%xmm3,%xmm4
0x00007fffe06df3da <+1498>: vmulpd %xmm0,%xmm3,%xmm3
0x00007fffe06df3de <+1502>: vmulpd %xmm0,%xmm4,%xmm4
0x00007fffe06df3e2 <+1506>: vmovaps -0x50(%rdi),%xmm0
0x00007fffe06df3e7 <+1511>: vaddpd %xmm1,%xmm8,%xmm8
0x00007fffe06df3eb <+1515>: vmovaps -0x20(%rsi),%xmm1
0x00007fffe06df3f0 <+1520>: vaddpd %xmm2,%xmm9,%xmm9
0x00007fffe06df3f4 <+1524>: vpshufd $0x4e,%xmm1,%xmm2
0x00007fffe06df3f9 <+1529>: vmulpd %xmm0,%xmm1,%xmm1
0x00007fffe06df3fd <+1533>: vmulpd %xmm0,%xmm2,%xmm2
0x00007fffe06df401 <+1537>: vaddpd %xmm3,%xmm10,%xmm10
0x00007fffe06df405 <+1541>: vmovaps -0x10(%rsi),%xmm3
0x00007fffe06df40a <+1546>: vaddpd %xmm4,%xmm11,%xmm11
0x00007fffe06df40e <+1550>: vpshufd $0x4e,%xmm3,%xmm4
0x00007fffe06df413 <+1555>: vmulpd %xmm0,%xmm3,%xmm3
0x00007fffe06df417 <+1559>: vmulpd %xmm0,%xmm4,%xmm4
0x00007fffe06df41b <+1563>: vmovaps -0x40(%rdi),%xmm0
0x00007fffe06df420 <+1568>: sub $0xffffffffffffffc0,%rdi
0x00007fffe06df424 <+1572>: sub $0xffffffffffffff80,%rsi
0x00007fffe06df428 <+1576>: sub $0x1,%rax
0x00007fffe06df42c <+1580>: jg,pt 0x7fffe06df338 <zgemm_kernel_n_SANDYBRIDGE+1336>
0x00007fffe06df433 <+1587>: nopl 0x0(%rax,%rax,1)
0x00007fffe06df438 <+1592>: mov %r15,%rax
0x00007fffe06df43b <+1595>: and $0x3,%rax
0x00007fffe06df43f <+1599>: je,pt 0x7fffe06df490 <zgemm_kernel_n_SANDYBRIDGE+1680>
0x00007fffe06df442 <+1602>: nopw 0x0(%rax,%rax,1)
0x00007fffe06df448 <+1608>: vaddpd %xmm1,%xmm8,%xmm8
0x00007fffe06df44c <+1612>: vmovaps -0x80(%rsi),%xmm1
0x00007fffe06df451 <+1617>: vaddpd %xmm2,%xmm9,%xmm9
0x00007fffe06df455 <+1621>: vpshufd $0x4e,%xmm1,%xmm2
0x00007fffe06df45a <+1626>: vmulpd %xmm0,%xmm1,%xmm1
--Type <RET> for more, q to quit, c to continue without paging--
0x00007fffe06df45e <+1630>: vmulpd %xmm0,%xmm2,%xmm2
0x00007fffe06df462 <+1634>: vaddpd %xmm3,%xmm10,%xmm10
0x00007fffe06df466 <+1638>: vmovaps -0x70(%rsi),%xmm3
0x00007fffe06df46b <+1643>: vaddpd %xmm4,%xmm11,%xmm11
0x00007fffe06df46f <+1647>: vpshufd $0x4e,%xmm3,%xmm4
0x00007fffe06df474 <+1652>: vmulpd %xmm0,%xmm3,%xmm3
0x00007fffe06df478 <+1656>: vmulpd %xmm0,%xmm4,%xmm4
0x00007fffe06df47c <+1660>: vmovaps -0x70(%rdi),%xmm0
0x00007fffe06df481 <+1665>: add $0x10,%rdi
0x00007fffe06df485 <+1669>: add $0x20,%rsi
0x00007fffe06df489 <+1673>: sub $0x1,%rax
0x00007fffe06df48d <+1677>: jg,pt 0x7fffe06df448 <zgemm_kernel_n_SANDYBRIDGE+1608>
0x00007fffe06df490 <+1680>: vaddpd %xmm1,%xmm8,%xmm8
0x00007fffe06df494 <+1684>: vaddpd %xmm2,%xmm9,%xmm9
0x00007fffe06df498 <+1688>: vaddpd %xmm3,%xmm10,%xmm10
0x00007fffe06df49c <+1692>: vaddpd %xmm4,%xmm11,%xmm11
0x00007fffe06df4a0 <+1696>: vpcmpeqb %xmm0,%xmm0,%xmm0
0x00007fffe06df4a4 <+1700>: vpsllq $0x3f,%xmm0,%xmm0
0x00007fffe06df4a9 <+1705>: vmovddup 0x30(%rsp),%xmm2
0x00007fffe06df4af <+1711>: vmovddup 0x38(%rsp),%xmm3
0x00007fffe06df4b5 <+1717>: vshufps $0x40,%xmm0,%xmm0,%xmm0
0x00007fffe06df4ba <+1722>: vxorps %xmm0,%xmm8,%xmm8
0x00007fffe06df4be <+1726>: vxorps %xmm0,%xmm10,%xmm10
0x00007fffe06df4c2 <+1730>: vhaddpd %xmm9,%xmm8,%xmm8
0x00007fffe06df4c7 <+1735>: vhaddpd %xmm11,%xmm10,%xmm10
0x00007fffe06df4cc <+1740>: vpshufd $0x4e,%xmm8,%xmm9
0x00007fffe06df4d2 <+1746>: vpshufd $0x4e,%xmm10,%xmm11
0x00007fffe06df4d8 <+1752>: vmulpd %xmm2,%xmm8,%xmm8
0x00007fffe06df4dc <+1756>: vmulpd %xmm3,%xmm9,%xmm9
0x00007fffe06df4e0 <+1760>: vmulpd %xmm2,%xmm10,%xmm10
0x00007fffe06df4e4 <+1764>: vmulpd %xmm3,%xmm11,%xmm11
0x00007fffe06df4e8 <+1768>: vaddsubpd %xmm9,%xmm8,%xmm8
0x00007fffe06df4ed <+1773>: vaddsubpd %xmm11,%xmm10,%xmm10
0x00007fffe06df4f2 <+1778>: vmovsd (%rbx),%xmm0
0x00007fffe06df4f6 <+1782>: vmovhpd 0x8(%rbx),%xmm0,%xmm0
0x00007fffe06df4fb <+1787>: vmovsd (%rdx),%xmm1
0x00007fffe06df4ff <+1791>: vmovhpd 0x8(%rdx),%xmm1,%xmm1
0x00007fffe06df504 <+1796>: vaddpd %xmm0,%xmm8,%xmm8
0x00007fffe06df508 <+1800>: vaddpd %xmm1,%xmm10,%xmm10
0x00007fffe06df50c <+1804>: vmovsd %xmm8,(%rbx)
0x00007fffe06df510 <+1808>: vmovhpd %xmm8,0x8(%rbx)
0x00007fffe06df515 <+1813>: vmovsd %xmm10,(%rdx)
0x00007fffe06df519 <+1817>: vmovhpd %xmm10,0x8(%rdx)
0x00007fffe06df51e <+1822>: add $0x10,%rbx
0x00007fffe06df522 <+1826>: add $0x10,%rdx
0x00007fffe06df526 <+1830>: dec %r11
0x00007fffe06df529 <+1833>: jg,pt 0x7fffe06df2f0 <zgemm_kernel_n_SANDYBRIDGE+1264>
0x00007fffe06df530 <+1840>: lea (%r9,%rbp,2),%r9
0x00007fffe06df534 <+1844>: mov %rsi,%r8
0x00007fffe06df537 <+1847>: nopw 0x0(%rax,%rax,1)
0x00007fffe06df540 <+1856>: test $0x1,%r14
0x00007fffe06df547 <+1863>: jle,pt 0x7fffe06df6e0 <zgemm_kernel_n_SANDYBRIDGE+2272>
--Type <RET> for more, q to quit, c to continue without paging--
0x00007fffe06df54e <+1870>: mov %r9,%rbx
0x00007fffe06df551 <+1873>: mov %rcx,%rdi
0x00007fffe06df554 <+1876>: mov %r13,%r11
0x00007fffe06df557 <+1879>: nopw 0x0(%rax,%rax,1)
0x00007fffe06df560 <+1888>: mov %r8,%rsi
0x00007fffe06df563 <+1891>: vxorps %xmm1,%xmm1,%xmm1
0x00007fffe06df567 <+1895>: vmovaps -0x80(%rdi),%xmm0
0x00007fffe06df56c <+1900>: vxorps %xmm2,%xmm2,%xmm2
0x00007fffe06df570 <+1904>: vxorps %xmm8,%xmm8,%xmm8
0x00007fffe06df575 <+1909>: prefetcht0 0x10(%rbx)
0x00007fffe06df579 <+1913>: vxorps %xmm9,%xmm9,%xmm9
0x00007fffe06df57e <+1918>: vxorps %xmm10,%xmm10,%xmm10
0x00007fffe06df583 <+1923>: vxorps %xmm11,%xmm11,%xmm11
0x00007fffe06df588 <+1928>: mov %r15,%rax
0x00007fffe06df58b <+1931>: sar $0x2,%rax
0x00007fffe06df58f <+1935>: jle,pn 0x7fffe06df638 <zgemm_kernel_n_SANDYBRIDGE+2104>
0x00007fffe06df596 <+1942>: xchg %ax,%ax
0x00007fffe06df598 <+1944>: prefetcht0 0x20(%rdi)
0x00007fffe06df59c <+1948>: vaddpd %xmm1,%xmm8,%xmm8
0x00007fffe06df5a0 <+1952>: vmovaps -0x80(%rsi),%xmm1
0x00007fffe06df5a5 <+1957>: vaddpd %xmm2,%xmm9,%xmm9
0x00007fffe06df5a9 <+1961>: vpshufd $0x4e,%xmm1,%xmm2
0x00007fffe06df5ae <+1966>: vmulpd %xmm0,%xmm1,%xmm1
0x00007fffe06df5b2 <+1970>: vmulpd %xmm0,%xmm2,%xmm2
0x00007fffe06df5b6 <+1974>: vmovaps -0x70(%rdi),%xmm0
0x00007fffe06df5bb <+1979>: vaddpd %xmm1,%xmm10,%xmm10
0x00007fffe06df5bf <+1983>: vmovaps -0x70(%rsi),%xmm1
0x00007fffe06df5c4 <+1988>: vaddpd %xmm2,%xmm11,%xmm11
0x00007fffe06df5c8 <+1992>: vpshufd $0x4e,%xmm1,%xmm2
0x00007fffe06df5cd <+1997>: vmulpd %xmm0,%xmm1,%xmm1
0x00007fffe06df5d1 <+2001>: vmulpd %xmm0,%xmm2,%xmm2
0x00007fffe06df5d5 <+2005>: vmovaps -0x60(%rdi),%xmm0
0x00007fffe06df5da <+2010>: vaddpd %xmm1,%xmm8,%xmm8
0x00007fffe06df5de <+2014>: vmovaps -0x60(%rsi),%xmm1
0x00007fffe06df5e3 <+2019>: vaddpd %xmm2,%xmm9,%xmm9
0x00007fffe06df5e7 <+2023>: vpshufd $0x4e,%xmm1,%xmm2
0x00007fffe06df5ec <+2028>: vmulpd %xmm0,%xmm1,%xmm1
0x00007fffe06df5f0 <+2032>: vmulpd %xmm0,%xmm2,%xmm2
0x00007fffe06df5f4 <+2036>: vmovaps -0x50(%rdi),%xmm0
0x00007fffe06df5f9 <+2041>: vaddpd %xmm1,%xmm10,%xmm10
0x00007fffe06df5fd <+2045>: vmovaps -0x50(%rsi),%xmm1
0x00007fffe06df602 <+2050>: vaddpd %xmm2,%xmm11,%xmm11
0x00007fffe06df606 <+2054>: vpshufd $0x4e,%xmm1,%xmm2
0x00007fffe06df60b <+2059>: vmulpd %xmm0,%xmm1,%xmm1
0x00007fffe06df60f <+2063>: vmulpd %xmm0,%xmm2,%xmm2
0x00007fffe06df613 <+2067>: vmovaps -0x40(%rdi),%xmm0
0x00007fffe06df618 <+2072>: sub $0xffffffffffffffc0,%rdi
0x00007fffe06df61c <+2076>: sub $0xffffffffffffffc0,%rsi
0x00007fffe06df620 <+2080>: sub $0x1,%rax
0x00007fffe06df624 <+2084>: jg,pt 0x7fffe06df598 <zgemm_kernel_n_SANDYBRIDGE+1944>
0x00007fffe06df62b <+2091>: vaddpd %xmm10,%xmm8,%xmm8
0x00007fffe06df630 <+2096>: vaddpd %xmm11,%xmm9,%xmm9
--Type <RET> for more, q to quit, c to continue without paging--
0x00007fffe06df635 <+2101>: nopl (%rax)
0x00007fffe06df638 <+2104>: mov %r15,%rax
0x00007fffe06df63b <+2107>: and $0x3,%rax
0x00007fffe06df63f <+2111>: je,pt 0x7fffe06df678 <zgemm_kernel_n_SANDYBRIDGE+2168>
0x00007fffe06df642 <+2114>: nopw 0x0(%rax,%rax,1)
0x00007fffe06df648 <+2120>: vaddpd %xmm1,%xmm8,%xmm8
0x00007fffe06df64c <+2124>: vmovaps -0x80(%rsi),%xmm1
0x00007fffe06df651 <+2129>: vaddpd %xmm2,%xmm9,%xmm9
0x00007fffe06df655 <+2133>: vpshufd $0x4e,%xmm1,%xmm2
0x00007fffe06df65a <+2138>: vmulpd %xmm0,%xmm1,%xmm1
0x00007fffe06df65e <+2142>: vmulpd %xmm0,%xmm2,%xmm2
0x00007fffe06df662 <+2146>: vmovaps -0x70(%rdi),%xmm0
0x00007fffe06df667 <+2151>: add $0x10,%rdi
0x00007fffe06df66b <+2155>: add $0x10,%rsi
0x00007fffe06df66f <+2159>: sub $0x1,%rax
0x00007fffe06df673 <+2163>: jg,pt 0x7fffe06df648 <zgemm_kernel_n_SANDYBRIDGE+2120>
0x00007fffe06df676 <+2166>: xchg %ax,%ax
0x00007fffe06df678 <+2168>: vaddpd %xmm1,%xmm8,%xmm8
0x00007fffe06df67c <+2172>: vaddpd %xmm2,%xmm9,%xmm9
0x00007fffe06df680 <+2176>: vpcmpeqb %xmm0,%xmm0,%xmm0
0x00007fffe06df684 <+2180>: vpsllq $0x3f,%xmm0,%xmm0
0x00007fffe06df689 <+2185>: vmovddup 0x30(%rsp),%xmm2
0x00007fffe06df68f <+2191>: vmovddup 0x38(%rsp),%xmm3
0x00007fffe06df695 <+2197>: vshufps $0x40,%xmm0,%xmm0,%xmm0
0x00007fffe06df69a <+2202>: vxorps %xmm0,%xmm8,%xmm8
0x00007fffe06df69e <+2206>: vhaddpd %xmm9,%xmm8,%xmm8
0x00007fffe06df6a3 <+2211>: vpshufd $0x4e,%xmm8,%xmm9
0x00007fffe06df6a9 <+2217>: vmulpd %xmm2,%xmm8,%xmm8
0x00007fffe06df6ad <+2221>: vmulpd %xmm3,%xmm9,%xmm9
0x00007fffe06df6b1 <+2225>: vaddsubpd %xmm9,%xmm8,%xmm8
0x00007fffe06df6b6 <+2230>: vmovsd (%rbx),%xmm0
0x00007fffe06df6ba <+2234>: vmovhpd 0x8(%rbx),%xmm0,%xmm0
0x00007fffe06df6bf <+2239>: vaddpd %xmm0,%xmm8,%xmm8
0x00007fffe06df6c3 <+2243>: vmovsd %xmm8,(%rbx)
0x00007fffe06df6c7 <+2247>: vmovhpd %xmm8,0x8(%rbx)
0x00007fffe06df6cc <+2252>: add $0x10,%rbx
0x00007fffe06df6d0 <+2256>: dec %r11
0x00007fffe06df6d3 <+2259>: jg,pt 0x7fffe06df560 <zgemm_kernel_n_SANDYBRIDGE+1888>
0x00007fffe06df6da <+2266>: nopw 0x0(%rax,%rax,1)
0x00007fffe06df6e0 <+2272>: mov (%rsp),%rbx
0x00007fffe06df6e4 <+2276>: mov 0x8(%rsp),%rbp
0x00007fffe06df6e9 <+2281>: mov 0x10(%rsp),%r12
0x00007fffe06df6ee <+2286>: mov 0x18(%rsp),%r13
0x00007fffe06df6f3 <+2291>: mov 0x20(%rsp),%r14
0x00007fffe06df6f8 <+2296>: mov 0x28(%rsp),%r15
0x00007fffe06df6fd <+2301>: add $0x80,%rsp
0x00007fffe06df704 <+2308>: retq
End of assembler dump.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment