Created
July 15, 2010 13:36
-
-
Save jedbrown/476942 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; With prefetch and NT stores: | |
0000000000401608 <main+0xac8> add esi,0x4 | |
000000000040160b <main+0xacb> add rcx,0x20 | |
000000000040160f <main+0xacf> cmp esi,0x7a1200 | |
0000000000401615 <main+0xad5> je 00000000004016a9 <main+0xb69> | |
000000000040161b <main+0xadb> mov rax,QWORD PTR [rsp+0x748] | |
0000000000401623 <main+0xae3> movapd xmm0,xmm3 | |
0000000000401627 <main+0xae7> mulpd xmm0,XMMWORD PTR [rax+rcx*1] | |
000000000040162c <main+0xaec> movapd xmm1,XMMWORD PTR [rax+rcx*1+0x10] | |
0000000000401632 <main+0xaf2> mov rax,QWORD PTR [rsp+0x750] | |
000000000040163a <main+0xafa> mulpd xmm1,xmm3 | |
000000000040163e <main+0xafe> addpd xmm0,XMMWORD PTR [rax+rcx*1] | |
0000000000401643 <main+0xb03> mov rax,rcx | |
0000000000401646 <main+0xb06> add rax,QWORD PTR [rsp+0x758] | |
000000000040164e <main+0xb0e> movntpd XMMWORD PTR [rax],xmm0 | |
0000000000401652 <main+0xb12> mov rax,QWORD PTR [rsp+0x750] | |
000000000040165a <main+0xb1a> addpd xmm1,XMMWORD PTR [rax+rcx*1+0x10] | |
0000000000401660 <main+0xb20> mov rax,QWORD PTR [rsp+0x758] | |
0000000000401668 <main+0xb28> add rax,rcx | |
000000000040166b <main+0xb2b> test sil,0x7 | |
000000000040166f <main+0xb2f> movntpd XMMWORD PTR [rax+0x10],xmm1 | |
0000000000401674 <main+0xb34> jne 0000000000401608 <main+0xac8> | |
0000000000401676 <main+0xb36> lea rax,[rcx+0x200] | |
000000000040167d <main+0xb3d> add esi,0x4 | |
0000000000401680 <main+0xb40> add rcx,0x20 | |
0000000000401684 <main+0xb44> mov rdx,rax | |
0000000000401687 <main+0xb47> add rax,QWORD PTR [rsp+0x748] | |
000000000040168f <main+0xb4f> add rdx,QWORD PTR [rsp+0x750] | |
0000000000401697 <main+0xb57> cmp esi,0x7a1200 | |
000000000040169d <main+0xb5d> prefetchnta BYTE PTR [rdx] | |
00000000004016a0 <main+0xb60> prefetchnta BYTE PTR [rax] | |
00000000004016a3 <main+0xb63> jne 000000000040161b <main+0xadb> | |
;; Inner loop for no prefetch and standard stores: | |
0000000000401260 <main+0x840> movapd xmm0,xmm3 | |
0000000000401264 <main+0x844> movapd xmm1,XMMWORD PTR [rax+0x8732f0] | |
000000000040126c <main+0x84c> mulpd xmm0,XMMWORD PTR [rax+0x8732e0] | |
0000000000401274 <main+0x854> mulpd xmm1,xmm3 | |
0000000000401278 <main+0x858> addpd xmm0,XMMWORD PTR [rax+0x73aae0] | |
0000000000401280 <main+0x860> addpd xmm1,XMMWORD PTR [rax+0x73aaf0] | |
0000000000401288 <main+0x868> movapd XMMWORD PTR [rax+0x6022e0],xmm0 | |
0000000000401290 <main+0x870> movapd XMMWORD PTR [rax+0x6022f0],xmm1 | |
0000000000401298 <main+0x878> add rax,0x20 | |
000000000040129c <main+0x87c> cmp rax,0x138800 | |
00000000004012a2 <main+0x882> jne 0000000000401260 <main+0x840> | |
;; Prefetch every loop: | |
0000000000400ea0 <main+0x790> lea rcx,[rax-0x2] | |
0000000000400ea4 <main+0x794> movapd xmm0,XMMWORD PTR [rax*8+0x8732c0] | |
0000000000400ead <main+0x79d> prefetchnta BYTE PTR [rdx+0x73acc0] | |
0000000000400eb4 <main+0x7a4> prefetchnta BYTE PTR [rdx+0x8734c0] | |
0000000000400ebb <main+0x7ab> add rdx,0x20 | |
0000000000400ebf <main+0x7af> movapd xmm1,XMMWORD PTR [rcx*8+0x8732c0] | |
0000000000400ec8 <main+0x7b8> mulpd xmm0,xmm2 | |
0000000000400ecc <main+0x7bc> mulpd xmm1,xmm2 | |
0000000000400ed0 <main+0x7c0> addpd xmm0,XMMWORD PTR [rax*8+0x73aac0] | |
0000000000400ed9 <main+0x7c9> addpd xmm1,XMMWORD PTR [rcx*8+0x73aac0] | |
0000000000400ee2 <main+0x7d2> movntpd XMMWORD PTR [rcx*8+0x6022c0],xmm1 | |
0000000000400eeb <main+0x7db> movntpd XMMWORD PTR [rax*8+0x6022c0],xmm0 | |
0000000000400ef4 <main+0x7e4> add rax,0x4 | |
0000000000400ef8 <main+0x7e8> cmp rax,0x27102 | |
0000000000400efe <main+0x7ee> jne 0000000000400ea0 <main+0x790> | |
;; Unrolled 4 times (for Aron): | |
0000000000401270 <main+0x850> movapd xmm0,xmm4 | |
0000000000401274 <main+0x854> lea rax,[rdx+0x200] | |
000000000040127b <main+0x85b> movapd xmm1,XMMWORD PTR [rdx+0x8013e70] | |
0000000000401283 <main+0x863> mulpd xmm0,XMMWORD PTR [rdx+0x8013e60] | |
000000000040128b <main+0x86b> prefetchnta BYTE PTR [rax+0x430ae60] | |
0000000000401292 <main+0x872> prefetchnta BYTE PTR [rax+0x8013e60] | |
0000000000401299 <main+0x879> movapd xmm2,XMMWORD PTR [rdx+0x8013e80] | |
00000000004012a1 <main+0x881> mulpd xmm1,xmm4 | |
00000000004012a5 <main+0x885> movapd xmm3,XMMWORD PTR [rdx+0x8013e90] | |
00000000004012ad <main+0x88d> mulpd xmm2,xmm4 | |
00000000004012b1 <main+0x891> mulpd xmm3,xmm4 | |
00000000004012b5 <main+0x895> addpd xmm0,XMMWORD PTR [rdx+0x430ae60] | |
00000000004012bd <main+0x89d> addpd xmm1,XMMWORD PTR [rdx+0x430ae70] | |
00000000004012c5 <main+0x8a5> addpd xmm2,XMMWORD PTR [rdx+0x430ae80] | |
00000000004012cd <main+0x8ad> addpd xmm3,XMMWORD PTR [rdx+0x430ae90] | |
00000000004012d5 <main+0x8b5> movntpd XMMWORD PTR [rdx+0x601e60],xmm0 | |
00000000004012dd <main+0x8bd> movntpd XMMWORD PTR [rdx+0x601e70],xmm1 | |
00000000004012e5 <main+0x8c5> movntpd XMMWORD PTR [rdx+0x601e80],xmm2 | |
00000000004012ed <main+0x8cd> movntpd XMMWORD PTR [rdx+0x601e90],xmm3 | |
00000000004012f5 <main+0x8d5> add rdx,0x40 | |
00000000004012f9 <main+0x8d9> cmp rdx,0x3d09000 | |
0000000000401300 <main+0x8e0> jne 0000000000401270 <main+0x850> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
; Prefetch every loop:
0000000000400ea0 <main+0x790> lea rcx,[rax-0x2]
0000000000400ea4 <main+0x794> movapd xmm0,XMMWORD PTR [rax*8+0x8732c0]
0000000000400ead <main+0x79d> prefetchnta BYTE PTR [rdx+0x73acc0]
0000000000400eb4 <main+0x7a4> prefetchnta BYTE PTR [rdx+0x8734c0]
0000000000400ebb <main+0x7ab> add rdx,0x20
0000000000400ebf <main+0x7af> movapd xmm1,XMMWORD PTR [rcx*8+0x8732c0]
0000000000400ec8 <main+0x7b8> mulpd xmm0,xmm2
0000000000400ecc <main+0x7bc> mulpd xmm1,xmm2
0000000000400ed0 <main+0x7c0> addpd xmm0,XMMWORD PTR [rax*8+0x73aac0]
0000000000400ed9 <main+0x7c9> addpd xmm1,XMMWORD PTR [rcx*8+0x73aac0]
0000000000400ee2 <main+0x7d2> movntpd XMMWORD PTR [rcx*8+0x6022c0],xmm1
0000000000400eeb <main+0x7db> movntpd XMMWORD PTR [rax*8+0x6022c0],xmm0
0000000000400ef4 <main+0x7e4> add rax,0x4
0000000000400ef8 <main+0x7e8> cmp rax,0x27102
0000000000400efe <main+0x7ee> jne 0000000000400ea0 <main+0x790>