Created
July 2, 2024 21:30
-
-
Save ChillFish8/3538c972d041babd2bc0b6b2a25bf9d8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Iterations: 100
Instructions: 12600
Total Cycles: 10225
Total uOps: 22600
Dispatch Width: 6
uOps Per Cycle: 2.21
IPC: 1.23
Block RThroughput: 37.7
Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)
[1] [2] [3] [4] [5] [6] Instructions: | |
1 1 0.25 sub rsp, 104 | |
1 1 1.00 * mov qword ptr [rsp + 40], rdx | |
1 1 1.00 * mov qword ptr [rsp + 48], r9 | |
1 1 0.25 cmp rdx, r9 | |
1 1 0.50 jne .LBB4_16 | |
1 0 0.17 mov rax, rdx | |
1 1 0.25 and rax, -8 | |
1 1 0.50 je .LBB4_2 | |
1 1 0.33 lea r10, [rax - 1] | |
1 1 0.50 shr r10, 3 | |
1 1 0.25 inc r10 | |
1 0 0.17 mov r9d, r10d | |
1 1 0.25 and r9d, 7 | |
1 1 0.25 cmp rax, 57 | |
1 1 0.50 jae .LBB4_14 | |
1 0 0.17 vxorps xmm0, xmm0, xmm0 | |
1 0 0.17 xor eax, eax | |
1 1 0.50 jmp .LBB4_5 | |
1 0 0.17 xor eax, eax | |
1 0 0.17 vxorps xmm0, xmm0, xmm0 | |
1 1 0.50 jmp .LBB4_7 | |
1 1 0.25 and r10, -8 | |
1 0 0.17 vxorps xmm0, xmm0, xmm0 | |
1 0 0.17 xor eax, eax | |
1 8 0.50 * vmovups ymm1, ymmword ptr [rcx + 4*rax] | |
1 8 0.50 * vmovups ymm2, ymmword ptr [rcx + 4*rax + 32] | |
1 8 0.50 * vmovups ymm3, ymmword ptr [rcx + 4*rax + 64] | |
1 8 0.50 * vmovups ymm4, ymmword ptr [rcx + 4*rax + 96] | |
1 10 0.50 * vmulps ymm1, ymm1, ymmword ptr [r8 + 4*rax] | |
1 3 0.50 vaddps ymm0, ymm0, ymm1 | |
1 10 0.50 * vmulps ymm1, ymm2, ymmword ptr [r8 + 4*rax + 32] | |
1 3 0.50 vaddps ymm0, ymm0, ymm1 | |
1 10 0.50 * vmulps ymm1, ymm3, ymmword ptr [r8 + 4*rax + 64] | |
1 3 0.50 vaddps ymm0, ymm0, ymm1 | |
1 10 0.50 * vmulps ymm1, ymm4, ymmword ptr [r8 + 4*rax + 96] | |
1 3 0.50 vaddps ymm0, ymm0, ymm1 | |
1 8 0.50 * vmovups ymm1, ymmword ptr [rcx + 4*rax + 128] | |
1 10 0.50 * vmulps ymm1, ymm1, ymmword ptr [r8 + 4*rax + 128] | |
1 3 0.50 vaddps ymm0, ymm0, ymm1 | |
1 8 0.50 * vmovups ymm1, ymmword ptr [rcx + 4*rax + 160] | |
1 10 0.50 * vmulps ymm1, ymm1, ymmword ptr [r8 + 4*rax + 160] | |
1 3 0.50 vaddps ymm0, ymm0, ymm1 | |
1 8 0.50 * vmovups ymm1, ymmword ptr [rcx + 4*rax + 192] | |
1 10 0.50 * vmulps ymm1, ymm1, ymmword ptr [r8 + 4*rax + 192] | |
1 3 0.50 vaddps ymm0, ymm0, ymm1 | |
1 8 0.50 * vmovups ymm1, ymmword ptr [rcx + 4*rax + 224] | |
1 10 0.50 * vmulps ymm1, ymm1, ymmword ptr [r8 + 4*rax + 224] | |
1 1 0.25 add rax, 64 | |
1 1 0.25 add r10, -8 | |
1 3 0.50 vaddps ymm0, ymm0, ymm1 | |
1 1 0.50 jne .LBB4_15 | |
1 1 0.25 test r9, r9 | |
1 1 0.50 je .LBB4_7 | |
1 8 0.50 * vmovups ymm1, ymmword ptr [rcx + 4*rax] | |
1 10 0.50 * vmulps ymm1, ymm1, ymmword ptr [r8 + 4*rax] | |
1 1 0.25 add rax, 8 | |
1 1 0.25 dec r9 | |
1 3 0.50 vaddps ymm0, ymm0, ymm1 | |
1 1 0.50 jne .LBB4_6 | |
1 4 1.00 vextractf128 xmm1, ymm0, 1 | |
1 0 0.17 mov r9, rax | |
1 1 0.25 sub r9, rdx | |
1 3 0.50 vaddps xmm0, xmm1, xmm0 | |
1 1 0.50 vshufpd xmm1, xmm0, xmm0, 1 | |
1 3 0.50 vaddps xmm0, xmm0, xmm1 | |
1 1 0.50 vmovshdup xmm1, xmm0 | |
1 3 0.50 vaddss xmm0, xmm0, xmm1 | |
1 1 0.50 jae .LBB4_13 | |
1 0 0.17 mov r10d, edx | |
1 1 0.25 sub r10d, eax | |
1 1 0.25 and r10d, 7 | |
1 1 0.50 je .LBB4_10 | |
1 8 0.50 * vmovss xmm1, dword ptr [rcx + 4*rax] | |
1 10 0.50 * vmulss xmm1, xmm1, dword ptr [r8 + 4*rax] | |
1 1 0.25 inc rax | |
1 1 0.25 dec r10 | |
1 3 0.50 vaddss xmm0, xmm0, xmm1 | |
1 1 0.50 jne .LBB4_9 | |
1 1 0.25 cmp r9, -8 | |
1 1 0.50 ja .LBB4_13 | |
1 1 0.25 neg rdx | |
1 1 0.25 add rax, 7 | |
1 8 0.50 * vmovss xmm1, dword ptr [rcx + 4*rax - 28] | |
1 8 0.50 * vmovss xmm2, dword ptr [rcx + 4*rax - 24] | |
2 2 0.25 lea r9, [rdx + rax + 8] | |
1 10 0.50 * vmulss xmm1, xmm1, dword ptr [r8 + 4*rax - 28] | |
1 3 0.50 vaddss xmm0, xmm0, xmm1 | |
1 10 0.50 * vmulss xmm1, xmm2, dword ptr [r8 + 4*rax - 24] | |
1 3 0.50 vaddss xmm0, xmm0, xmm1 | |
1 8 0.50 * vmovss xmm1, dword ptr [rcx + 4*rax - 20] | |
1 10 0.50 * vmulss xmm1, xmm1, dword ptr [r8 + 4*rax - 20] | |
1 3 0.50 vaddss xmm0, xmm0, xmm1 | |
1 8 0.50 * vmovss xmm1, dword ptr [rcx + 4*rax - 16] | |
1 10 0.50 * vmulss xmm1, xmm1, dword ptr [r8 + 4*rax - 16] | |
1 3 0.50 vaddss xmm0, xmm0, xmm1 | |
1 8 0.50 * vmovss xmm1, dword ptr [rcx + 4*rax - 12] | |
1 10 0.50 * vmulss xmm1, xmm1, dword ptr [r8 + 4*rax - 12] | |
1 3 0.50 vaddss xmm0, xmm0, xmm1 | |
1 8 0.50 * vmovss xmm1, dword ptr [rcx + 4*rax - 8] | |
1 10 0.50 * vmulss xmm1, xmm1, dword ptr [r8 + 4*rax - 8] | |
1 3 0.50 vaddss xmm0, xmm0, xmm1 | |
1 8 0.50 * vmovss xmm1, dword ptr [rcx + 4*rax - 4] | |
1 10 0.50 * vmulss xmm1, xmm1, dword ptr [r8 + 4*rax - 4] | |
1 3 0.50 vaddss xmm0, xmm0, xmm1 | |
1 8 0.50 * vmovss xmm1, dword ptr [rcx + 4*rax] | |
1 10 0.50 * vmulss xmm1, xmm1, dword ptr [r8 + 4*rax] | |
1 1 0.25 add rax, 8 | |
1 3 0.50 vaddss xmm0, xmm0, xmm1 | |
1 1 0.25 cmp r9, 7 | |
1 1 0.50 jne .LBB4_12 | |
1 1 0.25 add rsp, 104 | |
1 0 0.25 U vzeroupper | |
1 5 0.50 U ret | |
1 1 0.33 lea rax, [rip + __unnamed_2] | |
1 1 0.33 lea rcx, [rip + __unnamed_3] | |
1 1 0.33 lea r9, [rip + __unnamed_5] | |
1 1 0.33 lea rdx, [rsp + 48] | |
1 1 0.33 lea r8, [rsp + 56] | |
1 0 0.17 vxorps xmm0, xmm0, xmm0 | |
1 1 1.00 * mov qword ptr [rsp + 56], rax | |
1 1 1.00 * mov qword ptr [rsp + 64], 1 | |
1 1 1.00 * mov qword ptr [rsp + 72], rcx | |
1 1 0.33 lea rcx, [rsp + 40] | |
1 1 1.00 * vmovups xmmword ptr [rsp + 80], xmm0 | |
1 1 0.50 call core::panicking::assert_failed | |
100 100 25.00 * * U int3 | |
Resources: | |
[0] - Zn3AGU0 | |
[1] - Zn3AGU1 | |
[2] - Zn3AGU2 | |
[3] - Zn3ALU0 | |
[4] - Zn3ALU1 | |
[5] - Zn3ALU2 | |
[6] - Zn3ALU3 | |
[7] - Zn3BRU1 | |
[8] - Zn3FPP0 | |
[9] - Zn3FPP1 | |
[10] - Zn3FPP2 | |
[11] - Zn3FPP3 | |
[12.0] - Zn3FPP45 | |
[12.1] - Zn3FPP45 | |
[13] - Zn3FPSt | |
[14.0] - Zn3LSU | |
[14.1] - Zn3LSU | |
[14.2] - Zn3LSU | |
[15.0] - Zn3Load | |
[15.1] - Zn3Load | |
[15.2] - Zn3Load | |
[16.0] - Zn3Store | |
[16.1] - Zn3Store | |
Resource pressure per iteration: | |
[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] | |
4.33 4.33 4.34 10.97 100.07 8.99 9.98 9.99 10.00 10.99 11.00 11.01 18.50 18.50 1.00 15.99 16.00 16.01 12.33 12.33 12.34 5.00 6.00 | |
Resource pressure by instruction: | |
[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: | |
- - - - - - 1.00 - - - - - - - - - - - - - - - - sub rsp, 104 | |
0.33 0.33 0.34 - - - - - - - - - - - - 0.66 0.66 0.68 - - - - 2.00 mov qword ptr [rsp + 40], rdx | |
0.33 0.34 0.33 - - - - - - - - - - - - 0.66 0.68 0.66 - - - 2.00 - mov qword ptr [rsp + 48], r9 | |
- - - - - 1.00 - - - - - - - - - - - - - - - - - cmp rdx, r9 | |
- - - - - - - 1.00 - - - - - - - - - - - - - - - jne .LBB4_16 | |
- - - - - - - - - - - - - - - - - - - - - - - mov rax, rdx | |
- - - 0.99 0.01 - - - - - - - - - - - - - - - - - - and rax, -8 | |
- - - 1.00 - - - - - - - - - - - - - - - - - - - je .LBB4_2 | |
0.34 0.33 0.33 - - - - - - - - - - - - - - - - - - - - lea r10, [rax - 1] | |
- - - - - 1.00 - - - - - - - - - - - - - - - - - shr r10, 3 | |
- - - - 0.01 - 0.99 - - - - - - - - - - - - - - - - inc r10 | |
- - - - - - - - - - - - - - - - - - - - - - - mov r9d, r10d | |
- - - - - 0.99 0.01 - - - - - - - - - - - - - - - - and r9d, 7 | |
- - - - - - 1.00 - - - - - - - - - - - - - - - - cmp rax, 57 | |
- - - 0.01 - - - 0.99 - - - - - - - - - - - - - - - jae .LBB4_14 | |
- - - - - - - - - - - - - - - - - - - - - - - vxorps xmm0, xmm0, xmm0 | |
- - - - - - - - - - - - - - - - - - - - - - - xor eax, eax | |
- - - 0.99 - - - 0.01 - - - - - - - - - - - - - - - jmp .LBB4_5 | |
- - - - - - - - - - - - - - - - - - - - - - - xor eax, eax | |
- - - - - - - - - - - - - - - - - - - - - - - vxorps xmm0, xmm0, xmm0 | |
- - - - - - - 1.00 - - - - - - - - - - - - - - - jmp .LBB4_7 | |
- - - 0.99 - 0.01 - - - - - - - - - - - - - - - - - and r10, -8 | |
- - - - - - - - - - - - - - - - - - - - - - - vxorps xmm0, xmm0, xmm0 | |
- - - - - - - - - - - - - - - - - - - - - - - xor eax, eax | |
- - - - - - - - - - - - 0.50 0.50 - 0.34 0.33 0.33 0.33 0.33 0.34 - - vmovups ymm1, ymmword ptr [rcx + 4*rax] | |
- - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.34 0.33 0.34 0.33 - - vmovups ymm2, ymmword ptr [rcx + 4*rax + 32] | |
- - - - - - - - - - - - 0.50 0.50 - 0.33 0.34 0.33 0.34 0.33 0.33 - - vmovups ymm3, ymmword ptr [rcx + 4*rax + 64] | |
- - - - - - - - - - - - 0.50 0.50 - 0.34 0.33 0.33 0.33 0.33 0.34 - - vmovups ymm4, ymmword ptr [rcx + 4*rax + 96] | |
- - - - - - - - 0.98 0.02 - - 0.50 0.50 - 0.33 0.33 0.34 0.33 0.34 0.33 - - vmulps ymm1, ymm1, ymmword ptr [r8 + 4*rax] | |
- - - - - - - - - - 0.99 0.01 - - - - - - - - - - - vaddps ymm0, ymm0, ymm1 | |
- - - - - - - - 0.01 0.99 - - 0.51 0.49 - 0.32 0.34 0.34 0.34 0.34 0.32 - - vmulps ymm1, ymm2, ymmword ptr [r8 + 4*rax + 32] | |
- - - - - - - - - - - 1.00 - - - - - - - - - - - vaddps ymm0, ymm0, ymm1 | |
- - - - - - - - 0.99 0.01 - - 0.49 0.51 - 0.34 0.34 0.32 0.34 0.32 0.34 - - vmulps ymm1, ymm3, ymmword ptr [r8 + 4*rax + 64] | |
- - - - - - - - - - 0.01 0.99 - - - - - - - - - - - vaddps ymm0, ymm0, ymm1 | |
- - - - - - - - 0.01 0.99 - - 0.51 0.49 - 0.34 0.32 0.34 0.32 0.34 0.34 - - vmulps ymm1, ymm4, ymmword ptr [r8 + 4*rax + 96] | |
- - - - - - - - - - 0.98 0.02 - - - - - - - - - - - vaddps ymm0, ymm0, ymm1 | |
- - - - - - - - - - - - 0.49 0.51 - 0.33 0.34 0.33 0.34 0.33 0.33 - - vmovups ymm1, ymmword ptr [rcx + 4*rax + 128] | |
- - - - - - - - 0.99 0.01 - - 0.49 0.51 - 0.33 0.32 0.35 0.32 0.35 0.33 - - vmulps ymm1, ymm1, ymmword ptr [r8 + 4*rax + 128] | |
- - - - - - - - - - 0.01 0.99 - - - - - - - - - - - vaddps ymm0, ymm0, ymm1 | |
- - - - - - - - - - - - 0.51 0.49 - 0.34 0.34 0.32 0.34 0.32 0.34 - - vmovups ymm1, ymmword ptr [rcx + 4*rax + 160] | |
- - - - - - - - 0.01 0.99 - - 0.51 0.49 - 0.32 0.34 0.34 0.34 0.34 0.32 - - vmulps ymm1, ymm1, ymmword ptr [r8 + 4*rax + 160] | |
- - - - - - - - - - 0.98 0.02 - - - - - - - - - - - vaddps ymm0, ymm0, ymm1 | |
- - - - - - - - - - - - 0.49 0.51 - 0.35 0.33 0.32 0.33 0.32 0.35 - - vmovups ymm1, ymmword ptr [rcx + 4*rax + 192] | |
- - - - - - - - 0.99 0.01 - - 0.49 0.51 - 0.32 0.35 0.33 0.35 0.33 0.32 - - vmulps ymm1, ymm1, ymmword ptr [r8 + 4*rax + 192] | |
- - - - - - - - - - 0.01 0.99 - - - - - - - - - - - vaddps ymm0, ymm0, ymm1 | |
- - - - - - - - - - - - 0.51 0.49 - 0.34 0.32 0.34 0.32 0.34 0.34 - - vmovups ymm1, ymmword ptr [rcx + 4*rax + 224] | |
- - - - - - - - 0.01 0.99 - - 0.51 0.49 - 0.34 0.34 0.32 0.34 0.32 0.34 - - vmulps ymm1, ymm1, ymmword ptr [r8 + 4*rax + 224] | |
- - - - 0.01 - 0.99 - - - - - - - - - - - - - - - - add rax, 64 | |
- - - 0.01 - 0.99 - - - - - - - - - - - - - - - - - add r10, -8 | |
- - - - - - - - - - 0.98 0.02 - - - - - - - - - - - vaddps ymm0, ymm0, ymm1 | |
- - - - - - - 1.00 - - - - - - - - - - - - - - - jne .LBB4_15 | |
- - - 0.99 - - 0.01 - - - - - - - - - - - - - - - - test r9, r9 | |
- - - 0.01 - - - 0.99 - - - - - - - - - - - - - - - je .LBB4_7 | |
- - - - - - - - - - - - 0.49 0.51 - 0.33 0.32 0.35 0.32 0.35 0.33 - - vmovups ymm1, ymmword ptr [rcx + 4*rax] | |
- - - - - - - - 0.99 0.01 - - 0.49 0.51 - 0.35 0.33 0.32 0.33 0.32 0.35 - - vmulps ymm1, ymm1, ymmword ptr [r8 + 4*rax] | |
- - - - - 0.01 0.99 - - - - - - - - - - - - - - - - add rax, 8 | |
- - - - 0.01 0.99 - - - - - - - - - - - - - - - - - dec r9 | |
- - - - - - - - - - 0.01 0.99 - - - - - - - - - - - vaddps ymm0, ymm0, ymm1 | |
- - - - - - - 1.00 - - - - - - - - - - - - - - - jne .LBB4_6 | |
- - - - - - - - 1.00 - - - - - - - - - - - - - - vextractf128 xmm1, ymm0, 1 | |
- - - - - - - - - - - - - - - - - - - - - - - mov r9, rax | |
- - - 0.99 - - 0.01 - - - - - - - - - - - - - - - - sub r9, rdx | |
- - - - - - - - - - 0.99 0.01 - - - - - - - - - - - vaddps xmm0, xmm1, xmm0 | |
- - - - - - - - - 0.99 0.01 - - - - - - - - - - - - vshufpd xmm1, xmm0, xmm0, 1 | |
- - - - - - - - - - - 1.00 - - - - - - - - - - - vaddps xmm0, xmm0, xmm1 | |
- - - - - - - - - 0.99 0.01 - - - - - - - - - - - - vmovshdup xmm1, xmm0 | |
- - - - - - - - - - 0.99 0.01 - - - - - - - - - - - vaddss xmm0, xmm0, xmm1 | |
- - - 1.00 - - - - - - - - - - - - - - - - - - - jae .LBB4_13 | |
- - - - - - - - - - - - - - - - - - - - - - - mov r10d, edx | |
- - - - - 0.01 0.99 - - - - - - - - - - - - - - - - sub r10d, eax | |
- - - - 0.01 0.99 - - - - - - - - - - - - - - - - - and r10d, 7 | |
- - - - - - - 1.00 - - - - - - - - - - - - - - - je .LBB4_10 | |
- - - - - - - - - - - - 0.51 0.49 - 0.32 0.34 0.34 0.34 0.34 0.32 - - vmovss xmm1, dword ptr [rcx + 4*rax] | |
- - - - - - - - 0.01 0.99 - - 0.50 0.50 - 0.33 0.33 0.34 0.33 0.34 0.33 - - vmulss xmm1, xmm1, dword ptr [r8 + 4*rax] | |
- - - - - - 1.00 - - - - - - - - - - - - - - - - inc rax | |
- - - - - 0.01 0.99 - - - - - - - - - - - - - - - - dec r10 | |
- - - - - - - - - - 0.01 0.99 - - - - - - - - - - - vaddss xmm0, xmm0, xmm1 | |
- - - - - - - 1.00 - - - - - - - - - - - - - - - jne .LBB4_9 | |
- - - 0.99 0.01 - - - - - - - - - - - - - - - - - - cmp r9, -8 | |
- - - 1.00 - - - - - - - - - - - - - - - - - - - ja .LBB4_13 | |
- - - - - - 1.00 - - - - - - - - - - - - - - - - neg rdx | |
- - - 0.01 - 0.99 - - - - - - - - - - - - - - - - - add rax, 7 | |
- - - - - - - - - - - - 0.50 0.50 - 0.33 0.34 0.33 0.34 0.33 0.33 - - vmovss xmm1, dword ptr [rcx + 4*rax - 28] | |
- - - - - - - - - - - - 0.50 0.50 - 0.34 0.33 0.33 0.33 0.33 0.34 - - vmovss xmm2, dword ptr [rcx + 4*rax - 24] | |
- - - - - 1.00 - - - - - - - - - - - - - - - - - lea r9, [rdx + rax + 8] | |
- - - - - - - - 0.99 0.01 - - 0.50 0.50 - 0.33 0.33 0.34 0.33 0.34 0.33 - - vmulss xmm1, xmm1, dword ptr [r8 + 4*rax - 28] | |
- - - - - - - - - - 1.00 - - - - - - - - - - - - vaddss xmm0, xmm0, xmm1 | |
- - - - - - - - 0.01 0.99 - - 0.50 0.50 - 0.33 0.34 0.33 0.34 0.33 0.33 - - vmulss xmm1, xmm2, dword ptr [r8 + 4*rax - 24] | |
- - - - - - - - - - 0.01 0.99 - - - - - - - - - - - vaddss xmm0, xmm0, xmm1 | |
- - - - - - - - - - - - 0.50 0.50 - 0.34 0.33 0.33 0.33 0.33 0.34 - - vmovss xmm1, dword ptr [rcx + 4*rax - 20] | |
- - - - - - - - 0.99 0.01 - - 0.50 0.50 - 0.33 0.34 0.33 0.34 0.33 0.33 - - vmulss xmm1, xmm1, dword ptr [r8 + 4*rax - 20] | |
- - - - - - - - - - 1.00 - - - - - - - - - - - - vaddss xmm0, xmm0, xmm1 | |
- - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.34 0.33 0.34 0.33 - - vmovss xmm1, dword ptr [rcx + 4*rax - 16] | |
- - - - - - - - 0.01 0.99 - - 0.50 0.50 - 0.34 0.33 0.33 0.33 0.33 0.34 - - vmulss xmm1, xmm1, dword ptr [r8 + 4*rax - 16] | |
- - - - - - - - - - 0.01 0.99 - - - - - - - - - - - vaddss xmm0, xmm0, xmm1 | |
- - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.34 0.33 0.34 0.33 - - vmovss xmm1, dword ptr [rcx + 4*rax - 12] | |
- - - - - - - - 0.99 0.01 - - 0.50 0.50 - 0.33 0.32 0.35 0.32 0.35 0.33 - - vmulss xmm1, xmm1, dword ptr [r8 + 4*rax - 12] | |
- - - - - - - - - - 1.00 - - - - - - - - - - - - vaddss xmm0, xmm0, xmm1 | |
- - - - - - - - - - - - 0.50 0.50 - 0.33 0.34 0.33 0.34 0.33 0.33 - - vmovss xmm1, dword ptr [rcx + 4*rax - 8] | |
- - - - - - - - 0.01 0.99 - - 0.50 0.50 - 0.32 0.35 0.33 0.35 0.33 0.32 - - vmulss xmm1, xmm1, dword ptr [r8 + 4*rax - 8] | |
- - - - - - - - - - 0.01 0.99 - - - - - - - - - - - vaddss xmm0, xmm0, xmm1 | |
- - - - - - - - - - - - 0.50 0.50 - 0.35 0.33 0.32 0.33 0.32 0.35 - - vmovss xmm1, dword ptr [rcx + 4*rax - 4] | |
- - - - - - - - 0.99 0.01 - - 0.50 0.50 - 0.32 0.35 0.33 0.35 0.33 0.32 - - vmulss xmm1, xmm1, dword ptr [r8 + 4*rax - 4] | |
- - - - - - - - - - 1.00 - - - - - - - - - - - - vaddss xmm0, xmm0, xmm1 | |
- - - - - - - - - - - - 0.50 0.50 - 0.33 0.32 0.35 0.32 0.35 0.33 - - vmovss xmm1, dword ptr [rcx + 4*rax] | |
- - - - - - - - 0.01 0.99 - - 0.50 0.50 - 0.35 0.33 0.32 0.33 0.32 0.35 - - vmulss xmm1, xmm1, dword ptr [r8 + 4*rax] | |
- - - 0.99 0.01 - - - - - - - - - - - - - - - - - - add rax, 8 | |
- - - - - - - - - - - 1.00 - - - - - - - - - - - vaddss xmm0, xmm0, xmm1 | |
- - - - - - 1.00 - - - - - - - - - - - - - - - - cmp r9, 7 | |
- - - 0.01 - - - 0.99 - - - - - - - - - - - - - - - jne .LBB4_12 | |
- - - - - 1.00 - - - - - - - - - - - - - - - - - add rsp, 104 | |
- - - - - - - - 0.01 - 0.99 - - - - - - - - - - - - vzeroupper | |
0.33 0.33 0.34 0.99 - - - 0.01 - - - - - - - 0.34 0.33 0.33 0.33 0.33 0.34 - - ret | |
0.33 0.34 0.33 - - - - - - - - - - - - - - - - - - - - lea rax, [rip + __unnamed_2] | |
0.34 0.33 0.33 - - - - - - - - - - - - - - - - - - - - lea rcx, [rip + __unnamed_3] | |
0.33 0.33 0.34 - - - - - - - - - - - - - - - - - - - - lea r9, [rip + __unnamed_5] | |
0.33 0.34 0.33 - - - - - - - - - - - - - - - - - - - - lea rdx, [rsp + 48] | |
0.34 0.33 0.33 - - - - - - - - - - - - - - - - - - - - lea r8, [rsp + 56] | |
- - - - - - - - - - - - - - - - - - - - - - - vxorps xmm0, xmm0, xmm0 | |
0.32 0.35 0.33 - - - - - - - - - - - - 0.66 0.66 0.68 - - - - 2.00 mov qword ptr [rsp + 56], rax | |
0.35 0.33 0.32 - - - - - - - - - - - - 0.66 0.68 0.66 - - - 2.00 - mov qword ptr [rsp + 64], 1 | |
0.33 0.32 0.35 - - - - - - - - - - - - 0.68 0.66 0.66 - - - - 2.00 mov qword ptr [rsp + 72], rcx | |
0.33 0.33 0.34 - - - - - - - - - - - - - - - - - - - - lea rcx, [rsp + 40] | |
- - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.34 - - - 1.00 - vmovups xmmword ptr [rsp + 80], xmm0 | |
- - - - - - - 1.00 - - - - - - - - - - - - - - - call core::panicking::assert_failed | |
- - - - 100.00 - - - - - - - - - - - - - - - - - - int3 | |
<stdin>:235:1: error: Unfinished frame! | |
^ | |
<stdin>:235:1: error: Unfinished frame! | |
^ | |
warning: found a return instruction in the input assembly sequence.
note: program counter updates are ignored.
warning: found a call in the input assembly sequence.
note: call instructions are not correctly modeled. Assume a latency of 100cy.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment