Created
December 31, 2025 16:46
-
-
Save iczelia/3cce65034b918382b53624854215d23e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Iterations: 100 | |
| Instructions: 5000 | |
| Total Cycles: 3199 | |
| Total uOps: 6400 | |
| Dispatch Width: 4 | |
| uOps Per Cycle: 2.00 | |
| IPC: 1.56 | |
| Block RThroughput: 16.0 | |
| Instruction Info: | |
| [1]: #uOps | |
| [2]: Latency | |
| [3]: RThroughput | |
| [4]: MayLoad | |
| [5]: MayStore | |
| [6]: HasSideEffects (U) | |
| [1] [2] [3] [4] [5] [6] Instructions: | |
| 1 1 0.25 test rdx, rdx | |
| 1 1 0.50 sete al | |
| 1 1 0.25 test dil, 15 | |
| 1 1 0.50 sete cl | |
| 1 1 0.25 or cl, al | |
| 1 1 0.50 jne .LBB1_6 | |
| 1 1 0.50 lea rax, [rdi + 1] | |
| 2 6 0.50 * cmp byte ptr [rdi], sil | |
| 1 1 0.50 je .LBB1_16 | |
| 1 1 0.50 lea r8, [rdx - 1] | |
| 1 1 0.25 inc rdi | |
| 1 1 0.25 cmp rdx, 1 | |
| 1 1 0.50 je .LBB1_5 | |
| 1 1 0.25 mov ecx, eax | |
| 1 1 0.25 and ecx, 15 | |
| 1 1 0.25 inc rax | |
| 1 1 0.25 mov rdx, r8 | |
| 1 1 0.25 test rcx, rcx | |
| 1 1 0.50 jne .LBB1_2 | |
| 1 1 0.25 cmp r8, 16 | |
| 1 1 0.50 jae .LBB1_7 | |
| 1 1 0.50 jmp .LBB1_10 | |
| 1 1 0.25 mov r8, rdx | |
| 1 1 0.25 cmp r8, 16 | |
| 1 1 0.50 jb .LBB1_10 | |
| 1 1 0.25 movzx eax, sil | |
| 1 1 1.00 vmovd xmm0, eax | |
| 1 1 0.25 mov eax, 1 | |
| 1 1 0.25 mov edx, 16 | |
| 9 23 4.00 * vpcmpestri xmm0, xmmword ptr [rdi], 0 | |
| 1 1 0.25 cmp ecx, 16 | |
| 1 1 0.50 jne .LBB1_15 | |
| 1 1 0.25 add r8, -16 | |
| 1 1 0.25 add rdi, 16 | |
| 1 1 0.25 cmp r8, 15 | |
| 1 1 0.50 ja .LBB1_8 | |
| 1 1 0.25 test r8, r8 | |
| 1 1 0.50 je .LBB1_14 | |
| 2 6 0.50 * cmp byte ptr [rdi], sil | |
| 1 1 0.50 je .LBB1_16 | |
| 1 1 0.25 inc rdi | |
| 1 1 0.25 dec r8 | |
| 1 1 0.50 jne .LBB1_11 | |
| 1 0 0.25 xor edi, edi | |
| 1 1 0.25 mov rax, rdi | |
| 3 7 1.00 U ret | |
| 1 1 0.25 mov eax, ecx | |
| 1 1 0.25 add rdi, rax | |
| 1 1 0.25 mov rax, rdi | |
| 3 7 1.00 U ret | |
| Resources: | |
| [0] - BWDivider | |
| [1] - BWFPDivider | |
| [2] - BWPort0 | |
| [3] - BWPort1 | |
| [4] - BWPort2 | |
| [5] - BWPort3 | |
| [6] - BWPort4 | |
| [7] - BWPort5 | |
| [8] - BWPort6 | |
| [9] - BWPort7 | |
| Resource pressure per iteration: | |
| [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] | |
| - - 15.04 13.96 2.50 2.50 - 14.03 14.97 - | |
| Resource pressure by instruction: | |
| [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: | |
| - - 0.97 - - - - - 0.03 - test rdx, rdx | |
| - - 0.98 - - - - - 0.02 - sete al | |
| - - - 0.96 - - - 0.04 - - test dil, 15 | |
| - - 0.98 - - - - - 0.02 - sete cl | |
| - - - - - - - 0.99 0.01 - or cl, al | |
| - - 0.99 - - - - - 0.01 - jne .LBB1_6 | |
| - - - 0.03 - - - 0.97 - - lea rax, [rdi + 1] | |
| - - 0.98 0.01 0.50 0.50 - - 0.01 - cmp byte ptr [rdi], sil | |
| - - 0.97 - - - - - 0.03 - je .LBB1_16 | |
| - - - 0.01 - - - 0.99 - - lea r8, [rdx - 1] | |
| - - - - - - - 0.02 0.98 - inc rdi | |
| - - 0.01 0.99 - - - - - - cmp rdx, 1 | |
| - - 0.02 - - - - - 0.98 - je .LBB1_5 | |
| - - - 0.99 - - - - 0.01 - mov ecx, eax | |
| - - 0.01 0.01 - - - 0.98 - - and ecx, 15 | |
| - - 0.98 - - - - 0.01 0.01 - inc rax | |
| - - - 0.03 - - - 0.02 0.95 - mov rdx, r8 | |
| - - - - - - - 0.99 0.01 - test rcx, rcx | |
| - - 0.98 - - - - - 0.02 - jne .LBB1_2 | |
| - - - 0.03 - - - - 0.97 - cmp r8, 16 | |
| - - 0.99 - - - - - 0.01 - jae .LBB1_7 | |
| - - 0.01 - - - - - 0.99 - jmp .LBB1_10 | |
| - - - 0.96 - - - 0.04 - - mov r8, rdx | |
| - - - 0.02 - - - 0.98 - - cmp r8, 16 | |
| - - 0.04 - - - - - 0.96 - jb .LBB1_10 | |
| - - 0.01 0.98 - - - - 0.01 - movzx eax, sil | |
| - - - - - - - 1.00 - - vmovd xmm0, eax | |
| - - - 0.02 - - - 0.01 0.97 - mov eax, 1 | |
| - - - 0.98 - - - - 0.02 - mov edx, 16 | |
| - - 4.00 0.99 0.51 0.49 - 3.00 0.01 - vpcmpestri xmm0, xmmword ptr [rdi], 0 | |
| - - 0.01 0.01 - - - 0.98 - - cmp ecx, 16 | |
| - - 0.98 - - - - - 0.02 - jne .LBB1_15 | |
| - - - 0.02 - - - - 0.98 - add r8, -16 | |
| - - 0.01 0.99 - - - - - - add rdi, 16 | |
| - - 0.03 0.96 - - - - 0.01 - cmp r8, 15 | |
| - - 0.02 - - - - - 0.98 - ja .LBB1_8 | |
| - - 0.01 0.95 - - - 0.01 0.03 - test r8, r8 | |
| - - 0.02 - - - - - 0.98 - je .LBB1_14 | |
| - - - 0.02 0.49 0.51 - 0.01 0.97 - cmp byte ptr [rdi], sil | |
| - - 0.02 - - - - - 0.98 - je .LBB1_16 | |
| - - 0.01 0.98 - - - 0.01 - - inc rdi | |
| - - - 0.01 - - - 0.98 0.01 - dec r8 | |
| - - 0.98 - - - - - 0.02 - jne .LBB1_11 | |
| - - - - - - - - - - xor edi, edi | |
| - - 0.01 0.99 - - - - - - mov rax, rdi | |
| - - 0.01 - 0.50 0.50 - 0.99 1.00 - ret | |
| - - - 0.98 - - - 0.02 - - mov eax, ecx | |
| - - - 0.02 - - - 0.03 0.95 - add rdi, rax | |
| - - 0.01 0.02 - - - 0.96 0.01 - mov rax, rdi | |
| - - - 1.00 0.50 0.50 - - 1.00 - ret | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment