Created
December 31, 2025 16:45
-
-
Save iczelia/8e6df523302d8d7df7a5994bf2c5133c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Iterations: 100 | |
| Instructions: 4900 | |
| Total Cycles: 1511 | |
| Total uOps: 5600 | |
| Dispatch Width: 4 | |
| uOps Per Cycle: 3.71 | |
| IPC: 3.24 | |
| Block RThroughput: 14.0 | |
| Instruction Info: | |
| [1]: #uOps | |
| [2]: Latency | |
| [3]: RThroughput | |
| [4]: MayLoad | |
| [5]: MayStore | |
| [6]: HasSideEffects (U) | |
| [1] [2] [3] [4] [5] [6] Instructions: | |
| 1 1 0.25 mov rax, rdi | |
| 1 1 0.25 test rdx, rdx | |
| 1 1 0.50 sete cl | |
| 1 1 0.25 test al, 15 | |
| 1 1 0.50 sete dil | |
| 1 1 0.25 or dil, cl | |
| 1 1 0.50 jne .LBB0_6 | |
| 1 1 0.50 lea rdi, [rax + 1] | |
| 2 6 0.50 * cmp byte ptr [rax], sil | |
| 1 1 0.50 je .LBB0_16 | |
| 1 1 0.50 lea rcx, [rdx - 1] | |
| 1 1 0.25 inc rax | |
| 1 1 0.25 cmp rdx, 1 | |
| 1 1 0.50 je .LBB0_5 | |
| 1 1 0.25 mov r8d, edi | |
| 1 1 0.25 and r8d, 15 | |
| 1 1 0.25 inc rdi | |
| 1 1 0.25 mov rdx, rcx | |
| 1 1 0.25 test r8, r8 | |
| 1 1 0.50 jne .LBB0_2 | |
| 1 1 0.25 cmp rcx, 16 | |
| 1 1 0.50 jae .LBB0_7 | |
| 1 1 0.50 jmp .LBB0_10 | |
| 1 1 0.25 mov rcx, rdx | |
| 1 1 0.25 cmp rcx, 16 | |
| 1 1 0.50 jb .LBB0_10 | |
| 1 1 1.00 vmovd xmm0, esi | |
| 1 3 1.00 vpbroadcastb xmm0, xmm0 | |
| 2 6 0.50 * vpcmpeqb xmm1, xmm0, xmmword ptr [rax] | |
| 1 3 1.00 vpmovmskb edx, xmm1 | |
| 1 1 0.25 test edx, edx | |
| 1 1 0.50 jne .LBB0_15 | |
| 1 1 0.25 add rcx, -16 | |
| 1 1 0.25 add rax, 16 | |
| 1 1 0.25 cmp rcx, 15 | |
| 1 1 0.50 ja .LBB0_8 | |
| 1 1 0.25 test rcx, rcx | |
| 1 1 0.50 je .LBB0_14 | |
| 2 6 0.50 * cmp byte ptr [rax], sil | |
| 1 1 0.50 je .LBB0_16 | |
| 1 1 0.25 inc rax | |
| 1 1 0.25 dec rcx | |
| 1 1 0.50 jne .LBB0_11 | |
| 1 0 0.25 xor eax, eax | |
| 3 7 1.00 U ret | |
| 1 0 0.25 xor ecx, ecx | |
| 1 3 1.00 tzcnt ecx, edx | |
| 1 1 0.25 add rax, rcx | |
| 3 7 1.00 U ret | |
| Resources: | |
| [0] - BWDivider | |
| [1] - BWFPDivider | |
| [2] - BWPort0 | |
| [3] - BWPort1 | |
| [4] - BWPort2 | |
| [5] - BWPort3 | |
| [6] - BWPort4 | |
| [7] - BWPort5 | |
| [8] - BWPort6 | |
| [9] - BWPort7 | |
| Resource pressure per iteration: | |
| [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] | |
| - - 12.02 11.98 2.50 2.50 - 12.00 13.00 - | |
| Resource pressure by instruction: | |
| [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: | |
| - - - 0.02 - - - 0.97 0.01 - mov rax, rdi | |
| - - 0.98 - - - - 0.01 0.01 - test rdx, rdx | |
| - - 0.01 - - - - - 0.99 - sete cl | |
| - - - 0.97 - - - 0.03 - - test al, 15 | |
| - - 0.01 - - - - - 0.99 - sete dil | |
| - - 0.01 0.97 - - - 0.01 0.01 - or dil, cl | |
| - - 1.00 - - - - - - - jne .LBB0_6 | |
| - - - 0.03 - - - 0.97 - - lea rdi, [rax + 1] | |
| - - - 0.97 0.50 0.50 - 0.01 0.02 - cmp byte ptr [rax], sil | |
| - - 0.99 - - - - - 0.01 - je .LBB0_16 | |
| - - - - - - - 1.00 - - lea rcx, [rdx - 1] | |
| - - 0.01 0.01 - - - - 0.98 - inc rax | |
| - - - 0.03 - - - 0.01 0.96 - cmp rdx, 1 | |
| - - 0.98 - - - - - 0.02 - je .LBB0_5 | |
| - - 0.01 - - - - 0.98 0.01 - mov r8d, edi | |
| - - - 0.03 - - - 0.97 - - and r8d, 15 | |
| - - - 0.01 - - - 0.02 0.97 - inc rdi | |
| - - - 0.02 - - - 0.97 0.01 - mov rdx, rcx | |
| - - 0.02 0.97 - - - - 0.01 - test r8, r8 | |
| - - 0.02 - - - - - 0.98 - jne .LBB0_2 | |
| - - 0.01 - - - - 0.02 0.97 - cmp rcx, 16 | |
| - - 0.97 - - - - - 0.03 - jae .LBB0_7 | |
| - - 0.03 - - - - - 0.97 - jmp .LBB0_10 | |
| - - - 0.01 - - - 0.99 - - mov rcx, rdx | |
| - - - 0.03 - - - 0.97 - - cmp rcx, 16 | |
| - - 0.98 - - - - - 0.02 - jb .LBB0_10 | |
| - - - - - - - 1.00 - - vmovd xmm0, esi | |
| - - - - - - - 1.00 - - vpbroadcastb xmm0, xmm0 | |
| - - - 1.00 0.50 0.50 - - - - vpcmpeqb xmm1, xmm0, xmmword ptr [rax] | |
| - - 1.00 - - - - - - - vpmovmskb edx, xmm1 | |
| - - - 0.97 - - - 0.03 - - test edx, edx | |
| - - 0.99 - - - - - 0.01 - jne .LBB0_15 | |
| - - - 0.99 - - - - 0.01 - add rcx, -16 | |
| - - 0.02 0.01 - - - - 0.97 - add rax, 16 | |
| - - - 0.96 - - - 0.01 0.03 - cmp rcx, 15 | |
| - - 0.98 - - - - - 0.02 - ja .LBB0_8 | |
| - - 0.97 0.01 - - - 0.02 - - test rcx, rcx | |
| - - 0.02 - - - - - 0.98 - je .LBB0_14 | |
| - - 0.01 0.02 0.50 0.50 - 0.01 0.96 - cmp byte ptr [rax], sil | |
| - - 1.00 - - - - - - - je .LBB0_16 | |
| - - 0.02 0.01 - - - 0.96 0.01 - inc rax | |
| - - - 0.96 - - - 0.03 0.01 - dec rcx | |
| - - 0.98 - - - - - 0.02 - jne .LBB0_11 | |
| - - - - - - - - - - xor eax, eax | |
| - - - 0.03 0.50 0.50 - 0.97 1.00 - ret | |
| - - - - - - - - - - xor ecx, ecx | |
| - - - 1.00 - - - - - - tzcnt ecx, edx | |
| - - - 0.97 - - - 0.02 0.01 - add rax, rcx | |
| - - - 0.98 0.50 0.50 - 0.02 1.00 - ret |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment