Skip to content

Instantly share code, notes, and snippets.

@EgorBot
Created September 24, 2024 18:39
Show Gist options
  • Save EgorBot/281e11f5a16c517daabecb2bf3bbf10c to your computer and use it in GitHub Desktop.
Save EgorBot/281e11f5a16c517daabecb2bf3bbf10c to your computer and use it in GitHub Desktop.
base_asm_cc85073d.asm
Samples: 96K of event 'cpu-clock', 11999 Hz, Event count (approx.): 8021391660, [percent: local period]
void [System.Private.CoreLib] System.Buffers.Binary.BinaryPrimitives::ReverseEndianness(valuetype System.ReadOnlySpan`1<!!0>,valuetype System.Span`1<!!0>)[OptimizedTier1]() /tmp/jitted-45375-7260.so
Percent
Disassembly of section .text:
0000000000000080 <void [System.Private.CoreLib] System.Buffers.Binary.BinaryPrimitives::ReverseEndianness(valuetype System.ReadOnlySpan`1<!!0>,valuetype System.Span`1<!!0>)[OptimizedTier1]>:
0.39 push rbp
push rbx
push rax
lea rbp,[rsp+0x10]
0.02 cmp ecx,esi
↓ jl 141
0.06 cmp rdi,rdx
↓ je 3c
test esi,esi
↓ je 3c
0.26 test ecx,ecx
↓ je 3c
0.34 mov rbx,rdx
sub rbx,rdi
mov eax,esi
shl rax,0x2
0.07 cmp rbx,rax
↓ jb 84
0.24 mov eax,ecx
shl rax,0x2
0.18 neg rax
0.32 cmp rax,rbx
↓ jb 84
0.22 3c: xor eax,eax
lea ecx,[rsi-0x8]
test ecx,ecx
↓ jl 67
0.03 vmovups ymm0,YMMWORD PTR [rip+0x113] # 1e0 <void [System.Private.CoreLib] System.Buffers.Binary.BinaryPrimitives::ReverseEndianness(valuetype System.ReadOnlySpan`1<!!0>,valuetype System.Span`1<!!0>)[OptimizedTier1]+0x160>
0.09 nop
11.35 50: mov r8d,eax
0.01 vpermb ymm1,ymm0,YMMWORD PTR [rdi+r8*4]
30.56 vmovups YMMWORD PTR [rdx+r8*4],ymm1
27.50 add eax,0x8
0.21 cmp eax,ecx
↑ jle 50
0.41 67: lea ecx,[rsi-0x4]
6a: cmp eax,ecx
↓ jle 11e
72: cmp eax,esi
↓ jl 108
0.00 7a: vzeroupper
add rsp,0x8
pop rbx
27.73 pop rbp
0.01 ← ret
84: test bl,0x3
↓ je 96
movabs rax,0x7a11f318cbe8
→ call QWORD PTR [rax]
int3
96: mov rax,rbx
sar rax,0x3f
and rax,0x3
add rax,rbx
sar rax,0x2
test eax,eax
↑ jl 3c
cmp esi,0x8
↓ jl d1
b1: add esi,0xfffffff8
mov eax,esi
vmovups ymm0,YMMWORD PTR [rip+0xa2] # 1e0 <void [System.Private.CoreLib] System.Buffers.Binary.BinaryPrimitives::ReverseEndianness(valuetype System.ReadOnlySpan`1<!!0>,valuetype System.Span`1<!!0>)[OptimizedTier1]+0x160>
vpermb ymm0,ymm0,YMMWORD PTR [rdi+rax*4]
mov eax,esi
vmovups YMMWORD PTR [rdx+rax*4],ymm0
cmp esi,0x8
↑ jge b1
d1: cmp esi,0x4
↓ jl f2
add esi,0xfffffffc
mov eax,esi
vmovups xmm0,XMMWORD PTR [rdi+rax*4]
vpshufb xmm0,xmm0,XMMWORD PTR [rip+0x77] # 1e0 <void [System.Private.CoreLib] System.Buffers.Binary.BinaryPrimitives::ReverseEndianness(valuetype System.ReadOnlySpan`1<!!0>,valuetype System.Span`1<!!0>)[OptimizedTier1]+0x160>
mov eax,esi
vmovups XMMWORD PTR [rdx+rax*4],xmm0
↑ jmp d1
f2: test esi,esi
↑ jle 7a
dec esi
movsxd rax,esi
mov eax,DWORD PTR [rdi+rax*4]
movsxd rcx,esi
movbe DWORD PTR [rdx+rcx*4],eax
↑ jmp f2
108: movsxd rcx,eax
mov ecx,DWORD PTR [rdi+rcx*4]
movsxd r8,eax
movbe DWORD PTR [rdx+r8*4],ecx
inc eax
↑ jmp 72
11e: mov r8d,eax
vmovups xmm0,XMMWORD PTR [rdi+r8*4]
vpshufb xmm0,xmm0,XMMWORD PTR [rip+0x30] # 1e0 <void [System.Private.CoreLib] System.Buffers.Binary.BinaryPrimitives::ReverseEndianness(valuetype System.ReadOnlySpan`1<!!0>,valuetype System.Span`1<!!0>)[OptimizedTier1]+0x160>
mov r8d,eax
vmovups XMMWORD PTR [rdx+r8*4],xmm0
add eax,0x4
↑ jmp 6a
141: movabs rax,0x7a11f318cb58
→ call QWORD PTR [rax]
int3
Samples: 96K of event 'cpu-clock', 11999 Hz, Event count (approx.): 8021391660, [percent: local period]
instance void [benchapp] Bench::Reverse()[OptimizedTier1]() /tmp/jitted-45375-7262.so
Percent
Disassembly of section .text:
0000000000000080 <instance void [benchapp] Bench::Reverse()[OptimizedTier1]>:
4.91 push rbp
0.26 mov rbp,rsp
mov rsi,QWORD PTR [rdi+0x8]
29.33 test rsi,rsi
↓ jne 38
xor edx,edx
xor esi,esi
0.44 11: mov rdi,QWORD PTR [rdi+0x10]
test rdi,rdi
↓ je 41
lea rcx,[rdi+0x10]
mov eax,DWORD PTR [rdi+0x8]
0.56 21: mov rdi,rdx
mov rdx,rcx
mov ecx,eax
7.75 movabs rax,0x7a11f30ee3a0
0.60 → call QWORD PTR [rax]
8.12 nop
pop rbp
8.16 ← ret
7.75 38: lea rdx,[rsi+0x10]
mov esi,DWORD PTR [rsi+0x8]
32.12 ↑ jmp 11
41: xor ecx,ecx
xor eax,eax
↑ jmp 21
Samples: 96K of event 'cpu-clock', 11999 Hz, Event count (approx.): 8021391660, [percent: local period]
instance void [5bdff830-7c42-4f2b-9661-e5a6009c552dEmitted] BenchmarkDotNet.Autogenerated.Runnable_0::WorkloadActionUnroll(int64)[Optimized]() /tmp/jitted-45375-7220.so
Percent
Disassembly of section .text:
0000000000000080 <instance void [5bdff830-7c42-4f2b-9661-e5a6009c552dEmitted] BenchmarkDotNet.Autogenerated.Runnable_0::WorkloadActionUnroll(int64)[Optimized]>:
push rbp
push r15
push rbx
lea rbp,[rsp+0x10]
mov rbx,rdi
test rsi,rsi
↓ jle d1
mov r15,rsi
18: mov rax,QWORD PTR [rbx+0x40]
1.54 mov rdi,QWORD PTR [rax+0x8]
3.93 → call QWORD PTR [rax+0x18]
1.09 mov rax,QWORD PTR [rbx+0x40]
1.24 mov rdi,QWORD PTR [rax+0x8]
3.56 → call QWORD PTR [rax+0x18]
0.52 mov rax,QWORD PTR [rbx+0x40]
1.76 mov rdi,QWORD PTR [rax+0x8]
4.16 → call QWORD PTR [rax+0x18]
0.60 mov rax,QWORD PTR [rbx+0x40]
1.27 mov rdi,QWORD PTR [rax+0x8]
4.05 → call QWORD PTR [rax+0x18]
0.60 mov rax,QWORD PTR [rbx+0x40]
1.61 mov rdi,QWORD PTR [rax+0x8]
3.75 → call QWORD PTR [rax+0x18]
0.86 mov rax,QWORD PTR [rbx+0x40]
1.31 mov rdi,QWORD PTR [rax+0x8]
3.26 → call QWORD PTR [rax+0x18]
0.60 mov rax,QWORD PTR [rbx+0x40]
1.50 mov rdi,QWORD PTR [rax+0x8]
3.90 → call QWORD PTR [rax+0x18]
0.71 mov rax,QWORD PTR [rbx+0x40]
1.20 mov rdi,QWORD PTR [rax+0x8]
4.27 → call QWORD PTR [rax+0x18]
0.82 mov rax,QWORD PTR [rbx+0x40]
1.05 mov rdi,QWORD PTR [rax+0x8]
4.12 → call QWORD PTR [rax+0x18]
0.52 mov rax,QWORD PTR [rbx+0x40]
1.54 mov rdi,QWORD PTR [rax+0x8]
3.93 → call QWORD PTR [rax+0x18]
0.64 mov rax,QWORD PTR [rbx+0x40]
1.72 mov rdi,QWORD PTR [rax+0x8]
4.83 → call QWORD PTR [rax+0x18]
0.82 mov rax,QWORD PTR [rbx+0x40]
1.50 mov rdi,QWORD PTR [rax+0x8]
4.38 → call QWORD PTR [rax+0x18]
0.71 mov rax,QWORD PTR [rbx+0x40]
1.54 mov rdi,QWORD PTR [rax+0x8]
4.57 → call QWORD PTR [rax+0x18]
0.71 mov rax,QWORD PTR [rbx+0x40]
1.91 mov rdi,QWORD PTR [rax+0x8]
4.31 → call QWORD PTR [rax+0x18]
0.90 mov rax,QWORD PTR [rbx+0x40]
1.46 mov rdi,QWORD PTR [rax+0x8]
4.23 → call QWORD PTR [rax+0x18]
0.94 mov rax,QWORD PTR [rbx+0x40]
1.76 mov rdi,QWORD PTR [rax+0x8]
3.15 → call QWORD PTR [rax+0x18]
0.64 dec r15
↑ jne 18
d1: pop rbx
pop r15
pop rbp
← ret
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment