Skip to content

Instantly share code, notes, and snippets.

@EgorBot
Created September 29, 2024 23:32
Show Gist options
  • Save EgorBot/69231f5bf90662ab86ce56f076186913 to your computer and use it in GitHub Desktop.
Save EgorBot/69231f5bf90662ab86ce56f076186913 to your computer and use it in GitHub Desktop.
diff_asm_f256ece8.asm
Samples: 96K of event 'cpu-clock', 11999 Hz, Event count (approx.): 8028475560, [percent: local period]
void [System.Private.CoreLib] System.Buffers.Binary.BinaryPrimitives::ReverseEndianness(valuetype System.ReadOnlySpan`1<!!0>,valuetype System.Span`1<!!0>)[OptimizedTier1]() /tmp/jitted-60783-7253.so
Percent
Disassembly of section .text:
0000000000000080 <void [System.Private.CoreLib] System.Buffers.Binary.BinaryPrimitives::ReverseEndianness(valuetype System.ReadOnlySpan`1<!!0>,valuetype System.Span`1<!!0>)[OptimizedTier1]>:
void [System.Private.CoreLib] System.Buffers.Binary.BinaryPrimitives::ReverseEndianness(valuetype System.ReadOnlySpan`1<!!0>,valuetype System.Span`1<!!0>)[OptimizedTier1]():
# -----------------------------------------------------------------------------
# perf-annotate listing (Intel operand order) of the OptimizedTier1 code for
# ReverseEndianness(ReadOnlySpan<T>, Span<T>).
# A leading number on a line is that instruction's Percent of samples.
# Arrows: "↓"/"↑" = forward/backward jump, "→" = call, "←" = return.
#
# Register roles as used by the code below:
#   rdi = base pointer that is read from
#   esi = element count that drives every loop
#   rdx = base pointer that is written to
#   ecx = second element count; only compared against esi and used in the
#         overlap test, then reused as scratch
#   (presumably the source span and destination span as pointer/length pairs,
#    in signature order -- the calling convention is not shown in this listing)
# Elements are 4 bytes wide here: indexes are scaled by 4, counts are shifted
# left by 2, and the scalar paths move DWORDs.
#
# Layout:
#   entry .. 3c : length check and overlap analysis
#   3c .. 86    : ascending copy (16 / 8 / 4 / 1 elements per step), epilogue
#   90 .. 124   : overlapping buffers, descending copy
#   13e,154,177 : out-of-line step bodies of the ascending copy
#   19a         : failure call for the length check
# -----------------------------------------------------------------------------
# Prologue. "push rax" only reserves 8 bytes of stack: the epilogue releases it
# with "add rsp,0x8" instead of a pop. rbp ends up pointing at the saved rbp.
0.58 push rbp
push rbx
push rax
lea rbp,[rsp+0x10]
# Signed compare: if ecx < esi, leave through 19a (a call followed by int3).
0.01 cmp ecx,esi
↓ jl 19a
# Same pointer for both buffers, or either count zero: no overlap analysis is
# done, go straight to the ascending copy at 3c.
cmp rdi,rdx
↓ je 3c
test esi,esi
↓ je 3c
0.26 test ecx,ecx
↓ je 3c
# rbx = rdx - rdi: byte distance from the read pointer to the write pointer.
0.00 mov rbx,rdx
sub rbx,rdi
# rax = esi * 4 (zero-extended). Unsigned rbx < rax means the write pointer
# lies inside the esi*4 bytes that start at rdi -> overlap handling at 90.
mov eax,esi
0.02 shl rax,0x2
cmp rbx,rax
↓ jb 90
# rax = -(ecx * 4). Unsigned rax < rbx means rbx, read as a negative distance,
# is smaller in magnitude than ecx*4, i.e. the read pointer lies inside the
# ecx*4 bytes that start at rdx -> overlap handling at 90.
0.00 mov eax,ecx
shl rax,0x2
0.23 neg rax
0.01 cmp rax,rbx
↓ jb 90
# ---- Ascending copy -------------------------------------------------------
# eax = element index, starting at 0.
# ecx = esi - 16: highest index at which a full 16-element (64-byte) block
# still fits; negative means fewer than 16 elements, so skip to 68.
0.03 3c: xor eax,eax
lea ecx,[rsi-0x10]
0.03 test ecx,ecx
↓ jl 68
# zmm0 = 64-byte constant at listing offset 260, used below as the byte-index
# operand of vpermb. Its contents are not part of this listing (presumably the
# per-DWORD byte-reversal pattern).
0.01 vmovups zmm0,ZMMWORD PTR [rip+0x191] # 260 <void [System.Private.CoreLib] System.Buffers.Binary.BinaryPrimitives::ReverseEndianness(valuetype System.ReadOnlySpan`1<!!0>,valuetype System.Span`1<!!0>)[OptimizedTier1]+0x1e0>
# Single nop so the loop head below lands on offset 50 (presumably alignment
# padding).
0.39 nop
# Hot loop: 16 elements per iteration. The Percent column puts about 98% of
# this function's samples on these six instructions
# (4.94 + 0.35 + 27.17 + 65.29 + 0.37).
# NOTE(review): the large shares on the store and on "add" are likely sample
# skid from the load/permute/store rather than the cost of "add" itself --
# confirm with a precise event before drawing conclusions.
# Per iteration: zmm1 = bytes of [rdi + index*4] selected by the indexes in
# zmm0, stored to [rdx + index*4]; index += 16; repeat while index <= ecx
# (signed).
4.94 50: mov r8d,eax
0.35 vpermb zmm1,zmm0,ZMMWORD PTR [rdi+r8*4]
27.17 vmovups ZMMWORD PTR [rdx+r8*4],zmm1
65.29 add eax,0x10
0.37 cmp eax,ecx
0.00 ↑ jle 50
# While index <= esi - 8: run the 8-element (256-bit) step at 177, which jumps
# back to 6b to re-test.
68: lea ecx,[rsi-0x8]
6b: cmp eax,ecx
↓ jle 177
# While index <= esi - 4: run the 4-element (128-bit) step at 154, which jumps
# back to 76.
0.01 lea ecx,[rsi-0x4]
76: cmp eax,ecx
↓ jle 154
# While index < esi: run the single-element step at 13e, which jumps back to 7e.
7e: cmp eax,esi
↓ jl 13e
# Common exit for both copy directions: clear the upper vector state, drop the
# 8 bytes reserved by "push rax", restore rbx/rbp, return.
86: vzeroupper
0.27 add rsp,0x8
0.01 pop rbx
0.00 pop rbp
0.01 ← ret
# ---- Overlapping buffers --------------------------------------------------
# No instruction from here to the end of the function received samples in
# this profile (the Percent column is empty).
# If the byte distance is not a multiple of 4, call through the pointer stored
# at 0x75b4e5bacc78. The int3 after the call indicates it is not expected to
# return (presumably a throw helper -- the target is not resolved here).
90: test bl,0x3
↓ je a2
movabs rax,0x75b4e5bacc78
→ call QWORD PTR [rax]
int3
# rax = rbx / 4 as a signed division rounding toward zero: add a bias of 3
# when rbx is negative, then shift right arithmetically by 2.
# Only the low 32 bits of the quotient are tested: negative -> the ascending
# copy at 3c is used; otherwise fall through into the descending copy.
a2: mov rax,rbx
sar rax,0x3f
and rax,0x3
add rax,rbx
sar rax,0x2
test eax,eax
↑ jl 3c
# Descending copy. esi is both the remaining element count and, right after
# each subtraction, the index of the block to process.
cmp esi,0x10
↓ jl e1
# 16 elements per iteration while esi >= 16 ("add esi,0xfffffff0" is esi -= 16).
# Unlike the ascending loop, the index constant is reloaded from memory on
# every iteration and zmm0 is overwritten by the permute result.
bd: add esi,0xfffffff0
mov eax,esi
vmovups zmm0,ZMMWORD PTR [rip+0x114] # 260 <void [System.Private.CoreLib] System.Buffers.Binary.BinaryPrimitives::ReverseEndianness(valuetype System.ReadOnlySpan`1<!!0>,valuetype System.Span`1<!!0>)[OptimizedTier1]+0x1e0>
vpermb zmm0,zmm0,ZMMWORD PTR [rdi+rax*4]
mov eax,esi
vmovups ZMMWORD PTR [rdx+rax*4],zmm0
cmp esi,0x10
↑ jge bd
# 8 elements per iteration while esi >= 8 (esi -= 8), using the first 32 bytes
# of the same constant.
e1: cmp esi,0x8
↓ jl 103
add esi,0xfffffff8
mov eax,esi
vmovups ymm0,YMMWORD PTR [rip+0xed] # 260 <void [System.Private.CoreLib] System.Buffers.Binary.BinaryPrimitives::ReverseEndianness(valuetype System.ReadOnlySpan`1<!!0>,valuetype System.Span`1<!!0>)[OptimizedTier1]+0x1e0>
vpermb ymm0,ymm0,YMMWORD PTR [rdi+rax*4]
mov eax,esi
vmovups YMMWORD PTR [rdx+rax*4],ymm0
↑ jmp e1
# 4 elements per iteration while esi >= 4 (esi -= 4). Here the data is loaded
# first and vpshufb takes the first 16 bytes of the constant as its memory
# operand.
103: cmp esi,0x4
↓ jl 124
add esi,0xfffffffc
mov eax,esi
vmovups xmm0,XMMWORD PTR [rdi+rax*4]
vpshufb xmm0,xmm0,XMMWORD PTR [rip+0xc5] # 260 <void [System.Private.CoreLib] System.Buffers.Binary.BinaryPrimitives::ReverseEndianness(valuetype System.ReadOnlySpan`1<!!0>,valuetype System.Span`1<!!0>)[OptimizedTier1]+0x1e0>
mov eax,esi
vmovups XMMWORD PTR [rdx+rax*4],xmm0
↑ jmp 103
# One element per iteration until esi <= 0, then leave through the shared
# epilogue at 86: load a DWORD from [rdi + esi*4], store it with movbe (which
# writes the bytes in reversed order) to [rdx + esi*4].
124: test esi,esi
↑ jle 86
dec esi
movsxd rax,esi
mov eax,DWORD PTR [rdi+rax*4]
movsxd rcx,esi
movbe DWORD PTR [rdx+rcx*4],eax
↑ jmp 124
# ---- Out-of-line step bodies of the ascending copy ------------------------
# Single-element step: DWORD load, movbe store, index += 1, back to the test
# at 7e.
13e: movsxd rcx,eax
mov ecx,DWORD PTR [rdi+rcx*4]
movsxd r8,eax
movbe DWORD PTR [rdx+r8*4],ecx
inc eax
↑ jmp 7e
# 4-element step: 16-byte load, vpshufb against the first 16 bytes of the
# constant, 16-byte store, index += 4, back to the test at 76.
154: mov r8d,eax
vmovups xmm0,XMMWORD PTR [rdi+r8*4]
vpshufb xmm0,xmm0,XMMWORD PTR [rip+0x7a] # 260 <void [System.Private.CoreLib] System.Buffers.Binary.BinaryPrimitives::ReverseEndianness(valuetype System.ReadOnlySpan`1<!!0>,valuetype System.Span`1<!!0>)[OptimizedTier1]+0x1e0>
mov r8d,eax
vmovups XMMWORD PTR [rdx+r8*4],xmm0
add eax,0x4
↑ jmp 76
# 8-element step: load the first 32 bytes of the constant, vpermb against
# [rdi + index*4], 32-byte store, index += 8, back to the test at 6b.
177: mov r8d,eax
vmovups ymm0,YMMWORD PTR [rip+0x5e] # 260 <void [System.Private.CoreLib] System.Buffers.Binary.BinaryPrimitives::ReverseEndianness(valuetype System.ReadOnlySpan`1<!!0>,valuetype System.Span`1<!!0>)[OptimizedTier1]+0x1e0>
vpermb ymm0,ymm0,YMMWORD PTR [rdi+r8*4]
mov r8d,eax
vmovups YMMWORD PTR [rdx+r8*4],ymm0
add eax,0x8
↑ jmp 6b
# ---- Length-check failure (ecx < esi at entry) ----------------------------
# Call through the pointer stored at 0x75b4e5bacbe8; the int3 after it
# indicates the call is not expected to return (presumably the throw helper
# for a too-short destination -- the target is not resolved in this listing).
19a: movabs rax,0x75b4e5bacbe8
→ call QWORD PTR [rax]
int3
Samples: 96K of event 'cpu-clock', 11999 Hz, Event count (approx.): 8028475560, [percent: local period]
instance void [benchapp] Bench::Reverse()[OptimizedTier1]() /tmp/jitted-60783-7255.so
Percent
Disassembly of section .text:
0000000000000080 <instance void [benchapp] Bench::Reverse()[OptimizedTier1]>:
instance void [benchapp] Bench::Reverse()[OptimizedTier1]():
# -----------------------------------------------------------------------------
# perf-annotate listing (Intel operand order) of the OptimizedTier1 code for
# the instance method Bench::Reverse().
# A leading number on a line is that instruction's Percent of samples.
#
# The method reads two pointer-sized fields at [rdi+0x8] and [rdi+0x10]
# (rdi on entry is presumably "this"). Each loaded reference is turned into a
# (pointer, count) pair: null -> (0, 0); non-null -> (ref + 0x10, DWORD at
# ref + 0x8). Presumably these are array fields being converted to spans --
# the object layout is not shown here, confirm against the benchmark source.
# The two pairs are then passed in rdi/esi and rdx/ecx to an indirect call.
# -----------------------------------------------------------------------------
0.40 push rbp
mov rbp,rsp
# First reference: rsi = [rdi+0x8]. Non-null is handled out of line at 38;
# null falls through and produces pointer edx = 0, count esi = 0.
0.50 mov rsi,QWORD PTR [rdi+0x8]
7.78 test rsi,rsi
↓ jne 38
xor edx,edx
xor esi,esi
# Second reference: rdi = [rdi+0x10] (this overwrites the incoming rdi).
# Null is handled out of line at 41; non-null gives pointer rcx = rdi + 0x10
# and count eax = DWORD at [rdi+0x8].
0.40 11: mov rdi,QWORD PTR [rdi+0x10]
test rdi,rdi
↓ je 41
1.09 lea rcx,[rdi+0x10]
1.14 mov eax,DWORD PTR [rdi+0x8]
# Move the pairs into the registers used for the call: rdi = first pointer
# (held in rdx until now), rdx = second pointer, ecx = second count; esi
# already holds the first count.
# The call goes through the pointer stored at 0x75b4e5b0e310; the target is
# not resolved in this listing (presumably BinaryPrimitives.ReverseEndianness,
# whose listing reads exactly these four registers).
22.46 21: mov rdi,rdx
mov rdx,rcx
11.60 mov ecx,eax
2.23 movabs rax,0x75b4e5b0e310
0.25 → call QWORD PTR [rax]
0.55 nop
11.50 pop rbp
0.10 ← ret
# First reference non-null: pointer rdx = rsi + 0x10, count esi = DWORD at
# [rsi+0x8]; continue with the second reference at 11.
1.04 38: lea rdx,[rsi+0x10]
17.20 mov esi,DWORD PTR [rsi+0x8]
21.76 ↑ jmp 11
# Second reference null: pointer ecx = 0, count eax = 0; continue at 21.
41: xor ecx,ecx
xor eax,eax
↑ jmp 21
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment