Skip to content

Instantly share code, notes, and snippets.

@EgorBot
Created November 20, 2024 20:54
Show Gist options
  • Save EgorBot/0430a175d5fe1d918a25acdce0d357cf to your computer and use it in GitHub Desktop.
Save EgorBot/0430a175d5fe1d918a25acdce0d357cf to your computer and use it in GitHub Desktop.
diff_asm_c807237a.asm
Samples: 10K of event 'cpu-clock', 1999 Hz, Event count (approx.): 5018508000, [percent: local period]
RuntimeTypeHandle::GetElementTypeHandleFromHandle(void*)() /home/egorbot/core_root_diff/libcoreclr.so
Percent
Disassembly of section .text:
00000000003b20b0 <GetCLRRuntimeHost@@V1.0+0x17e3c8>:
1.96 sub sp, sp, #0x30
1.38 stp x29, x30, [sp, #16]
1.48 str x19, [sp, #32]
1.71 add x29, sp, #0x10
1.61 adrp x8, __getauxval@plt+0x362e0
1.50 ldr x8, [x8, #1360]
1.36 ldr x8, [x8]
17.92 str x8, [sp, #8]
1.63 str x0, [sp]
1.71 ↓ tbnz w0, #1, 5c
1.38 ldr w8, [x0]
and w8, w8, #0xc0000
1.59 cmp w8, #0x80, lsl #12
↓ b.ne 88
3.71 add x8, x0, #0x30
1.73 3c: ldr x0, [x8]
0.06 adrp x8, __getauxval@plt+0x362e0
1.59 ldr x8, [x8, #1360]
1.69 ldr x9, [sp, #8]
39.15 ldr x8, [x8]
2.69 cmp x8, x9
↓ b.eq a4
58: → bl __stack_chk_fail@plt
5c: mov x0, sp
bl 2ed770 <GetCLRRuntimeHost@@V1.0+0xb9a88>
↓ cbnz w0, 88
ldr x8, [sp]
sub x19, x8, #0x2
mov x0, x19
bl 2eaa24 <GetCLRRuntimeHost@@V1.0+0xb6d3c>
↓ cbnz w0, 88
mov x0, x19
bl 2eab80 <GetCLRRuntimeHost@@V1.0+0xb6e98>
↓ cbz w0, b4
88: mov x0, xzr
adrp x8, __getauxval@plt+0x362e0
ldr x8, [x8, #1360]
ldr x9, [sp, #8]
ldr x8, [x8]
cmp x8, x9
↑ b.ne 58
10.66 a4: ldp x29, x30, [sp, #16]
1.96 ldr x19, [sp, #32]
add sp, sp, #0x30
1.52 ← ret
b4: add x8, x19, #0x10
↑ b 3c
Samples: 10K of event 'cpu-clock', 1999 Hz, Event count (approx.): 5018508000, [percent: local period]
instance class [System.Runtime]System.Type [benchapp] Bench::ValueElementType()[OptimizedTier1]() /tmp/jitted-50802-7275.so
Percent
Disassembly of section .text:
0000000000000080 <instance class [System.Runtime]System.Type [benchapp] Bench::ValueElementType()[OptimizedTier1]>:
6.88 stp x29, x30, [sp, #-32]!
10.28 str x19, [sp, #24]
4.38 mov x29, sp
mov x0, #0xdc80 // #56448
4.10 movk x0, #0x1dc8, lsl #16
movk x0, #0xf73f, lsl #32
7.08 bl fffffffffffeef60 <instance class [System.Runtime]System.Type [benchapp] Bench::ValueElementType()[OptimizedTier1]+0xfffffffffffeeee0>
7.50 mov x19, x0
↓ cbz x19, 60
5.69 mov x0, x19
6.53 bl fffffffffffeef70 <instance class [System.Runtime]System.Type [benchapp] Bench::ValueElementType()[OptimizedTier1]+0xfffffffffffeeef0>
6.53 ↓ cbnz x0, 48
mov x0, x19
mov x1, #0x5a40 // #23104
movk x1, #0x1d47, lsl #16
movk x1, #0xf73f, lsl #32
ldr x1, [x1]
→ blr x1
20.28 48: mov x1, #0x7eb8 // #32440
movk x1, #0x6821, lsl #16
5.62 movk x1, #0xf73f, lsl #32
5.56 54: ldr x19, [sp, #24]
5.07 ldp x29, x30, [sp], #32
4.51 ← ret
60: mov x0, xzr
↑ b 54
Samples: 10K of event 'cpu-clock', 1999 Hz, Event count (approx.): 5018508000, [percent: local period]
JIT_WriteBarrier() /home/egorbot/core_root_diff/libcoreclr.so
Percent
Disassembly of section .text:
00000000004a5b74 <GetCLRRuntimeHost@@V1.0+0x271e8c>:
9.40 stlr x15, [x14]
9.73 ldr x12, GetCLRRuntimeHost@@V1.0+0x271f28
20.58 ↓ cbz x12, 20
add x12, x12, x14, lsr #12
ldrb w17, [x12]
↓ cbnz x17, 20
mov w17, #0xff // #255
strb w17, [x12]
8.28 20: ldr x12, GetCLRRuntimeHost@@V1.0+0x271f30
14.65 ldr x17, GetCLRRuntimeHost@@V1.0+0x271f38
0.56 cmp x15, x12
15.44 ccmp x15, x17, #0x2, cs // cs = hs, nlast
8.72 ↓ b.cs 6c
ldr x12, GetCLRRuntimeHost@@V1.0+0x271f18
add x15, x12, x14, lsr #11
ldrb w12, [x15]
cmp x12, #0xff
↓ b.eq 6c
mov x12, #0xff // #255
strb w12, [x15]
ldr x12, GetCLRRuntimeHost@@V1.0+0x271f20
add x15, x12, x14, lsr #21
ldrb w12, [x15]
cmp x12, #0xff
↓ b.eq 6c
mov x12, #0xff // #255
strb w12, [x15]
12.64 6c: add x14, x14, #0x8
← ret
Samples: 10K of event 'cpu-clock', 1999 Hz, Event count (approx.): 5018508000, [percent: local period]
instance void [04e4d8c3-3aa1-47fe-99f7-02c4673b10f9Emitted] BenchmarkDotNet.Autogenerated.Runnable_0::WorkloadActionUnroll(int64)[Optimized]() /tmp/jitted-50802-7236.so
Percent
Disassembly of section .text:
0000000000000080 <instance void [04e4d8c3-3aa1-47fe-99f7-02c4673b10f9Emitted] BenchmarkDotNet.Autogenerated.Runnable_0::WorkloadActionUnroll(int64)[Optimized]>:
stp x29, x30, [sp, #-48]!
stp x19, x20, [sp, #24]
str x21, [sp, #40]
mov x29, sp
mov x19, x0
cmp x1, #0x0
↓ b.le 268
mov x20, x1
0.92 20: ldp x1, x21, [x19, #48]
0.53 ldr x0, [x1, #8]
0.92 ldr x1, [x1, #24]
0.66 → blr x1
0.92 mov x15, x0
add x0, x21, #0x8
0.40 mov x14, x0
0.66 bl fffffffffffff1d8 <instance void [04e4d8c3-3aa1-47fe-99f7-02c4673b10f9Emitted] BenchmarkDotNet.Autogenerated.Runnable_0::WorkloadActionUnroll(int64)[Optimized]+0xfffffffffffff158>
0.92 stlr xzr, [x0]
0.79 ldp x1, x21, [x19, #48]
0.92 ldr x0, [x1, #8]
0.92 ldr x1, [x1, #24]
0.66 → blr x1
1.85 mov x15, x0
add x0, x21, #0x8
0.53 mov x14, x0
0.26 bl fffffffffffff1d8 <instance void [04e4d8c3-3aa1-47fe-99f7-02c4673b10f9Emitted] BenchmarkDotNet.Autogenerated.Runnable_0::WorkloadActionUnroll(int64)[Optimized]+0xfffffffffffff158>
0.66 stlr xzr, [x0]
1.19 ldp x1, x21, [x19, #48]
0.79 ldr x0, [x1, #8]
0.92 ldr x1, [x1, #24]
0.79 → blr x1
0.92 mov x15, x0
add x0, x21, #0x8
0.92 mov x14, x0
0.92 bl fffffffffffff1d8 <instance void [04e4d8c3-3aa1-47fe-99f7-02c4673b10f9Emitted] BenchmarkDotNet.Autogenerated.Runnable_0::WorkloadActionUnroll(int64)[Optimized]+0xfffffffffffff158>
0.79 stlr xzr, [x0]
0.92 ldp x1, x21, [x19, #48]
1.06 ldr x0, [x1, #8]
1.85 ldr x1, [x1, #24]
1.72 → blr x1
0.40 mov x15, x0
0.40 add x0, x21, #0x8
mov x14, x0
0.13 bl fffffffffffff1d8 <instance void [04e4d8c3-3aa1-47fe-99f7-02c4673b10f9Emitted] BenchmarkDotNet.Autogenerated.Runnable_0::WorkloadActionUnroll(int64)[Optimized]+0xfffffffffffff158>
0.66 stlr xzr, [x0]
0.79 ldp x1, x21, [x19, #48]
0.40 ldr x0, [x1, #8]
0.92 ldr x1, [x1, #24]
0.92 → blr x1
1.32 mov x15, x0
add x0, x21, #0x8
0.66 mov x14, x0
0.53 bl fffffffffffff1d8 <instance void [04e4d8c3-3aa1-47fe-99f7-02c4673b10f9Emitted] BenchmarkDotNet.Autogenerated.Runnable_0::WorkloadActionUnroll(int64)[Optimized]+0xfffffffffffff158>
0.40 stlr xzr, [x0]
0.26 ldp x1, x21, [x19, #48]
0.79 ldr x0, [x1, #8]
1.32 ldr x1, [x1, #24]
0.13 → blr x1
0.53 mov x15, x0
add x0, x21, #0x8
0.26 mov x14, x0
0.53 bl fffffffffffff1d8 <instance void [04e4d8c3-3aa1-47fe-99f7-02c4673b10f9Emitted] BenchmarkDotNet.Autogenerated.Runnable_0::WorkloadActionUnroll(int64)[Optimized]+0xfffffffffffff158>
0.53 stlr xzr, [x0]
0.92 ldp x1, x21, [x19, #48]
0.79 ldr x0, [x1, #8]
1.72 ldr x1, [x1, #24]
0.92 → blr x1
1.06 mov x15, x0
add x0, x21, #0x8
0.53 mov x14, x0
0.53 bl fffffffffffff1d8 <instance void [04e4d8c3-3aa1-47fe-99f7-02c4673b10f9Emitted] BenchmarkDotNet.Autogenerated.Runnable_0::WorkloadActionUnroll(int64)[Optimized]+0xfffffffffffff158>
0.26 stlr xzr, [x0]
0.66 ldp x1, x21, [x19, #48]
0.92 ldr x0, [x1, #8]
1.58 ldr x1, [x1, #24]
1.32 → blr x1
1.45 mov x15, x0
add x0, x21, #0x8
0.40 mov x14, x0
0.92 bl fffffffffffff1d8 <instance void [04e4d8c3-3aa1-47fe-99f7-02c4673b10f9Emitted] BenchmarkDotNet.Autogenerated.Runnable_0::WorkloadActionUnroll(int64)[Optimized]+0xfffffffffffff158>
1.06 stlr xzr, [x0]
0.66 ldp x1, x21, [x19, #48]
0.40 ldr x0, [x1, #8]
2.11 ldr x1, [x1, #24]
1.45 → blr x1
0.66 mov x15, x0
add x0, x21, #0x8
0.92 mov x14, x0
0.92 bl fffffffffffff1d8 <instance void [04e4d8c3-3aa1-47fe-99f7-02c4673b10f9Emitted] BenchmarkDotNet.Autogenerated.Runnable_0::WorkloadActionUnroll(int64)[Optimized]+0xfffffffffffff158>
0.53 stlr xzr, [x0]
0.26 ldp x1, x21, [x19, #48]
0.13 ldr x0, [x1, #8]
1.19 ldr x1, [x1, #24]
0.53 → blr x1
0.92 mov x15, x0
add x0, x21, #0x8
1.06 mov x14, x0
0.26 bl fffffffffffff1d8 <instance void [04e4d8c3-3aa1-47fe-99f7-02c4673b10f9Emitted] BenchmarkDotNet.Autogenerated.Runnable_0::WorkloadActionUnroll(int64)[Optimized]+0xfffffffffffff158>
0.40 stlr xzr, [x0]
0.53 ldp x1, x21, [x19, #48]
0.66 ldr x0, [x1, #8]
1.06 ldr x1, [x1, #24]
0.92 → blr x1
0.26 mov x15, x0
add x0, x21, #0x8
0.13 mov x14, x0
0.92 bl fffffffffffff1d8 <instance void [04e4d8c3-3aa1-47fe-99f7-02c4673b10f9Emitted] BenchmarkDotNet.Autogenerated.Runnable_0::WorkloadActionUnroll(int64)[Optimized]+0xfffffffffffff158>
1.06 stlr xzr, [x0]
0.40 ldp x1, x21, [x19, #48]
0.26 ldr x0, [x1, #8]
1.98 ldr x1, [x1, #24]
0.79 → blr x1
0.79 mov x15, x0
0.40 add x0, x21, #0x8
mov x14, x0
0.53 bl fffffffffffff1d8 <instance void [04e4d8c3-3aa1-47fe-99f7-02c4673b10f9Emitted] BenchmarkDotNet.Autogenerated.Runnable_0::WorkloadActionUnroll(int64)[Optimized]+0xfffffffffffff158>
0.40 stlr xzr, [x0]
0.40 ldp x1, x21, [x19, #48]
0.26 ldr x0, [x1, #8]
2.24 ldr x1, [x1, #24]
0.26 → blr x1
0.66 mov x15, x0
add x0, x21, #0x8
0.79 mov x14, x0
0.53 bl fffffffffffff1d8 <instance void [04e4d8c3-3aa1-47fe-99f7-02c4673b10f9Emitted] BenchmarkDotNet.Autogenerated.Runnable_0::WorkloadActionUnroll(int64)[Optimized]+0xfffffffffffff158>
0.53 stlr xzr, [x0]
0.40 ldp x1, x21, [x19, #48]
0.40 ldr x0, [x1, #8]
0.79 ldr x1, [x1, #24]
0.66 → blr x1
1.06 mov x15, x0
add x0, x21, #0x8
0.92 mov x14, x0
0.66 bl fffffffffffff1d8 <instance void [04e4d8c3-3aa1-47fe-99f7-02c4673b10f9Emitted] BenchmarkDotNet.Autogenerated.Runnable_0::WorkloadActionUnroll(int64)[Optimized]+0xfffffffffffff158>
0.92 stlr xzr, [x0]
0.40 ldp x1, x21, [x19, #48]
1.45 ldr x0, [x1, #8]
0.79 ldr x1, [x1, #24]
0.92 → blr x1
0.92 mov x15, x0
add x0, x21, #0x8
0.66 mov x14, x0
0.92 bl fffffffffffff1d8 <instance void [04e4d8c3-3aa1-47fe-99f7-02c4673b10f9Emitted] BenchmarkDotNet.Autogenerated.Runnable_0::WorkloadActionUnroll(int64)[Optimized]+0xfffffffffffff158>
0.26 stlr xzr, [x0]
0.40 ldp x1, x21, [x19, #48]
0.66 ldr x0, [x1, #8]
1.58 ldr x1, [x1, #24]
0.79 → blr x1
0.92 mov x15, x0
add x0, x21, #0x8
0.53 mov x14, x0
0.79 bl fffffffffffff1d8 <instance void [04e4d8c3-3aa1-47fe-99f7-02c4673b10f9Emitted] BenchmarkDotNet.Autogenerated.Runnable_0::WorkloadActionUnroll(int64)[Optimized]+0xfffffffffffff158>
0.40 stlr xzr, [x0]
0.40 sub x20, x20, #0x1
↑ cbnz x20, 20
268: ldr x21, [sp, #40]
ldp x19, x20, [sp, #24]
ldp x29, x30, [sp], #48
← ret
Samples: 10K of event 'cpu-clock', 1999 Hz, Event count (approx.): 5018508000, [percent: local period]
RuntimeTypeHandle::GetTypeFromHandleIfExists(void*)() /home/egorbot/core_root_diff/libcoreclr.so
Percent
Disassembly of section .text:
00000000003b15a8 <GetCLRRuntimeHost@@V1.0+0x17d8c0>:
11.81 ↓ tbnz w0, #1, 10
11.65 ldr x8, [x0, #32]
14.14 ldr x0, [x8, #16]!
62.40 ← ret
10: add x8, x0, #0x6
ldr x0, [x8]
← ret
Samples: 10K of event 'cpu-clock', 1999 Hz, Event count (approx.): 5018508000, [percent: local period]
stub<1207> getNextJumpStub<emitBackToBackJump>() /tmp/jitted-50802-7270.so
Percent
Disassembly of section .text:
0000000000000080 <stub<1207> getNextJumpStub<emitBackToBackJump>>:
32.64 ldr x16, stub<1207
67.36 → br x16
ldnp d8, d5, [x13, #432]
udf #63295
Samples: 10K of event 'cpu-clock', 1999 Hz, Event count (approx.): 5018508000, [percent: local period]
stub<1206> getNextJumpStub<emitBackToBackJump>() /tmp/jitted-50802-7269.so
Percent
Disassembly of section .text:
0000000000000080 <stub<1206> getNextJumpStub<emitBackToBackJump>>:
33.19 ldr x16, stub<1206
66.81 → br x16
ldnp d16, d8, [x5, #432]
udf #63295
Samples: 10K of event 'cpu-clock', 1999 Hz, Event count (approx.): 5018508000, [percent: local period]
stub<1165> getNextJumpStub<emitBackToBackJump>() /tmp/jitted-50802-6904.so
Percent
Disassembly of section .text:
0000000000000080 <stub<1165> getNextJumpStub<emitBackToBackJump>>:
32.42 ldr x16, stub<1165
67.58 → br x16
ldnp d20, d22, [x27, #-352]
udf #63295
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment