Skip to content

Instantly share code, notes, and snippets.

@EgorBot
Created October 18, 2024 12:50
Show Gist options
  • Save EgorBot/7b05794ed4fdaf8907e115680691842b to your computer and use it in GitHub Desktop.
Save EgorBot/7b05794ed4fdaf8907e115680691842b to your computer and use it in GitHub Desktop.
diff_asm_829bf312.asm
Samples: 10K of event 'cpu-clock', 1999 Hz, Event count (approx.): 5002500000, [percent: local period]
Buffer::BulkMoveWithWriteBarrier(void*, void*, unsigned long)() /home/egorbot/core_root_diff/libcoreclr.so
Percent
Disassembly of section .text:
000000000032932c <GetCLRRuntimeHost@@V1.0+0xf7130>:
// perf-annotated AArch64 listing. The perf report header attributes this code to
// Buffer::BulkMoveWithWriteBarrier(void*, void*, unsigned long); the label above is only
// the nearest exported symbol plus an offset.
// Column layout: optional sample percentage, optional jump arrow, optional local label
// (hex offset from the function start), then the instruction.
// Register roles as used below: x0 = destination (stores go through a copy of x0),
// x1 = source (loads), x2 = length in bytes (x2 >> 3 eight-byte words are copied).
// Prologue: 32-byte frame saving x29/x30 and the callee-saved pair x19/x20.
0.51 stp x29, x30, [sp, #-32]!
0.91 stp x20, x19, [sp, #16]
0.47 mov x29, sp
// Nothing to copy when destination == source or length == 0; both cases still go
// through the flag check at 68 before returning.
0.41 cmp x0, x1
↓ b.eq 68
0.48 ↓ cbz x2, 68
// Load two 64-bit bounds through pointers stored at fixed offsets in a data page.
// NOTE(review): presumably the low/high bounds of the GC heap - not established by this listing.
0.45 adrp x8, __getauxval@plt+0x36350
adrp x9, __getauxval@plt+0x36350
0.63 ldr x8, [x8, #2088]
0.31 ldr x9, [x9, #2096]
0.54 ldr x8, [x8]
0.48 ldr x9, [x9]
// Unsigned range test: w8 = 0 only when x8 <= x0 < x9, otherwise w8 = 1.
// (If the first compare is not "ls", ccmp forces NZCV = 0, which also reads as "ls".)
// The barrier below runs only for the in-range case; w8 is tested again at 64.
0.29 cmp x8, x0
0.39 ccmp x9, x0, #0x0, ls // ls = plast
0.02 cset w8, ls // ls = plast
0.43 ↓ b.ls 44
0.53 dmb ish
// NOTE(review): the 53.60% on the next instruction is presumably sample skid from the
// preceding dmb ish rather than the cost of the cmp itself - confirm with a precise event.
// Lengths below 8 bytes skip the copy loop entirely.
53.60 44: cmp x2, #0x8
↓ b.cc 64
// Forward copy of x2 >> 3 words, 8 bytes per iteration, post-incrementing both pointers.
// Any remainder of x2 modulo 8 is not copied by this loop.
0.48 lsr x9, x2, #3
0.18 mov x10, x0
8.17 54: ldr x11, [x1], #8
4.97 subs x9, x9, #0x1
4.84 str x11, [x10], #8
2.54 ↑ b.ne 54
// w8 bit 0 clear means the destination was inside [x8, x9): go update the byte tables at a8.
0.88 64: ↓ tbz w8, #0, a8
// Common exit path: read a 32-bit global flag; when it is zero, return immediately.
0.41 68: adrp x8, __getauxval@plt+0x36350
0.42 ldr x8, [x8, #712]
0.48 ldr w8, [x8]
0.57 ↓ cbz w8, 9c
// Flag set: call the helper at 3490f8 with x0 = address of this function and x1 = 0.
// A non-zero return goes straight to the epilogue.
nop
adr x0, GetCLRRuntimeHost@@V1.0+0xf7130
mov x1, xzr
bl 3490f8 <GetCLRRuntimeHost@@V1.0+0x116efc>
↓ cbnz x0, 9c
// Helper returned zero: spin until the 32-bit word behind this second global becomes non-zero.
adrp x8, __getauxval@plt+0x36350
ldr x8, [x8, #1664]
94: ldr w9, [x8]
↑ cbz w9, 94
// Epilogue.
0.39 9c: ldp x20, x19, [sp, #16]
0.48 ldp x29, x30, [sp], #32
← ret
// Table-update path. First check a byte-sized global flag; when it is zero skip to f8.
0.59 a8: adrp x8, __getauxval@plt+0x36350
0.55 ldr x8, [x8, #2168]
0.36 ldrb w8, [x8]
0.55 ↓ cbz w8, f8
// Flag set: memset(table + (x0 >> 12), 0xff, ((x0 + x2 - 1) >> 12) - (x0 >> 12) + 1),
// i.e. one byte per 4 KiB of destination. x0/x2 are parked in x19/x20 across the call.
// NOTE(review): presumably a write-watch style table - purpose not shown in this listing.
add x8, x0, x2
adrp x9, __getauxval@plt+0x36350
lsr x10, x0, #12
sub x8, x8, #0x1
ldr x9, [x9, #2160]
mov x19, x0
lsr x8, x8, #12
mov x20, x2
mov w1, #0xff // #255
ldr x9, [x9]
sub x8, x8, x10
add x0, x9, x10
add x2, x8, #0x1
→ bl memset@plt
mov x2, x20
mov x0, x19
// First byte table: one byte per 2 KiB (shift by 11) of destination.
// x8 = x0 + x2 (end address, reused at 3a0), x9 = pointer to the first table byte,
// x10 = number of table bytes = ((end + 0x7ff) >> 11) - (x0 >> 11).
// NOTE(review): presumably the GC card table - the listing only shows the address arithmetic.
0.66 f8: add x8, x0, x2
adrp x12, __getauxval@plt+0x36350
0.47 lsr x11, x0, #11
add x9, x8, #0x7ff
0.60 ldr x12, [x12, #2080]
0.12 lsr x9, x9, #11
0.55 ldr x12, [x12]
0.44 sub x10, x9, x11
0.01 cmp x10, #0x8
0.47 add x9, x12, x11
0.14 ↓ b.cs 12c
// Fewer than 8 table bytes: handle all of them in the scalar loop at 688.
0.47 mov x11, x10
↓ b 688
// 8..15 bytes use the 8-byte vector loop (138); 16 or more start with the 16-byte loop (224).
12c: cmp x10, #0x10
↓ b.cs 224
mov x13, xzr
// 8-byte vector loop setup. x13 on entry = bytes already handled (0 from above, or the
// 16-byte-loop total when entered from 67c). x14 = x10 rounded down to a multiple of 8,
// x12 = negative byte counter, x9 / x11 = pointer / count left for the scalar tail.
138: movi v0.2d, #0xffffffffffffffff
add x15, x13, x11
and x14, x10, #0xfffffffffffffff8
add x15, x15, x12
add x9, x9, x14
and x11, x10, #0x7
sub x12, x13, x14
add x13, x15, #0x3
mov w15, #0xff // #255
↓ b 16c
160: adds x12, x12, #0x8
add x13, x13, #0x8
↓ b.eq 218
// Load 8 table bytes, build a per-lane "byte != 0xff" mask, then test each lane in turn and
// store 0xff only into the bytes that are not already 0xff (no store for bytes already set).
16c: ldur d1, [x13, #-3]
cmeq v1.8b, v1.8b, v0.8b
mvn v1.8b, v1.8b
umov w16, v1.b[0]
↓ tbnz w16, #0, 1bc
umov w16, v1.b[1]
↓ tbnz w16, #0, 1c8
188: umov w16, v1.b[2]
↓ tbnz w16, #0, 1d4
190: umov w16, v1.b[3]
↓ tbnz w16, #0, 1e0
198: umov w16, v1.b[4]
↓ tbnz w16, #0, 1ec
1a0: umov w16, v1.b[5]
↓ tbnz w16, #0, 1f8
1a8: umov w16, v1.b[6]
↓ tbnz w16, #0, 204
1b0: umov w16, v1.b[7]
↑ tbz w16, #0, 160
↓ b 210
// Per-lane store stubs: write 0xff to lane k, then re-test lane k+1 and rejoin the chain above.
1bc: sturb w15, [x13, #-3]
umov w16, v1.b[1]
↑ tbz w16, #0, 188
1c8: sturb w15, [x13, #-2]
umov w16, v1.b[2]
↑ tbz w16, #0, 190
1d4: sturb w15, [x13, #-1]
umov w16, v1.b[3]
↑ tbz w16, #0, 198
1e0: strb w15, [x13]
umov w16, v1.b[4]
↑ tbz w16, #0, 1a0
1ec: strb w15, [x13, #1]
umov w16, v1.b[5]
↑ tbz w16, #0, 1a8
1f8: strb w15, [x13, #2]
umov w16, v1.b[6]
↑ tbz w16, #0, 1b0
204: strb w15, [x13, #3]
umov w16, v1.b[7]
↑ tbz w16, #0, 160
210: strb w15, [x13, #4]
↑ b 160
// 8-byte loop done: if the count was a multiple of 8 move on to the second table (3a0),
// otherwise finish the remaining bytes in the scalar loop (688).
218: cmp x10, x14
↓ b.eq 3a0
↓ b 688
// 16-byte vector loop setup: x13 = x10 rounded down to a multiple of 16, x16 = byte counter,
// x14 = table pointer biased by +7.
224: movi v0.2d, #0xffffffffffffffff
and x13, x10, #0xfffffffffffffff0
add x14, x11, x12
add x14, x14, #0x7
mov w15, #0xff // #255
mov x16, x13
↓ b 24c
240: subs x16, x16, #0x10
add x14, x14, #0x10
↓ b.eq 398
// Same scheme as the 8-byte loop, 16 lanes per iteration.
24c: ldur q1, [x14, #-7]
cmeq v1.16b, v1.16b, v0.16b
mvn v1.16b, v1.16b
umov w17, v1.b[0]
↓ tbnz w17, #0, 2dc
umov w17, v1.b[1]
↓ tbnz w17, #0, 2e8
268: umov w17, v1.b[2]
↓ tbnz w17, #0, 2f4
270: umov w17, v1.b[3]
↓ tbnz w17, #0, 300
278: umov w17, v1.b[4]
↓ tbnz w17, #0, 30c
280: umov w17, v1.b[5]
↓ tbnz w17, #0, 318
288: umov w17, v1.b[6]
↓ tbnz w17, #0, 324
290: umov w17, v1.b[7]
↓ tbnz w17, #0, 330
298: umov w17, v1.b[8]
↓ tbnz w17, #0, 33c
2a0: umov w17, v1.b[9]
↓ tbnz w17, #0, 348
2a8: umov w17, v1.b[10]
↓ tbnz w17, #0, 354
2b0: umov w17, v1.b[11]
↓ tbnz w17, #0, 360
2b8: umov w17, v1.b[12]
↓ tbnz w17, #0, 36c
2c0: umov w17, v1.b[13]
↓ tbnz w17, #0, 378
2c8: umov w17, v1.b[14]
↓ tbnz w17, #0, 384
2d0: umov w17, v1.b[15]
↑ tbz w17, #0, 240
↓ b 390
// Per-lane store stubs for the 16-byte loop.
2dc: sturb w15, [x14, #-7]
umov w17, v1.b[1]
↑ tbz w17, #0, 268
2e8: sturb w15, [x14, #-6]
umov w17, v1.b[2]
↑ tbz w17, #0, 270
2f4: sturb w15, [x14, #-5]
umov w17, v1.b[3]
↑ tbz w17, #0, 278
300: sturb w15, [x14, #-4]
umov w17, v1.b[4]
↑ tbz w17, #0, 280
30c: sturb w15, [x14, #-3]
umov w17, v1.b[5]
↑ tbz w17, #0, 288
318: sturb w15, [x14, #-2]
umov w17, v1.b[6]
↑ tbz w17, #0, 290
324: sturb w15, [x14, #-1]
umov w17, v1.b[7]
↑ tbz w17, #0, 298
330: strb w15, [x14]
umov w17, v1.b[8]
↑ tbz w17, #0, 2a0
33c: strb w15, [x14, #1]
umov w17, v1.b[9]
↑ tbz w17, #0, 2a8
348: strb w15, [x14, #2]
umov w17, v1.b[10]
↑ tbz w17, #0, 2b0
354: strb w15, [x14, #3]
umov w17, v1.b[11]
↑ tbz w17, #0, 2b8
360: strb w15, [x14, #4]
umov w17, v1.b[12]
↑ tbz w17, #0, 2c0
36c: strb w15, [x14, #5]
umov w17, v1.b[13]
↑ tbz w17, #0, 2c8
378: strb w15, [x14, #6]
umov w17, v1.b[14]
↑ tbz w17, #0, 2d0
384: strb w15, [x14, #7]
umov w17, v1.b[15]
↑ tbz w17, #0, 240
390: strb w15, [x14, #8]
↑ b 240
// 16-byte loop done: leftovers (count not a multiple of 16) are dispatched at 67c.
398: cmp x10, x13
↓ b.ne 67c
// Second byte table: one byte per 2 MiB (shift by 21) of destination, same algorithm.
// x8 = pointer to the first table byte, x9 = number of table bytes.
// NOTE(review): presumably the card-bundle table - not established by this listing.
0.51 3a0: mov w9, #0x1fffff // #2097151
0.12 adrp x11, __getauxval@plt+0x36350
0.32 lsr x10, x0, #21
0.20 add x8, x8, x9
0.45 ldr x11, [x11, #2152]
0.56 lsr x8, x8, #21
0.49 ldr x11, [x11]
sub x9, x8, x10
0.35 cmp x9, #0x8
0.11 add x8, x11, x10
0.37 ↓ b.cs 3d4
// Fewer than 8 table bytes: scalar loop at 654.
0.33 mov x10, x9
0.01 ↓ b 654
3d4: cmp x9, #0x10
↓ b.cs 4cc
mov x12, xzr
// 8-byte vector loop for the second table (x12 on entry = bytes already handled).
3e0: movi v0.2d, #0xffffffffffffffff
add x14, x12, x10
and x13, x9, #0xfffffffffffffff8
add x14, x14, x11
and x10, x9, #0x7
add x8, x8, x13
sub x11, x12, x13
add x12, x14, #0x3
mov w14, #0xff // #255
↓ b 414
408: adds x11, x11, #0x8
add x12, x12, #0x8
↓ b.eq 4c0
414: ldur d1, [x12, #-3]
cmeq v1.8b, v1.8b, v0.8b
mvn v1.8b, v1.8b
umov w15, v1.b[0]
↓ tbnz w15, #0, 464
umov w15, v1.b[1]
↓ tbnz w15, #0, 470
430: umov w15, v1.b[2]
↓ tbnz w15, #0, 47c
438: umov w15, v1.b[3]
↓ tbnz w15, #0, 488
440: umov w15, v1.b[4]
↓ tbnz w15, #0, 494
448: umov w15, v1.b[5]
↓ tbnz w15, #0, 4a0
450: umov w15, v1.b[6]
↓ tbnz w15, #0, 4ac
458: umov w15, v1.b[7]
↑ tbz w15, #0, 408
↓ b 4b8
464: sturb w14, [x12, #-3]
umov w15, v1.b[1]
↑ tbz w15, #0, 430
470: sturb w14, [x12, #-2]
umov w15, v1.b[2]
↑ tbz w15, #0, 438
47c: sturb w14, [x12, #-1]
umov w15, v1.b[3]
↑ tbz w15, #0, 440
488: strb w14, [x12]
umov w15, v1.b[4]
↑ tbz w15, #0, 448
494: strb w14, [x12, #1]
umov w15, v1.b[5]
↑ tbz w15, #0, 450
4a0: strb w14, [x12, #2]
umov w15, v1.b[6]
↑ tbz w15, #0, 458
4ac: strb w14, [x12, #3]
umov w15, v1.b[7]
↑ tbz w15, #0, 408
4b8: strb w14, [x12, #4]
↑ b 408
// Second table, 8-byte loop done: finished (back to 68) or scalar tail (654).
4c0: cmp x9, x13
↑ b.eq 68
↓ b 654
// 16-byte vector loop for the second table.
4cc: movi v0.2d, #0xffffffffffffffff
and x12, x9, #0xfffffffffffffff0
add x13, x10, x11
add x13, x13, #0x7
mov w14, #0xff // #255
mov x15, x12
↓ b 4f4
4e8: subs x15, x15, #0x10
add x13, x13, #0x10
↓ b.eq 640
4f4: ldur q1, [x13, #-7]
cmeq v1.16b, v1.16b, v0.16b
mvn v1.16b, v1.16b
umov w16, v1.b[0]
↓ tbnz w16, #0, 584
umov w16, v1.b[1]
↓ tbnz w16, #0, 590
510: umov w16, v1.b[2]
↓ tbnz w16, #0, 59c
518: umov w16, v1.b[3]
↓ tbnz w16, #0, 5a8
520: umov w16, v1.b[4]
↓ tbnz w16, #0, 5b4
528: umov w16, v1.b[5]
↓ tbnz w16, #0, 5c0
530: umov w16, v1.b[6]
↓ tbnz w16, #0, 5cc
538: umov w16, v1.b[7]
↓ tbnz w16, #0, 5d8
540: umov w16, v1.b[8]
↓ tbnz w16, #0, 5e4
548: umov w16, v1.b[9]
↓ tbnz w16, #0, 5f0
550: umov w16, v1.b[10]
↓ tbnz w16, #0, 5fc
558: umov w16, v1.b[11]
↓ tbnz w16, #0, 608
560: umov w16, v1.b[12]
↓ tbnz w16, #0, 614
568: umov w16, v1.b[13]
↓ tbnz w16, #0, 620
570: umov w16, v1.b[14]
↓ tbnz w16, #0, 62c
578: umov w16, v1.b[15]
↑ tbz w16, #0, 4e8
↓ b 638
584: sturb w14, [x13, #-7]
umov w16, v1.b[1]
↑ tbz w16, #0, 510
590: sturb w14, [x13, #-6]
umov w16, v1.b[2]
↑ tbz w16, #0, 518
59c: sturb w14, [x13, #-5]
umov w16, v1.b[3]
↑ tbz w16, #0, 520
5a8: sturb w14, [x13, #-4]
umov w16, v1.b[4]
↑ tbz w16, #0, 528
5b4: sturb w14, [x13, #-3]
umov w16, v1.b[5]
↑ tbz w16, #0, 530
5c0: sturb w14, [x13, #-2]
umov w16, v1.b[6]
↑ tbz w16, #0, 538
5cc: sturb w14, [x13, #-1]
umov w16, v1.b[7]
↑ tbz w16, #0, 540
5d8: strb w14, [x13]
umov w16, v1.b[8]
↑ tbz w16, #0, 548
5e4: strb w14, [x13, #1]
umov w16, v1.b[9]
↑ tbz w16, #0, 550
5f0: strb w14, [x13, #2]
umov w16, v1.b[10]
↑ tbz w16, #0, 558
5fc: strb w14, [x13, #3]
umov w16, v1.b[11]
↑ tbz w16, #0, 560
608: strb w14, [x13, #4]
umov w16, v1.b[12]
↑ tbz w16, #0, 568
614: strb w14, [x13, #5]
umov w16, v1.b[13]
↑ tbz w16, #0, 570
620: strb w14, [x13, #6]
umov w16, v1.b[14]
↑ tbz w16, #0, 578
62c: strb w14, [x13, #7]
umov w16, v1.b[15]
↑ tbz w16, #0, 4e8
638: strb w14, [x13, #8]
↑ b 4e8
// Second table, 16-byte loop done: finished (68), or an 8-byte chunk remains (bit 3 of the
// count -> 3e0), or only a sub-8-byte tail remains (fall into the scalar loop).
640: cmp x9, x12
↑ b.eq 68
↑ tbnz w9, #3, 3e0
add x8, x8, x12
and x10, x9, #0xf
// Scalar loop for the second table: x8 = byte pointer, x10 = bytes left.
// Each byte is read first and only written when it is not already 0xff.
0.43 654: mov w9, #0xff // #255
0.06 ↓ b 668
0.61 65c: subs x10, x10, #0x1
0.11 add x8, x8, #0x1
0.29 ↑ b.eq 68
0.55 668: ldrb w11, [x8]
0.48 cmp w11, #0xff
↑ b.eq 65c
strb w9, [x8]
↑ b 65c
// First table, after the 16-byte loop: an 8-byte chunk remains (bit 3 of the count -> 138),
// otherwise only a sub-8-byte tail remains for the scalar loop.
67c: ↑ tbnz w10, #3, 138
and x11, x10, #0xf
add x9, x9, x13
// Scalar loop for the first table: x9 = byte pointer, x11 = bytes left; exits to 3a0.
0.60 688: mov w10, #0xff // #255
0.12 ↓ b 69c
0.42 690: subs x11, x11, #0x1
0.49 add x9, x9, #0x1
0.01 ↑ b.eq 3a0
0.33 69c: ldrb w12, [x9]
0.48 cmp w12, #0xff
↑ b.eq 690
strb w10, [x9]
↑ b 690
Samples: 10K of event 'cpu-clock', 1999 Hz, Event count (approx.): 5002500000, [percent: local period]
instance void [benchapp] Bench::CopyObjects10()[OptimizedTier1]() /tmp/jitted-48495-7316.so
Percent
Disassembly of section .text:
0000000000000080 <instance void [benchapp] Bench::CopyObjects10()[OptimizedTier1]>:
// perf-annotated AArch64 listing of JIT-compiled code (leading number = sample percentage,
// hex labels = offsets from the function start).
// Shape of the code: load two object references from fixed addresses, check each one's first
// word against a constant, compare their 32-bit lengths, then call a copy helper with
// (x0 = second object's data, x1 = first object's data, x2 = first length * 8).
// NOTE(review): presumably two static object[] fields being copied source -> destination;
// the listing itself only shows the loads and checks described below.
// Prologue: 32-byte frame saving x29/x30 and x19/x20.
3.25 stp x29, x30, [sp, #-32]!
6.83 stp x19, x20, [sp, #16]
3.84 mov x29, sp
// x0 = 0xf9cb7c002d10, the address the first reference is loaded from.
0.09 mov x0, #0x2d10 // #11536
3.93 movk x0, #0x7c00, lsl #16
0.09 movk x0, #0xf9cb, lsl #32
2.99 ldr x1, [x0]
// Null first reference: use data pointer 0 and length 0 (bc).
3.33 ↓ cbz x1, bc
// First word of the object must equal 0xfa0b890f2360, otherwise go to c8.
// NOTE(review): presumably a method-table / exact-type check - confirm.
3.42 ldr x2, [x1]
mov x3, #0x2360 // #9056
2.82 movk x3, #0x890f, lsl #16
0.34 movk x3, #0xfa0b, lsl #32
3.84 cmp x2, x3
↓ b.ne c8
// x2 = first object's data start (object + 0x10), w19 = 32-bit length at object + 8.
2.56 add x2, x1, #0x10
2.48 ldr w19, [x1, #8]
// Second reference lives 16 bytes after the first one; same null and first-word checks.
3.25 40: ldr x0, [x0, #16]
3.07 ↓ cbz x0, e0
2.90 ldr x1, [x0]
0.60 mov x3, #0x2360 // #9056
2.05 movk x3, #0x890f, lsl #16
1.71 movk x3, #0xfa0b, lsl #32
4.10 cmp x1, x3
↓ b.ne c8
// x1 = second object's data start, w20 = its 32-bit length.
3.84 add x1, x0, #0x10
4.87 ldr w20, [x0, #8]
// Signed compare: first length greater than second length goes to the helper at ec.
2.99 68: cmp w19, w20
↓ b.gt ec
// x3 = zero-extended w19 * 8 (byte count). Counts above 0x4000 (16 KiB) take the path at 98.
3.59 ubfiz x3, x19, #3, #32
0.60 cmp x3, #0x4, lsl #12
3.25 ↓ b.hi 98
// Small case: direct call with x0 = second data pointer, x1 = first data pointer, x2 = bytes.
// NOTE(review): the callee is presumably the bulk-move-with-write-barrier helper; the
// target is shown only as a relative address here.
4.10 mov x0, x1
3.59 mov x1, x2
2.73 mov x2, x3
3.67 bl fffffffffffcc338 <instance void [benchapp] Bench::CopyObjects10()[OptimizedTier1]+0xfffffffffffcc2b8>
// Epilogue (shared by both copy paths).
5.81 8c: ldp x19, x20, [sp, #16]
3.50 ldp x29, x30, [sp], #32
← ret
// Large case (> 16 KiB): same three arguments, but the callee address is loaded from
// 0xfa0b89cfc288 and called indirectly.
98: mov x0, x1
mov x1, x2
mov x2, x3
mov x3, #0xc288 // #49800
movk x3, #0x89cf, lsl #16
movk x3, #0xfa0b, lsl #32
ldr x3, [x3]
→ blr x3
↑ b 8c
// First reference was null: data pointer = 0, length = 0.
bc: mov x2, xzr
mov w19, wzr
↑ b 40
// First-word check failed: call through the pointer at 0xfa0b89cfc1f8. The brk that follows
// suggests the call is not expected to return (presumably a throw helper).
c8: mov x0, #0xc1f8 // #49656
movk x0, #0x89cf, lsl #16
movk x0, #0xfa0b, lsl #32
ldr x0, [x0]
→ blr x0
brk #0xf000
// Second reference was null: data pointer = 0, length = 0.
e0: mov x1, xzr
mov w20, wzr
↑ b 68
// Length check failed (first > second): call through the pointer at 0xfa0b8965eef8, again
// followed by brk (presumably a throw helper).
ec: mov x0, #0xeef8 // #61176
movk x0, #0x8965, lsl #16
movk x0, #0xfa0b, lsl #32
ldr x0, [x0]
→ blr x0
brk #0xf000
Samples: 10K of event 'cpu-clock', 1999 Hz, Event count (approx.): 5002500000, [percent: local period]
instance void [d3363f9a-18f9-4817-99ce-8e0ba5222ca5Emitted] BenchmarkDotNet.Autogenerated.Runnable_0::WorkloadActionUnroll(int64)[Optimized]() /tmp/jitted-48495-7276.so
Percent
Disassembly of section .text:
0000000000000080 <instance void [d3363f9a-18f9-4817-99ce-8e0ba5222ca5Emitted] BenchmarkDotNet.Autogenerated.Runnable_0::WorkloadActionUnroll(int64)[Optimized]>:
// perf-annotated AArch64 listing of the benchmark driver loop (leading number = sample
// percentage, hex labels = offsets from the function start).
// x0 = this (kept in x19), x1 = iteration count (kept in x20).
// Each loop iteration performs the same indirect call 16 times (manually unrolled).
// Prologue: 32-byte frame saving x29/x30 and x19/x20.
stp x29, x30, [sp, #-32]!
stp x19, x20, [sp, #16]
mov x29, sp
mov x19, x0
// Signed test: a count <= 0 skips the loop entirely.
cmp x1, #0x0
↓ b.le 124
mov x20, x1
// Call sequence, repeated 16 times: reload the object at [this + 48], pass the word at
// offset 8 as x0 and call the code pointer stored at offset 24.
// NOTE(review): presumably a delegate invocation (target object at +8, method pointer at
// +24) - the listing only shows the loads and the blr.
1c: ldr x1, [x19, #48]
0.92 ldr x0, [x1, #8]
0.46 ldr x1, [x1, #24]
0.92 → blr x1
3.23 ldr x1, [x19, #48]
2.30 ldr x0, [x1, #8]
1.38 ldr x1, [x1, #24]
1.84 → blr x1
2.76 ldr x1, [x19, #48]
1.38 ldr x0, [x1, #8]
0.46 ldr x1, [x1, #24]
→ blr x1
3.69 ldr x1, [x19, #48]
2.30 ldr x0, [x1, #8]
0.46 ldr x1, [x1, #24]
2.76 → blr x1
5.07 ldr x1, [x19, #48]
1.38 ldr x0, [x1, #8]
0.46 ldr x1, [x1, #24]
1.38 → blr x1
5.99 ldr x1, [x19, #48]
1.84 ldr x0, [x1, #8]
1.38 ldr x1, [x1, #24]
1.38 → blr x1
1.38 ldr x1, [x19, #48]
1.38 ldr x0, [x1, #8]
0.92 ldr x1, [x1, #24]
0.46 → blr x1
ldr x1, [x19, #48]
2.76 ldr x0, [x1, #8]
0.92 ldr x1, [x1, #24]
0.46 → blr x1
3.23 ldr x1, [x19, #48]
1.38 ldr x0, [x1, #8]
1.38 ldr x1, [x1, #24]
1.84 → blr x1
3.69 ldr x1, [x19, #48]
1.38 ldr x0, [x1, #8]
2.30 ldr x1, [x1, #24]
0.92 → blr x1
1.84 ldr x1, [x19, #48]
ldr x0, [x1, #8]
1.84 ldr x1, [x1, #24]
0.46 → blr x1
0.92 ldr x1, [x19, #48]
ldr x0, [x1, #8]
0.92 ldr x1, [x1, #24]
1.84 → blr x1
2.30 ldr x1, [x19, #48]
0.92 ldr x0, [x1, #8]
2.30 ldr x1, [x1, #24]
1.38 → blr x1
1.38 ldr x1, [x19, #48]
0.46 ldr x0, [x1, #8]
0.92 ldr x1, [x1, #24]
0.46 → blr x1
2.76 ldr x1, [x19, #48]
0.92 ldr x0, [x1, #8]
1.38 ldr x1, [x1, #24]
→ blr x1
0.46 ldr x1, [x19, #48]
2.30 ldr x0, [x1, #8]
0.92 ldr x1, [x1, #24]
0.92 → blr x1
// Decrement the remaining count and loop until it reaches zero.
3.23 sub x20, x20, #0x1
0.92 ↑ cbnz x20, 1c
// Epilogue.
124: ldp x19, x20, [sp, #16]
ldp x29, x30, [sp], #32
← ret
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment