Skip to content

Instantly share code, notes, and snippets.

@Megawats777
Created April 9, 2020 00:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Megawats777/46bf0beba2a85dc420456f3f632f0746 to your computer and use it in GitHub Desktop.
Save Megawats777/46bf0beba2a85dc420456f3f632f0746 to your computer and use it in GitHub Desktop.
BbettyProfileResults-01
Same use case: decoding a 1080p video 1000 times.
Profiling was done using the "perf" tool.
Compiler settings:
- opt level: -Og
- debug info: -g
------------------------------------------------------------------
Run #1:
Expensive Functions:
11.30% h264dec libc-2.30.so [.] __GI___memset_generic
10.33% h264dec libc-2.30.so [.] __memcpy_generic
5.14% h264dec h264dec [.] WelsDec::CavlcGetLevelVal
4.79% h264dec h264dec [.] WelsDec::WelsResidualBlockCavlc
__GI___memset_generic - Expensive Lines:
6.93 │ a8: stp q0, q0, [x3, #32]
6.04 │ stp q0, q0, [x3, #64]!
60.16 │100: dc zva, x3
│ add x3, x3, #0x40
│ subs x2, x2, #0x40
│ ↑ b.hi 100
5.77 │ stp q0, q0, [x3]
5.90 │ stp q0, q0, [x3, #32]
7.07 │ stp q0, q0, [x4, #-64]
__memcpy_generic - Expensive Lines:
12.79 │110: stp x6, x7, [x3, #16]
7.07 │ ldp x6, x7, [x1, #16]
13.55 │ stp x8, x9, [x3, #32]
8.84 │ ldp x8, x9, [x1, #32]
13.71 │ stp x10, x11, [x3, #48]
10.91 │ ldp x10, x11, [x1, #48]
13.14 │ stp x12, x13, [x3, #64]!
9.22 │ ldp x12, x13, [x1, #64]!
WelsDec::CavlcGetLevelVal - Expensive Lines:
7.96 │ neg w7, w7
6.27 │ ldrb w4, [x1, #4]
6.06 │ ldr w7, [x1]
9.31 │ ldr w8, [x11, w8, uxtw #2]
│ add w8, w8, w4
│ return (32 - iNumBit);
│ mov w4, #0x20 // #32
│ sub w4, w4, w8
│ _ZN7WelsDecL16CavlcGetLevelValEPiPNS_16TagReadBitsCacheEhh():
│ if (iPrefixBits > MAX_LEVEL_PREFIX + 1) //iPrefixBits includes leading "0"s and first "1", should +1
│ cmp w4, #0x10
│ → b.gt 442e5c <WelsDec::CavlcGetLevelVal(int*, WelsDec::TagReadBitsCache*, unsigned char, unsigned char)+0x2b0>
│ POP_BUFFER (pBitsCache, iPrefixBits);
6.70 │ lsl w8, w7, w4
3.59 │ adrp x1, __FRAME_END__+0x16ab0
3.02 │ ldr x1, [x1, #2776]
0.01 │ ldr x2, [sp, #24]
1.87 │ ldr x1, [x1]
│ eor x1, x2, x1
│ ↓ cbnz x1, 442e80 <WelsDec::CavlcGetLevelVal(int*, 2d4
1.46 │ ldp x29, x30, [sp], #32
│ ← ret
│2d4: → bl __stack_chk_fail@plt
WelsDec::WelsResidualBlockCavlc - Expensive Lines:
3.89 │ ldrb w1, [x24, w3, sxtw]
6.36 │ ldrh w4, [x21, x4]
------------------------------------------------------------------
Run #2:
Expensive Functions:
11.28% h264dec libc-2.30.so [.] __GI___memset_generic
10.18% h264dec libc-2.30.so [.] __memcpy_generic
5.16% h264dec h264dec [.] WelsDec::CavlcGetLevelVal
4.86% h264dec h264dec [.] WelsDec::WelsResidualBlockCavlc
__GI___memset_generic - Expensive Lines
6.95 │ a8: stp q0, q0, [x3, #32]
6.04 │ stp q0, q0, [x3, #64]!
60.11 │100: dc zva, x3
│ add x3, x3, #0x40
│ subs x2, x2, #0x40
│ ↑ b.hi 100
5.82 │ stp q0, q0, [x3]
5.87 │ stp q0, q0, [x3, #32]
6.97 │ stp q0, q0, [x4, #-64]
__memcpy_generic - Expensive Lines
12.69 │110: stp x6, x7, [x3, #16]
6.81 │ ldp x6, x7, [x1, #16]
13.43 │ stp x8, x9, [x3, #32]
8.80 │ ldp x8, x9, [x1, #32]
13.50 │ stp x10, x11, [x3, #48]
11.16 │ ldp x10, x11, [x1, #48]
12.97 │ stp x12, x13, [x3, #64]!
9.53 │ ldp x12, x13, [x1, #64]!
WelsDec::CavlcGetLevelVal - Expensive Lines
7.84 │ neg w7, w7
6.62 │ ldrb w4, [x1, #4]
6.20 │ ldr w7, [x1]
9.25 │ ldr w8, [x11, w8, uxtw #2]
│ add w8, w8, w4
│ return (32 - iNumBit);
│ mov w4, #0x20 // #32
│ sub w4, w4, w8
│ _ZN7WelsDecL16CavlcGetLevelValEPiPNS_16TagReadBitsCacheEhh():
│ if (iPrefixBits > MAX_LEVEL_PREFIX + 1) //iPrefixBits includes leading "0"s and first "1", should +1
│ cmp w4, #0x10
│ → b.gt 442e5c <WelsDec::CavlcGetLevelVal(int*, WelsDec::TagReadBitsCache*, unsigned char, unsigned char)+0x2b0>
│ POP_BUFFER (pBitsCache, iPrefixBits);
6.32 │ lsl w8, w7, w4
3.70 │ adrp x1, __FRAME_END__+0x16ab0
2.94 │ ldr x1, [x1, #2776]
0.00 │ ldr x2, [sp, #24]
1.89 │ ldr x1, [x1]
│ eor x1, x2, x1
│ ↓ cbnz x1, 442e80 <WelsDec::CavlcGetLevelVal(int*, 2d4
1.46 │ ldp x29, x30, [sp], #32
│ ← ret
│2d4: → bl __stack_chk_fail@plt
WelsDec::WelsResidualBlockCavlc - Expensive Lines
6.45 │ ldrh w4, [x21, x4]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment