-
-
Save Megawats777/46bf0beba2a85dc420456f3f632f0746 to your computer and use it in GitHub Desktop.
BbettyProfileResults-01
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Same use case: decoding a 1080p video 1000 times. | |
Profiling was done using the "perf" tool. | |
Compiler settings: | |
- opt level: -Og | |
- debug info: -g | |
------------------------------------------------------------------ | |
Run #1: | |
Expensive Functions: | |
11.30% h264dec libc-2.30.so [.] __GI___memset_generic | |
10.33% h264dec libc-2.30.so [.] __memcpy_generic | |
5.14% h264dec h264dec [.] WelsDec::CavlcGetLevelVal | |
4.79% h264dec h264dec [.] WelsDec::WelsResidualBlockCavlc | |
__GI___memset_generic - Expensive Lines: | |
6.93 │ a8: stp q0, q0, [x3, #32] | |
6.04 │ stp q0, q0, [x3, #64]! | |
60.16 │100: dc zva, x3 | |
│ add x3, x3, #0x40 | |
│ subs x2, x2, #0x40 | |
│ ↑ b.hi 100 | |
5.77 │ stp q0, q0, [x3] | |
5.90 │ stp q0, q0, [x3, #32] | |
7.07 │ stp q0, q0, [x4, #-64] | |
__memcpy_generic - Expensive Lines: | |
12.79 │110: stp x6, x7, [x3, #16] | |
7.07 │ ldp x6, x7, [x1, #16] | |
13.55 │ stp x8, x9, [x3, #32] | |
8.84 │ ldp x8, x9, [x1, #32] | |
13.71 │ stp x10, x11, [x3, #48] | |
10.91 │ ldp x10, x11, [x1, #48] | |
13.14 │ stp x12, x13, [x3, #64]! | |
9.22 │ ldp x12, x13, [x1, #64]! | |
WelsDec::CavlcGetLevelVal - Expensive Lines: | |
7.96 │ neg w7, w7 | |
6.27 │ ldrb w4, [x1, #4] | |
6.06 │ ldr w7, [x1] | |
9.31 │ ldr w8, [x11, w8, uxtw #2] | |
│ add w8, w8, w4 | |
│ | |
│ return (32 - iNumBit); | |
│ mov w4, #0x20 // #32 | |
│ sub w4, w4, w8 | |
│ _ZN7WelsDecL16CavlcGetLevelValEPiPNS_16TagReadBitsCacheEhh(): | |
│ if (iPrefixBits > MAX_LEVEL_PREFIX + 1) //iPrefixBits includes leading "0"s and first "1", should +1 | |
│ cmp w4, #0x10 | |
│ → b.gt 442e5c <WelsDec::CavlcGetLevelVal(int*, WelsDec::TagReadBitsCache*, unsigned char, unsigned char)+0x2b0> | |
│ POP_BUFFER (pBitsCache, iPrefixBits); | |
6.70 │ lsl w8, w7, w4 | |
3.59 │ adrp x1, __FRAME_END__+0x16ab0 | |
3.02 │ ldr x1, [x1, #2776] | |
0.01 │ ldr x2, [sp, #24] | |
1.87 │ ldr x1, [x1] | |
│ eor x1, x2, x1 | |
│ ↓ cbnz x1, 442e80 <WelsDec::CavlcGetLevelVal(int*, 2d4 | |
1.46 │ ldp x29, x30, [sp], #32 | |
│ ← ret | |
│2d4: → bl __stack_chk_fail@plt | |
WelsDec::WelsResidualBlockCavlc - Expensive Lines: | |
3.89 │ ldrb w1, [x24, w3, sxtw] | |
6.36 │ ldrh w4, [x21, x4] | |
------------------------------------------------------------------ | |
Run #2: | |
Expensive Functions: | |
11.28% h264dec libc-2.30.so [.] __GI___memset_generic | |
10.18% h264dec libc-2.30.so [.] __memcpy_generic | |
5.16% h264dec h264dec [.] WelsDec::CavlcGetLevelVal | |
4.86% h264dec h264dec [.] WelsDec::WelsResidualBlockCavlc | |
__GI___memset_generic - Expensive Lines: | |
6.95 │ a8: stp q0, q0, [x3, #32] | |
6.04 │ stp q0, q0, [x3, #64]! | |
60.11 │100: dc zva, x3 | |
│ add x3, x3, #0x40 | |
│ subs x2, x2, #0x40 | |
│ ↑ b.hi 100 | |
5.82 │ stp q0, q0, [x3] | |
5.87 │ stp q0, q0, [x3, #32] | |
6.97 │ stp q0, q0, [x4, #-64] | |
__memcpy_generic - Expensive Lines: | |
12.69 │110: stp x6, x7, [x3, #16] | |
6.81 │ ldp x6, x7, [x1, #16] | |
13.43 │ stp x8, x9, [x3, #32] | |
8.80 │ ldp x8, x9, [x1, #32] | |
13.50 │ stp x10, x11, [x3, #48] | |
11.16 │ ldp x10, x11, [x1, #48] | |
12.97 │ stp x12, x13, [x3, #64]! | |
9.53 │ ldp x12, x13, [x1, #64]! | |
WelsDec::CavlcGetLevelVal - Expensive Lines: | |
7.84 │ neg w7, w7 | |
6.62 │ ldrb w4, [x1, #4] | |
6.20 │ ldr w7, [x1] | |
9.25 │ ldr w8, [x11, w8, uxtw #2] | |
│ add w8, w8, w4 | |
│ | |
│ return (32 - iNumBit); | |
│ mov w4, #0x20 // #32 | |
│ sub w4, w4, w8 | |
│ _ZN7WelsDecL16CavlcGetLevelValEPiPNS_16TagReadBitsCacheEhh(): | |
│ if (iPrefixBits > MAX_LEVEL_PREFIX + 1) //iPrefixBits includes leading "0"s and first "1", should +1 | |
│ cmp w4, #0x10 | |
│ → b.gt 442e5c <WelsDec::CavlcGetLevelVal(int*, WelsDec::TagReadBitsCache*, unsigned char, unsigned char)+0x2b0> | |
│ POP_BUFFER (pBitsCache, iPrefixBits); | |
6.32 │ lsl w8, w7, w4 | |
3.70 │ adrp x1, __FRAME_END__+0x16ab0 | |
2.94 │ ldr x1, [x1, #2776] | |
0.00 │ ldr x2, [sp, #24] | |
1.89 │ ldr x1, [x1] | |
│ eor x1, x2, x1 | |
│ ↓ cbnz x1, 442e80 <WelsDec::CavlcGetLevelVal(int*, 2d4 | |
1.46 │ ldp x29, x30, [sp], #32 | |
│ ← ret | |
│2d4: → bl __stack_chk_fail@plt | |
WelsDec::WelsResidualBlockCavlc - Expensive Lines: | |
6.45 │ ldrh w4, [x21, x4] | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment