-
-
Save Megawats777/40a3dd389c2fa9e9b9ad5494762dcecd to your computer and use it in GitHub Desktop.
AarcheProfileResults-01
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Same use case: decoding a 1080p video 1000 times. | |
The profile was collected using the "perf" tool. | |
Compiler settings: | |
- optimization level: -Og | |
- debug info: -g | |
------------------------------------------------------------------ | |
Run #1: | |
Expensive Functions: | |
10.31% h264dec libc-2.27.so [.] __memcpy_generic | |
8.16% h264dec libc-2.27.so [.] __GI___memset_generic | |
6.73% h264dec h264dec [.] WelsDec::WelsResidualBlockCavlc | |
4.81% h264dec h264dec [.] WelsDec::CavlcGetLevelVal | |
__memcpy_generic - Expensive Lines: | |
6.16 │ ldp x6, x7, [x1, #16] | |
13.58 │ stp x8, x9, [x3, #32] | |
6.34 │ ldp x8, x9, [x1, #32] | |
15.58 │ stp x10, x11, [x3, #48] | |
7.39 │ ldp x10, x11, [x1, #48] | |
16.48 │ stp x12, x13, [x3, #64]! | |
7.19 │ ldp x12, x13, [x1, #64]! | |
0.04 │ subs x2, x2, #0x40 | |
15.76 │ ↑ b.hi 110 | |
__GI___memset_generic - Expensive Lines: | |
11.64 │100: dc zva, x3 | |
52.78 │ add x3, x3, #0x40 | |
│ subs x2, x2, #0x40 | |
10.26 │ ↑ b.hi 100 | |
WelsDec::WelsResidualBlockCavlc - Expensive Lines: | |
│ for (i = uiTotalCoeff - 1; i >= 0; --i) { //FIXME merge into rundecode? | |
1.11 │ sub w0, w0, #0x1 | |
0.34 │ ↓ tbnz w0, #31, 5a0 | |
│ iCoeffNum += iRun[i] + 1; //FIXME add 1 earlier ? | |
1.40 │ add x1, sp, #0xd8 | |
2.11 │ ldr w1, [x1, w0, sxtw #2] | |
2.11 │ add w1, w1, #0x1 | |
1.09 │ add w3, w3, w1 | |
│ j = kpZigzagTable[ iCoeffNum ]; | |
3.20 │ ldrb w1, [x26, w3, sxtw] | |
│ if (!pCtx->bUseScalingList) { | |
0.19 │ add x2, x22, #0x8a, lsl #12 | |
2.83 │ ldrb w2, [x2, #3565] | |
2.15 │ ↑ cbnz w2, 44514c <WelsDec::WelsResidualBlockCavlc(WelsDec::TagVlcTable*, 4f0 | |
│ pTCoeff[j] = (iLevel[i] * kpDequantCoeff[j & 0x07]); | |
0.03 │ add x2, sp, #0x98 | |
2.14 │ ldr w2, [x2, w0, sxtw #2] | |
│ ubfiz x4, x1, #1, #3 | |
1.15 │ ubfiz x1, x1, #1, #8 | |
2.17 │ ldrh w4, [x20, x4] | |
3.56 │ mul w2, w2, w4 | |
1.03 │ strh w2, [x23, x1] | |
WelsDec::CavlcGetLevelVal - Expensive Lines: | |
2.37 │ ldr w7, [x1] | |
│ GetPrefixBits(): | |
2.46 │ ands w11, w7, #0xffff0000 | |
0.28 │ → b.eq 442e34 <WelsDec::CavlcGetLevelVal(int*, WelsDec::TagReadBitsCache*, unsigned char, unsigned char)+0x94> // b.none | |
│ uiValue >>= 16; | |
0.81 │ lsr w8, w7, #16 | |
│ iNumBit += 16; | |
0.29 │ mov w11, #0x10 // #16 | |
│ } | |
│ if (uiValue & 0xff00) { | |
0.80 │ tst w8, #0xff00 | |
0.82 │ → b.eq 442f8c <WelsDec::CavlcGetLevelVal(int*, WelsDec::TagReadBitsCache*, unsigned char, unsigned char)+0x1ec> // b.none | |
│ uiValue >>= 8; | |
0.91 │ lsr w8, w8, #8 | |
│ iNumBit += 8; | |
0.26 │ add w11, w11, #0x8 | |
│ } | |
│ | |
│ if (uiValue & 0xf0) { | |
0.97 │ tst w8, #0xf0 | |
0.38 │ → b.eq 442f9c <WelsDec::CavlcGetLevelVal(int*, WelsDec::TagReadBitsCache*, unsigned char, unsigned char)+0x1fc> // b.none | |
│ uiValue >>= 4; | |
0.86 │ lsr w8, w8, #4 | |
│ iNumBit += 4; | |
0.30 │ add w11, w11, #0x4 | |
│ } | |
│ iNumBit += g_kuiPrefix8BitsTable[uiValue]; | |
1.16 │ adrp x4, WelsDec::g_ksInterBSubMbTypeInfo+0x60 | |
0.43 │ add x4, x4, #0x28 | |
2.90 │ ldr w4, [x4, w8, uxtw #2] | |
3.07 │ add w11, w4, w11 | |
│ | |
│ return (32 - iNumBit); | |
│ mov w4, #0x20 // #32 | |
1.11 │ sub w4, w4, w11 | |
│ _ZN7WelsDecL16CavlcGetLevelValEPiPNS_16TagReadBitsCacheEhh(): | |
│ if (iPrefixBits > MAX_LEVEL_PREFIX + 1) //iPrefixBits includes leading "0"s and first "1", should +1 | |
1.10 │ cmp w4, #0x10 | |
0.04 │ → b.gt 44304c <WelsDec::CavlcGetLevelVal(int*, WelsDec::TagReadBitsCache*, unsigned char, unsigned char)+0x2ac> | |
│ POP_BUFFER (pBitsCache, iPrefixBits); | |
1.11 │ lsl w8, w7, w4 | |
0.25 │ str w8, [x1] | |
1.16 │ ldrb w7, [x1, #4] | |
2.23 │ sub w7, w7, w4 | |
1.05 │ and w7, w7, #0xff | |
0.02 │ strb w7, [x1, #4] | |
│ iUsedBits += iPrefixBits; | |
0.98 │ add w0, w0, w4 | |
│ iLevelPrefix = iPrefixBits - 1; | |
0.03 │ sub w4, w4, #0x1 | |
│ iLevelCode = iLevelPrefix << iSuffixLength; //differ | |
1.11 │ lsl w12, w4, w6 | |
0.25 │ str w8, [x1] | |
1.16 │ ldrb w7, [x1, #4] | |
2.23 │ sub w7, w7, w4 | |
1.05 │ and w7, w7, #0xff | |
0.02 │ strb w7, [x1, #4] | |
│ iUsedBits += iPrefixBits; | |
0.98 │ add w0, w0, w4 | |
│ iLevelPrefix = iPrefixBits - 1; | |
0.03 │ sub w4, w4, #0x1 | |
│ iLevelCode = iLevelPrefix << iSuffixLength; //differ | |
1.11 │ lsl w12, w4, w6 | |
│ if (iLevelPrefix >= 14) { | |
0.04 │ cmp w4, #0xd | |
1.07 │ → b.le 442e3c <WelsDec::CavlcGetLevelVal(int*, WelsDec::TagReadBitsCache*, unsigned char, unsigned char)+0x9c> | |
│ if (14 == iLevelPrefix && 0 == iSuffixLength) | |
0.09 │ cmp w4, #0xe | |
0.01 │ cset w11, eq // eq = none | |
0.03 │ cmp w6, #0x0 | |
0.02 │ csel w11, w11, wzr, eq // eq = none | |
0.02 │ ↓ cbnz w11, 443014 <WelsDec::CavlcGetLevelVal(int*, 274 | |
│ else if (15 == iLevelPrefix) { | |
│ cmp w4, #0xf | |
0.26 │ → b.ne 442e3c <WelsDec::CavlcGetLevelVal(int*, WelsDec::TagReadBitsCache*, unsigned char, unsigned char)+0x9c> // b.any | |
│ if (iSuffixLength == 0) | |
0.01 │ ↓ cbnz w6, 44301c <WelsDec::CavlcGetLevelVal(int*, 27c | |
│ iLevelCode += 15; | |
│ add w12, w12, #0xf | |
│ iSuffixLengthSize = 12; | |
0.02 │ mov w11, #0xc // #12 | |
0.02 │ → b 442e48 <WelsDec::CavlcGetLevelVal(int*, WelsDec::TagReadBitsCache*, unsigned char, unsigned char)+0xa8> | |
│ iSuffixLengthSize = 4; | |
0.04 │274: mov w11, #0x4 // #4 | |
0.01 │ → b 442e48 <WelsDec::CavlcGetLevelVal(int*, WelsDec::TagReadBitsCache*, unsigned char, unsigned char)+0xa8> | |
│ iSuffixLengthSize = 12; | |
│27c: mov w11, #0xc // #12 | |
│ → b 442e48 <WelsDec::CavlcGetLevelVal(int*, WelsDec::TagReadBitsCache*, unsigned char, unsigned char)+0xa8> | |
│ iLevelCode += ((i == uiTrailingOnes) && (uiTrailingOnes < 3)) << 1; | |
1.39 │ cmp w3, #0x2 | |
0.15 │ → b.ls 443034 <WelsDec::CavlcGetLevelVal(int*, WelsDec::TagReadBitsCache*, unsigned char, unsigned char)+0x294> // b.plast | |
0.38 │ mov w4, #0x0 // #0 | |
0.08 │ → b 442ec0 <WelsDec::CavlcGetLevelVal(int*, WelsDec::TagReadBitsCache*, unsigned char, unsigned char)+0x120> | |
0.76 │ mov w4, #0x1 // #1 | |
0.15 │ → b 442ec0 <WelsDec::CavlcGetLevelVal(int*, WelsDec::TagReadBitsCache*, unsigned char, unsigned char)+0x120> | |
│ iSuffixLength += ((iLevel[i] > iThreshold) || (iLevel[i] < -iThreshold)) && (iSuffixLength < 6); | |
1.03 │ mov w4, #0x0 // #0 | |
0.04 │ → b 442f10 <WelsDec::CavlcGetLevelVal(int*, WelsDec::TagReadBitsCache*, unsigned char, unsigned char)+0x170> | |
0.30 │ mov w4, #0x1 // #1 | |
0.08 │ → b 442f10 <WelsDec::CavlcGetLevelVal(int*, WelsDec::TagReadBitsCache*, unsigned char, unsigned char)+0x170> | |
│ return -1; | |
│ mov w0, #0xffffffff // #-1 | |
│ } | |
│ | |
│ return iUsedBits; | |
│ } | |
2.08 │ adrp x1, __FRAME_END__+0x16798 | |
│ ldr x1, [x1, #2776] | |
0.63 │ ldr x2, [sp, #24] | |
0.66 │ ldr x1, [x1] | |
0.73 │ eor x1, x2, x1 | |
0.33 │ ↓ cbnz x1, 443070 <WelsDec::CavlcGetLevelVal(int*, 2d0 | |
0.48 │ ldp x29, x30, [sp], #32 | |
0.72 │ ← ret | |
------------------------------------------------------------------ | |
Run #2: | |
Expensive Functions: | |
10.34% h264dec libc-2.27.so [.] __memcpy_generic | |
8.16% h264dec libc-2.27.so [.] __GI___memset_generic | |
6.76% h264dec h264dec [.] WelsDec::WelsResidualBlockCavlc | |
4.78% h264dec h264dec [.] WelsDec::CavlcGetLevelVal | |
__memcpy_generic - Expensive Lines: | |
6.34 │ ldp x6, x7, [x1, #16] | |
13.49 │ stp x8, x9, [x3, #32] | |
6.37 │ ldp x8, x9, [x1, #32] | |
15.66 │ stp x10, x11, [x3, #48] | |
7.38 │ ldp x10, x11, [x1, #48] | |
16.47 │ stp x12, x13, [x3, #64]! | |
7.08 │ ldp x12, x13, [x1, #64]! | |
0.04 │ subs x2, x2, #0x40 | |
15.88 │ ↑ b.hi 110 | |
__GI___memset_generic - Expensive Lines: | |
11.60 │100: dc zva, x3 | |
52.57 │ add x3, x3, #0x40 | |
│ subs x2, x2, #0x40 | |
10.40 │ ↑ b.hi 100 | |
WelsDec::WelsResidualBlockCavlc - Expensive Lines: | |
1.10 │ sub w0, w0, #0x1 | |
0.30 │ ↓ tbnz w0, #31, 5a0 | |
│ iCoeffNum += iRun[i] + 1; //FIXME add 1 earlier ? | |
1.35 │ add x1, sp, #0xd8 | |
2.25 │ ldr w1, [x1, w0, sxtw #2] | |
2.21 │ add w1, w1, #0x1 | |
1.14 │ add w3, w3, w1 | |
│ j = kpZigzagTable[ iCoeffNum ]; | |
3.33 │ ldrb w1, [x26, w3, sxtw] | |
│ if (!pCtx->bUseScalingList) { | |
0.20 │ add x2, x22, #0x8a, lsl #12 | |
2.77 │ ldrb w2, [x2, #3565] | |
2.21 │ ↑ cbnz w2, 44514c <WelsDec::WelsResidualBlockCavlc(WelsDec::TagVlcTable*, 4f0 | |
│ pTCoeff[j] = (iLevel[i] * kpDequantCoeff[j & 0x07]); | |
0.03 │ add x2, sp, #0x98 | |
2.09 │ ldr w2, [x2, w0, sxtw #2] | |
│ ubfiz x4, x1, #1, #3 | |
1.23 │ ubfiz x1, x1, #1, #8 | |
2.16 │ ldrh w4, [x20, x4] | |
3.50 │ mul w2, w2, w4 | |
1.03 │ strh w2, [x23, x1] | |
WelsDec::CavlcGetLevelVal - Expensive Lines: | |
1.81 │ cmp w10, w5 | |
1.02 │ → b.eq 443024 <WelsDec::CavlcGetLevelVal(int*, WelsDec::TagReadBitsCache*, unsigned char, unsigned char)+0x284> // b.none | |
0.10 │ mov w4, #0x0 // #0 | |
1.68 │ add w4, w12, w4, lsl #1 | |
│ iLevel[i] = ((iLevelCode + 2) >> 1); | |
1.12 │ add w7, w4, #0x2 | |
2.21 │ asr w7, w7, #1 | |
│ iLevel[i] -= (iLevel[i] << 1) & (- (iLevelCode & 0x01)); | |
│ sbfx x4, x4, #0, #1 | |
2.26 │ and w4, w4, w7, lsl #1 | |
1.11 │ sub w4, w7, w4 | |
0.01 │ str w4, [x9, w5, sxtw #2] | |
│ | |
│ iSuffixLength += !iSuffixLength; | |
1.11 │ cmp w6, #0x0 | |
0.02 │ cinc w6, w6, eq // eq = none | |
│ iThreshold = 3 << (iSuffixLength - 1); | |
2.25 │ sub w8, w6, #0x1 | |
0.01 │ mov w7, #0x3 // #3 | |
2.18 │ lsl w7, w7, w8 | |
│ iSuffixLength += ((iLevel[i] > iThreshold) || (iLevel[i] < -iThreshold)) && (iSuffixLength < 6); | |
0.02 │ cmp w4, w7 | |
0.99 │ → b.gt 442f04 <WelsDec::CavlcGetLevelVal(int*, WelsDec::TagReadBitsCache*, unsigned char, unsigned char)+0x164> | |
0.09 │ neg w7, w7 | |
0.92 │ cmp w4, w7 | |
0.01 │ → b.ge 44303c <WelsDec::CavlcGetLevelVal(int*, WelsDec::TagReadBitsCache*, unsigned char, unsigned char)+0x29c> // b.tcont | |
0.56 │ cmp w6, #0x5 | |
3.02 │ ldr w4, [x4, w8, uxtw #2] | |
3.13 │ add w11, w4, w11 | |
2.14 │ adrp x1, __FRAME_END__+0x16798 | |
│ ldr x1, [x1, #2776] | |
0.53 │ ldr x2, [sp, #24] | |
0.57 │ ldr x1, [x1] | |
0.72 │ eor x1, x2, x1 | |
0.34 │ ↓ cbnz x1, 443070 <WelsDec::CavlcGetLevelVal(int*, 2d0 | |
0.48 │ ldp x29, x30, [sp], #32 | |
0.70 │ ← ret | |
│2d0: → bl __stack_chk_fail@plt |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment