Created
September 8, 2022 09:09
-
-
Save 3outeille/b7caebf6a6dcb5ee43b1d4465c03a632 to your computer and use it in GitHub Desktop.
fg_compute_block_avg_sse4
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Compute the average sample value of an 8-column block, scaled down to
 * 8-bit range, using SSE2 integer intrinsics.
 *
 * Each of the ySize rows contributes exactly 8 consecutive int16_t samples
 * starting at dstSampleBlk8[row * widthComp].
 *
 * @param dstSampleBlk8 Pointer to the first sample of the block; may be
 *                      unaligned (an unaligned load is used).
 * @param widthComp     Distance, in samples, between the start of two
 *                      consecutive rows.
 * @param pNumSamples   Output: number of samples that were averaged
 *                      (8 * ySize). Always written.
 * @param ySize         Number of rows to accumulate.
 * @param xSize         Unused: this kernel always reads 8 samples per row.
 * @param bitDepth      Sample bit depth; the average is shifted right by
 *                      (bitDepth - 8) when bitDepth is greater than 8.
 * @return The block average after clipping with ov_clip_uintp2(avg, 8);
 *         the clipped value of 0 when ySize is 0.
 */
int16_t fg_compute_block_avg_sse4(int16_t *dstSampleBlk8, uint32_t widthComp, uint16_t *pNumSamples,
                                  uint8_t ySize, uint8_t xSize, uint8_t bitDepth)
{
    /* Number of int16_t samples held by one 128-bit register. */
    enum { SAMPLES_PER_ROW = 8 };

    const uint16_t numSamples = (uint16_t)(ySize * SAMPLES_PER_ROW);
    int32_t blockAvg = 0;

    (void)xSize; /* the row width is fixed by the vector width */

    if (numSamples > 0) {
        /*
         * Multiplying by 1 and adding adjacent pairs widens the samples to
         * four 32-bit partial sums, so the accumulation cannot overflow the
         * way a 16-bit accumulator would for high bit depths or tall blocks.
         */
        const __m128i ones = _mm_set1_epi16(1);
        __m128i acc = _mm_setzero_si128();

        for (uint32_t row = 0; row < ySize; row++) {
            const __m128i samples =
                _mm_loadu_si128((const __m128i *)&dstSampleBlk8[row * widthComp]);
            acc = _mm_add_epi32(acc, _mm_madd_epi16(samples, ones));
        }

        /* Horizontal reduction of the four 32-bit lanes into lane 0. */
        acc = _mm_add_epi32(acc, _mm_srli_si128(acc, 8));
        acc = _mm_add_epi32(acc, _mm_srli_si128(acc, 4));

        int32_t sum = _mm_cvtsi128_si32(acc);
        if (sum < 0) {
            /* A negative total cannot be a valid average for the unsigned clip below. */
            sum = 0;
        }

        blockAvg = sum / numSamples;

        /* Bring high bit depth averages back to 8-bit range; never shift by a negative count. */
        if (bitDepth > 8) {
            blockAvg >>= (bitDepth - 8);
        }
    }

    *pNumSamples = numSamples;

    return (int16_t)ov_clip_uintp2((uint32_t)blockAvg, 8);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment