Skip to content

Instantly share code, notes, and snippets.

@3outeille
Created September 8, 2022 09:09
Show Gist options
  • Save 3outeille/b7caebf6a6dcb5ee43b1d4465c03a632 to your computer and use it in GitHub Desktop.
Save 3outeille/b7caebf6a6dcb5ee43b1d4465c03a632 to your computer and use it in GitHub Desktop.
fg_compute_block_avg_sse4
/**
 * Average an 8-sample-wide block of 16-bit samples and scale the result
 * down to 8-bit precision.
 *
 * Each row is read with one unaligned 128-bit load, i.e. exactly eight
 * int16_t samples per row, so every row must have 8 readable samples.
 *
 * @param dstSampleBlk8  Pointer to the first sample of the block.
 * @param widthComp      Stride between consecutive rows, in samples.
 * @param pNumSamples    Out: number of samples averaged (8 * ySize).
 * @param ySize          Number of rows to process.
 * @param xSize          Unused: the block width is fixed at 8 samples.
 * @param bitDepth       Sample bit depth; the average is shifted right by
 *                       (bitDepth - 8) when bitDepth > 8.
 * @return The block average clamped to [0, 255]; 0 when ySize is 0.
 */
int16_t fg_compute_block_avg_sse4(int16_t *dstSampleBlk8, uint32_t widthComp, uint16_t *pNumSamples,
                                  uint8_t ySize, uint8_t xSize, uint8_t bitDepth)
{
    (void)xSize; /* width is implied by the 128-bit load (8 x int16_t) */

    const uint16_t numSamples = (uint16_t)(ySize * 8u);
    const __m128i ones = _mm_set1_epi16(1);
    __m128i acc = _mm_setzero_si128();
    uint32_t avg = 0;

    for (uint32_t row = 0; row < ySize; row++) {
        const __m128i x = _mm_loadu_si128((const __m128i *)&dstSampleBlk8[row * widthComp]);
        /*
         * Multiply by 1 and add adjacent pairs: widens the eight 16-bit
         * samples into four 32-bit partial sums, so the accumulation can
         * neither saturate nor wrap as a 16-bit accumulator would.
         */
        acc = _mm_add_epi32(acc, _mm_madd_epi16(x, ones));
    }

    if (numSamples > 0) {
        /* Horizontal sum of the four 32-bit lanes. */
        acc = _mm_add_epi32(acc, _mm_shuffle_epi32(acc, _MM_SHUFFLE(1, 0, 3, 2)));
        acc = _mm_add_epi32(acc, _mm_shuffle_epi32(acc, _MM_SHUFFLE(2, 3, 0, 1)));

        int32_t sum = _mm_cvtsi128_si32(acc);
        if (sum < 0) {
            sum = 0; /* lower clamp; also keeps the shift below well defined */
        }

        avg = (uint32_t)sum / numSamples;

        /* Bring high bit depths down to 8 bits; never shift by a negative
         * or over-wide amount. */
        if (bitDepth > 8) {
            const unsigned shift = bitDepth - 8u;
            avg = (shift < 32u) ? (avg >> shift) : 0u;
        }
    }

    *pNumSamples = numSamples;

    /* Clamp to the unsigned 8-bit range. */
    if (avg > 255u) {
        avg = 255u;
    }
    return (int16_t)avg;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment