Skip to content

Instantly share code, notes, and snippets.

Created February 17, 2014 21:50
Embed
What would you like to do?
Sketch for AVX-2 renorm
// Lane -> input-word index table for the AVX2 renorm step.
//
// The table is indexed by the lane mask returned by _mm256_movemask_ps, which
// carries one bit per 32-bit lane of a 256-bit vector: 8 bits, so 256 possible
// values (not 64). Row i, column j holds the number of set bits of i strictly
// below bit j:
//
//     shuffles[i][j] = popcnt(i & ((1 << j) - 1))
//
// i.e. how many lower-numbered lanes are also flagged, which is the position
// of the input word that lane j should receive. Columns for lanes whose mask
// bit is clear are filled by the same formula; the consumer is expected to
// discard them.
//
// Alignment (original note: "align suitably"): each row is 8 bytes, so keeping
// the table at least 8-byte aligned prevents a row from straddling a cache
// line. No alignment specifier is applied here; that is left to the
// surrounding code, as in the original sketch.
//
// The rows are generated at compile time by the helper macros below so that
// all 256 entries follow the formula exactly; the helpers are #undef'd again
// right after the table.
#define SHUFFLES_BIT(i, b) (((i) >> (b)) & 1)
#define SHUFFLES_P1(i) (SHUFFLES_BIT(i, 0))
#define SHUFFLES_P2(i) (SHUFFLES_P1(i) + SHUFFLES_BIT(i, 1))
#define SHUFFLES_P3(i) (SHUFFLES_P2(i) + SHUFFLES_BIT(i, 2))
#define SHUFFLES_P4(i) (SHUFFLES_P3(i) + SHUFFLES_BIT(i, 3))
#define SHUFFLES_P5(i) (SHUFFLES_P4(i) + SHUFFLES_BIT(i, 4))
#define SHUFFLES_P6(i) (SHUFFLES_P5(i) + SHUFFLES_BIT(i, 5))
#define SHUFFLES_P7(i) (SHUFFLES_P6(i) + SHUFFLES_BIT(i, 6))
#define SHUFFLES_ROW(i) \
    { 0, SHUFFLES_P1(i), SHUFFLES_P2(i), SHUFFLES_P3(i), SHUFFLES_P4(i), \
      SHUFFLES_P5(i), SHUFFLES_P6(i), SHUFFLES_P7(i) }
#define SHUFFLES_ROWS4(i) \
    SHUFFLES_ROW(i), SHUFFLES_ROW((i) + 1), SHUFFLES_ROW((i) + 2), SHUFFLES_ROW((i) + 3)
#define SHUFFLES_ROWS16(i) \
    SHUFFLES_ROWS4(i), SHUFFLES_ROWS4((i) + 4), SHUFFLES_ROWS4((i) + 8), SHUFFLES_ROWS4((i) + 12)
#define SHUFFLES_ROWS64(i) \
    SHUFFLES_ROWS16(i), SHUFFLES_ROWS16((i) + 16), SHUFFLES_ROWS16((i) + 32), SHUFFLES_ROWS16((i) + 48)

static uint8_t const shuffles[256][8] = {
    SHUFFLES_ROWS64(0), SHUFFLES_ROWS64(64), SHUFFLES_ROWS64(128), SHUFFLES_ROWS64(192)
};

#undef SHUFFLES_ROWS64
#undef SHUFFLES_ROWS16
#undef SHUFFLES_ROWS4
#undef SHUFFLES_ROW
#undef SHUFFLES_P7
#undef SHUFFLES_P6
#undef SHUFFLES_P5
#undef SHUFFLES_P4
#undef SHUFFLES_P3
#undef SHUFFLES_P2
#undef SHUFFLES_P1
#undef SHUFFLES_BIT
__m256i x = *r;
__m256i x_biased = _mm256_xor_si256(x, _mm256_set1_epi32((int) 0x80000000));
__m256i greater = _mm256_cmpgt_epi32(_mm256_set1_epi32(RANS_WORD_L - 0x80000000), x_biased);
int mask = _mm256_movemask_ps(_mm256_castsi256_ps(greater));
__m256i memvals = _mm256_cvtepu16_epi32(_mm_loadu_si128((const __m128i *)*pptr));
__m256i shuffle = _mm256_cvtepu8_epi32(_mm_loadl_epi64((const __m128i *)shuffles[mask]));
__m256i xshifted = _mm256_slli_epi32(x, 16);
__m256i newx = _mm256_or_si256(xshifted, _mm256_permutevar8x32_epi32(memvals, shuffle));
*r = _mm256_blendv_epi8(x, newx, greater);
*pptr += __popcnt(mask);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment