Created
February 25, 2021 00:04
-
-
Save Const-me/f6d045e51f7304f256f3963713fde554 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Pack the low 10 bits of each of the sixteen 16-bit lanes of an AVX2 vector
// into one contiguous bit stream and store it.
//
// Lane i of `v` ends up in bits [10*i, 10*i + 9] of the output, counting bits
// from the least significant bit of rdi[0]. The upper 6 bits of every lane are
// discarded (truncation, not saturation).
//
// v    Source vector, interpreted as 16 x 16-bit lanes.
// rdi  Destination. Exactly 20 bytes (16 * 10 bits) are written with unaligned
//      stores, so no particular alignment is required.
//
// Declared `static inline` so that a C compiler which decides not to inline a
// call still has a definition to link against.
static inline void store_10x16_avx2( __m256i v, uint8_t* rdi )
{
	__m256i low, high;

	// Pack pairs of 10 bits into 20.
	// Even 16-bit lanes are moved up by 6 bits so that their payload touches the
	// payload of the odd lane that follows them inside the same 32-bit lane.
	low = _mm256_slli_epi16( v, 6 );
	v = _mm256_blend_epi16( v, low, 0x55 );
	// Now the vector contains 32-bit lanes with 20 payload bits each, in bits [6, 25].
	// Bits [26, 31] may still hold the unused upper bits of the odd source lanes.

	// Pack pairs of 20 bits into 40.
	// Even 32-bit lanes push their payload to the top (which also shifts the junk
	// bits out), odd 32-bit lanes pull their payload down to the bottom.
	low = _mm256_slli_epi32( v, 6 );
	high = _mm256_srli_epi32( v, 6 );
	v = _mm256_blend_epi32( high, low, 0x55 );
	// Now the vector contains 64-bit lanes with 40 payload bits each, in bits [12, 51].

	// One last shift to move the payload to the lowest positions.
	v = _mm256_srli_epi64( v, 12 );
	// Now bytes 0..4 of every 64-bit lane are payload. Byte 5 may contain leftover
	// junk bits; it is never selected by the shuffle below.

	// 40 bits = 5 bytes per 64-bit lane; store the initial 4 bytes of the result.
	_mm_storeu_si32( rdi, _mm256_castsi256_si128( v ) );

	// Shuffle the remaining 16 bytes of payload into their final positions.
	// _mm256_shuffle_epi8 works on each 128-bit half independently, and an index
	// of -1 produces a zero byte, so the two halves can be merged with a plain OR.
	const __m256i shuffleIndices = _mm256_setr_epi8(
		// Lower half: last byte of the first group, then all 5 bytes of the second one
		4, 8, 9, 10, 11, 12,
		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
		// Higher half: leave room for the 6 bytes above, then both 5-byte groups
		-1, -1, -1, -1, -1, -1,
		0, 1, 2, 3, 4,
		8, 9, 10, 11, 12
	);
	v = _mm256_shuffle_epi8( v, shuffleIndices );

	// Combine and store the final 16 bytes of payload.
	const __m128i low16 = _mm256_castsi256_si128( v );
	const __m128i high16 = _mm256_extracti128_si256( v, 1 );
	const __m128i result = _mm_or_si128( low16, high16 );
	_mm_storeu_si128( ( __m128i* )( rdi + 4 ), result );
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment