Skip to content

Instantly share code, notes, and snippets.

@rygorous
Created February 3, 2023 07:50
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rygorous/ee10009e73e67eb5aa0e1c1c5cd9d3bf to your computer and use it in GitHub Desktop.
Save rygorous/ee10009e73e67eb5aa0e1c1c5cd9d3bf to your computer and use it in GitHub Desktop.
Multigetbits, the first
// Extracts eight bit fields from a 128-bit vector using one byte shuffle
// and one 16-bit multiply.
//
// Field i starts at bit offset pos<i> into the 128-bit "bytes" and is
// len<i> bits wide. Each field is returned in its own 16-bit lane (lane i
// holds field i), shifted up so that the field's most significant bit sits
// in bit 15 of the lane. The lane bits below the field are garbage: they
// are whatever source bits preceded the field, shifted up along with it.
// A logical right shift of lane i by (16 - len<i>) yields the plain value.
//
// Bit numbering: the lane for field i is built from source bytes
// (pos >> 3) and (pos >> 3) + 1, in that order, and the field is taken
// from bits [pos & 7, (pos & 7) + len) of that 16-bit value.
// NOTE(review): this is LSB-first bit packing provided .u16() views each
// byte pair as a little-endian 16-bit lane (as PSHUFB/PMULLW targets do) --
// confirm against the Vec128 implementation.
//
// Preconditions (enforced at compile time):
//   * 0 <= len<i> <= 8, so a field never spans more than two bytes;
//   * 0 <= pos<i> and pos<i> + len<i> <= 128, so a field lies entirely
//     inside the vector.
// A zero-width field at a byte-aligned position produces the multiplier
// 1 << 16, which does not fit in a 16-bit lane; what happens then depends
// on how Vec128_U16 is constructed, so avoid relying on it.
template<
    int pos0, int len0,
    int pos1, int len1,
    int pos2, int len2,
    int pos3, int len3,
    int pos4, int len4,
    int pos5, int len5,
    int pos6, int len6,
    int pos7, int len7
>
static inline Vec128_U16 simd_multigetbits(const Vec128_U8& bytes)
{
    // Reject field descriptions that violate the documented contract.
    // Without these checks an oversized len yields a negative shift count
    // below, and an out-of-range pos selects bytes outside the vector.
    static_assert(len0 >= 0 && len0 <= 8, "simd_multigetbits: len0 must be in [0, 8]");
    static_assert(len1 >= 0 && len1 <= 8, "simd_multigetbits: len1 must be in [0, 8]");
    static_assert(len2 >= 0 && len2 <= 8, "simd_multigetbits: len2 must be in [0, 8]");
    static_assert(len3 >= 0 && len3 <= 8, "simd_multigetbits: len3 must be in [0, 8]");
    static_assert(len4 >= 0 && len4 <= 8, "simd_multigetbits: len4 must be in [0, 8]");
    static_assert(len5 >= 0 && len5 <= 8, "simd_multigetbits: len5 must be in [0, 8]");
    static_assert(len6 >= 0 && len6 <= 8, "simd_multigetbits: len6 must be in [0, 8]");
    static_assert(len7 >= 0 && len7 <= 8, "simd_multigetbits: len7 must be in [0, 8]");

    static_assert(pos0 >= 0 && pos0 + len0 <= 128, "simd_multigetbits: field 0 lies outside the 128-bit vector");
    static_assert(pos1 >= 0 && pos1 + len1 <= 128, "simd_multigetbits: field 1 lies outside the 128-bit vector");
    static_assert(pos2 >= 0 && pos2 + len2 <= 128, "simd_multigetbits: field 2 lies outside the 128-bit vector");
    static_assert(pos3 >= 0 && pos3 + len3 <= 128, "simd_multigetbits: field 3 lies outside the 128-bit vector");
    static_assert(pos4 >= 0 && pos4 + len4 <= 128, "simd_multigetbits: field 4 lies outside the 128-bit vector");
    static_assert(pos5 >= 0 && pos5 + len5 <= 128, "simd_multigetbits: field 5 lies outside the 128-bit vector");
    static_assert(pos6 >= 0 && pos6 + len6 <= 128, "simd_multigetbits: field 6 lies outside the 128-bit vector");
    static_assert(pos7 >= 0 && pos7 + len7 <= 128, "simd_multigetbits: field 7 lies outside the 128-bit vector");

    // Grab the two bytes straddling each field: the byte containing the
    // field's first bit, followed by the next byte up.
    //
    // For a field that lives in byte 15 the second index is 16, which is
    // past the end of the vector. That is harmless: such a field cannot
    // straddle (pos + len <= 128), so whatever lands in the upper byte of
    // the lane is shifted out entirely by the multiply below.
    const Vec128_U8 shuf {
        (pos0 >> 3), (pos0 >> 3) + 1,
        (pos1 >> 3), (pos1 >> 3) + 1,
        (pos2 >> 3), (pos2 >> 3) + 1,
        (pos3 >> 3), (pos3 >> 3) + 1,
        (pos4 >> 3), (pos4 >> 3) + 1,
        (pos5 >> 3), (pos5 >> 3) + 1,
        (pos6 >> 3), (pos6 >> 3) + 1,
        (pos7 >> 3), (pos7 >> 3) + 1
    };

    // Use a multiply to do a per-lane variable shift to align the
    // desired bits at the top: multiplying by 1 << k is a left shift by k,
    // and k = 16 - len - (pos & 7) moves the field's top bit (bit
    // (pos & 7) + len - 1 of the lane) up to bit 15. Everything above the
    // field overflows out of the 16-bit lane and is discarded.
    const Vec128_U16 mult {
        1 << (16 - len0 - (pos0 & 7)),
        1 << (16 - len1 - (pos1 & 7)),
        1 << (16 - len2 - (pos2 & 7)),
        1 << (16 - len3 - (pos3 & 7)),
        1 << (16 - len4 - (pos4 & 7)),
        1 << (16 - len5 - (pos5 & 7)),
        1 << (16 - len6 - (pos6 & 7)),
        1 << (16 - len7 - (pos7 & 7))
    };

    // So far, all we've done is set up literal constants!
    // Actual code is extremely short:
    Vec128_U8 shuffled = bytes.shuf(shuf); // PSHUFB
    Vec128_U16 result = shuffled.u16() * mult; // PMULLW
    return result;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment