Skip to content

Instantly share code, notes, and snippets.

@Wunkolo
Last active April 19, 2024 07:53
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save Wunkolo/b715746f1599acf2c7943f9bcd2ef1fd to your computer and use it in GitHub Desktop.
Save Wunkolo/b715746f1599acf2c7943f9bcd2ef1fd to your computer and use it in GitHub Desktop.
#include <cstdint>
#include <cstdio>
#include <bitset>
#include <immintrin.h>
// Attempts at implementing _mm_srai_epi8, _mm_slli_epi8, and _mm_srli_epi8
// using affine galois field transformations(_mm_gf2p8affine_epi64_epi8, GNFI)
// Wed Nov 4 05:34:35 PM PST 2020 - wunkolo@gmail.com
// Arithmetic right shift of every 8-bit lane of `a` by `imm8` bits, shifting
// in copies of each lane's sign bit.
//
// Implemented as a single GF(2) affine transform: the 64-bit matrix operand
// holds one 8-bit row per byte, and the identity matrix is 0x0102040810204080.
// Moving the rows up by `imm8` bytes yields a logical right shift; the rows
// that were vacated are then set to 0x80 so that the corresponding (upper)
// result bits each select the lane's most significant bit.
//
// a    - sixteen packed 8-bit lanes.
// imm8 - shift count. Counts greater than 7 are treated as 7, i.e. every
//        result bit becomes a copy of the sign bit (the same saturation
//        _mm_srai_epi16 applies to oversized counts).
// Returns the shifted lanes.
inline __m128i _mm_srai_epi8(__m128i a, std::uint8_t imm8)
{
	// Unsigned constants: shifting bits out of a signed literal is undefined
	// behaviour before C++20.
	constexpr std::uint64_t identity  = 0x0102040810204080ull;
	constexpr std::uint64_t sign_rows = 0x8080808080808080ull;

	// Clamp so the bit-shift below never reaches 64 (undefined behaviour).
	// With a count of 7 the matrix is already 0x80 in every row, which is the
	// correct result for any larger count as well.
	const unsigned shift = 8u * (imm8 < 8 ? imm8 : 7u);

	// 0x80 in each of the low `shift / 8` rows, zero elsewhere.
	// (On BMI2 hardware `_bzhi_u64(sign_rows, shift)` computes the same mask.)
	const std::uint64_t sign_extend = ~(~std::uint64_t{0} << shift) & sign_rows;

	// Logical shift right, OR-ed with the sign-replicating rows.
	const std::uint64_t matrix = (identity << shift) | sign_extend;

	return _mm_gf2p8affine_epi64_epi8(
		a, _mm_set1_epi64x(static_cast<long long>(matrix)), 0
	);
}
// Logical left shift of every 8-bit lane of `a` by `imm8` bits, shifting in
// zeros.
//
// Implemented as a GF(2) affine transform whose matrix is the identity
// (0x0102040810204080) with its rows moved down by `imm8` bytes.
//
// a    - sixteen packed 8-bit lanes.
// imm8 - shift count. Counts greater than 7 shift every bit out and yield
//        all-zero lanes.
// Returns the shifted lanes.
inline __m128i _mm_slli_epi8(__m128i a, std::uint8_t imm8)
{
	constexpr std::uint64_t identity = 0x0102040810204080ull;

	// A 64-bit shift by 64 or more is undefined behaviour, so oversized counts
	// are mapped explicitly to the all-zero matrix (which produces zero lanes).
	const std::uint64_t matrix = (imm8 < 8) ? (identity >> (8u * imm8)) : 0;

	return _mm_gf2p8affine_epi64_epi8(
		a, _mm_set1_epi64x(static_cast<long long>(matrix)), 0
	);
}
// Logical right shift of every 8-bit lane of `a` by `imm8` bits, shifting in
// zeros.
//
// Implemented as a GF(2) affine transform whose matrix is the identity
// (0x0102040810204080) with its rows moved up by `imm8` bytes.
//
// a    - sixteen packed 8-bit lanes.
// imm8 - shift count. Counts greater than 7 shift every bit out and yield
//        all-zero lanes.
// Returns the shifted lanes.
inline __m128i _mm_srli_epi8(__m128i a, std::uint8_t imm8)
{
	// Unsigned constant: shifting bits out of a signed literal is undefined
	// behaviour before C++20.
	constexpr std::uint64_t identity = 0x0102040810204080ull;

	// A 64-bit shift by 64 or more is undefined behaviour, so oversized counts
	// are mapped explicitly to the all-zero matrix (which produces zero lanes).
	const std::uint64_t matrix = (imm8 < 8) ? (identity << (8u * imm8)) : 0;

	return _mm_gf2p8affine_epi64_epi8(
		a, _mm_set1_epi64x(static_cast<long long>(matrix)), 0
	);
}
// Writes the sixteen bytes of `Vector` to stdout as two tab-indented rows:
// first each byte as eight binary digits, then each byte as a signed decimal.
// Every value is followed by a `'` separator and each row ends in a newline.
void Print128(const __m128i& Vector)
{
	const auto* Lanes = reinterpret_cast<const std::int8_t*>(&Vector);

	// Row 1: binary representation of each lane.
	std::putchar('\t');
	for( std::size_t Lane = 0; Lane != 16; ++Lane )
	{
		const std::bitset<8> Bits(Lanes[Lane]);
		std::printf("%s'", Bits.to_string().c_str());
	}
	std::putchar('\n');

	// Row 2: signed decimal value of each lane.
	std::putchar('\t');
	for( std::size_t Lane = 0; Lane != 16; ++Lane )
	{
		std::printf("%d'", Lanes[Lane]);
	}
	std::putchar('\n');
}
// Demo driver: broadcasts one test byte to all sixteen lanes and prints the
// input followed by the result of each emulated shift with a count of 3.
int main()
{
	// The test byte has its top bit set, so the arithmetic and logical right
	// shifts print different results.
	const __m128i Sample = _mm_set1_epi8(0b10010000);

	std::puts("Input");
	Print128(Sample);

	std::puts("SRA(3)");
	Print128(_mm_srai_epi8(Sample, 3));

	std::puts("SLL(3)");
	Print128(_mm_slli_epi8(Sample, 3));

	std::puts("SRL(3)");
	Print128(_mm_srli_epi8(Sample, 3));
}
// Input
// 10010000'10010000'10010000'10010000'10010000'10010000'10010000'10010000'10010000'10010000'10010000'10010000'10010000'10010000'10010000'10010000'
// -112'-112'-112'-112'-112'-112'-112'-112'-112'-112'-112'-112'-112'-112'-112'-112'
// SRA(3)
// 11110010'11110010'11110010'11110010'11110010'11110010'11110010'11110010'11110010'11110010'11110010'11110010'11110010'11110010'11110010'11110010'
// -14'-14'-14'-14'-14'-14'-14'-14'-14'-14'-14'-14'-14'-14'-14'-14'
// SLL(3)
// 10000000'10000000'10000000'10000000'10000000'10000000'10000000'10000000'10000000'10000000'10000000'10000000'10000000'10000000'10000000'10000000'
// -128'-128'-128'-128'-128'-128'-128'-128'-128'-128'-128'-128'-128'-128'-128'-128'
// SRL(3)
// 00010010'00010010'00010010'00010010'00010010'00010010'00010010'00010010'00010010'00010010'00010010'00010010'00010010'00010010'00010010'00010010'
// 18'18'18'18'18'18'18'18'18'18'18'18'18'18'18'18'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment