Skip to content

Instantly share code, notes, and snippets.

@lemire
Created May 5, 2020
Embed
What would you like to do?
make_uint8x16_t
namespace {
/**
 * make_uint8x16_t builds a NEON SIMD register (uint8x16_t) from sixteen
 * individual byte values.
 *
 * This helper exists because the brace-initialization syntax
 * uint8x16_t x = {1,2,3...} is not recognized under Visual Studio.
 * Taking a std::initializer_list<uint8_t> as the parameter resulted in
 * inefficient code. With the current approach, if the arguments are
 * compile-time constants, GNU GCC compiles the call to an ldr, the same
 * as uint8x16_t x = {1,2,3...}.
 *
 * You should not use this function except with compile-time constants:
 * it is not efficient otherwise.
 *
 * @param x1..x16 the byte stored in lanes 0..15 respectively
 *                (x1 goes to lane 0, x16 goes to lane 15).
 * @return a vector whose lane i holds the (i+1)-th argument.
 */
uint8x16_t make_uint8x16_t(uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4,
                           uint8_t x5, uint8_t x6, uint8_t x7, uint8_t x8,
                           uint8_t x9, uint8_t x10, uint8_t x11, uint8_t x12,
                           uint8_t x13, uint8_t x14, uint8_t x15, uint8_t x16) {
  // Doing a load like so ends up generating worse code.
  //   uint8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8,
  //                        x9, x10, x11, x12, x13, x14, x15, x16};
  //   return vld1q_u8(array);
  uint8x16_t x{};
  // Visual Studio does not allow x[0] = x1, so every lane is written
  // through the vsetq_lane_u8 intrinsic. The lane index must be a
  // compile-time constant, hence the unrolled sequence instead of a loop.
  x = vsetq_lane_u8(x1, x, 0);
  x = vsetq_lane_u8(x2, x, 1);
  x = vsetq_lane_u8(x3, x, 2);
  x = vsetq_lane_u8(x4, x, 3);
  x = vsetq_lane_u8(x5, x, 4);
  x = vsetq_lane_u8(x6, x, 5);
  x = vsetq_lane_u8(x7, x, 6);
  x = vsetq_lane_u8(x8, x, 7);
  // Upper eight lanes: each lane takes its own argument (x9..x16).
  x = vsetq_lane_u8(x9, x, 8);
  x = vsetq_lane_u8(x10, x, 9);
  x = vsetq_lane_u8(x11, x, 10);
  x = vsetq_lane_u8(x12, x, 11);
  x = vsetq_lane_u8(x13, x, 12);
  x = vsetq_lane_u8(x14, x, 13);
  x = vsetq_lane_u8(x15, x, 14);
  x = vsetq_lane_u8(x16, x, 15);
  return x;
}
} // namespace
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment