Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Sketch of a few ways to store 8 or 16 elements indexed by a packed byte array.
#include <xmmintrin.h>
#include <stdint.h>
// Table of eight slots whose one-byte type tags are packed into a single
// 64-bit word, so "which slots have type T?" is answered with a handful of
// integer (or MMX) operations instead of a per-slot loop.
struct ResourceTable8
{
    // Eight packed type tags; byte k (counting from the least-significant
    // byte) is the tag that the mask helpers report as slot k.
    uint64_t type_bits_;
    // Per-slot payload. NOTE(review): presumably elements_[k] belongs to tag
    // byte k; nothing in this struct reads it, so confirm against callers.
    uintptr_t elements_[8];

    // Portable SWAR search for `type` among the eight tag bytes.
    //
    // Returns a 64-bit mask in which bit (8*k + 7) is set exactly when tag
    // byte k equals `type`; every other bit is zero.
    inline uint64_t mask_for_type(uint8_t type) const
    {
        const uint64_t type_bits = type_bits_;
        // Replicate `type` into all eight byte lanes.
        const uint64_t splatted_type = 0x0101010101010101ull * type;
        // A lane becomes zero exactly where the tag equals `type`.
        const uint64_t zeroes = type_bits ^ splatted_type;
        // Zero-byte detection, see
        // http://www.azillionmonkeys.com/qed/asmexample.html
        // Adding 0x7F to the low 7 bits of a lane carries into bit 7 iff any
        // of those bits is set, and can never carry into the next lane;
        // OR-ing in the lane itself covers the case where only bit 7 is set.
        const uint64_t mask_lo = 0x7F7F7F7F7F7F7F7Full;
        const uint64_t mask_hi = 0x8080808080808080ull;
        // Bit 7 of each lane is now set for NON-matching lanes...
        const uint64_t inv_matches = (((zeroes & mask_lo) + mask_lo) | zeroes) & mask_hi;
        // ...so flip it to flag the matching lanes instead.
        const uint64_t matches = inv_matches ^ mask_hi;
        return matches;
    }

    // MMX/SSE variant of the same search.
    //
    // Returns a compact mask in which bit k is set exactly when tag byte k
    // equals `type` (only the low eight bits can be set).
    inline uint32_t mask_for_type_mm(uint8_t type) const
    {
        const __m64 type_bits = _mm_cvtsi64_m64(static_cast<long long>(type_bits_));
        const __m64 splatted_type = _mm_set1_pi8(static_cast<char>(type));
        // Each lane is 0xFF on equality, 0x00 otherwise.
        const __m64 mask = _mm_cmpeq_pi8(type_bits, splatted_type);
        // Gather the top bit of every lane into the low eight result bits.
        const uint32_t matches = static_cast<uint32_t>(_mm_movemask_pi8(mask));
        // MMX registers alias the x87 register stack; clear the MMX state so
        // that x87 floating-point code executed after this call (for example
        // inside printf) sees a clean FPU.
        _mm_empty();
        return matches;
    }

    // Defined out of line: print the index of every slot whose tag is `type`,
    // using the SWAR mask and the MMX mask respectively.
    void forall(uint8_t type);
    void forall_mm(uint8_t type);
};
// Sixteen-slot counterpart of the packed-tag table: the one-byte type tags
// live in a single 128-bit SSE register image.
struct ResourceTable16
{
    // Sixteen packed type tags, one byte per slot.
    __m128i type_bits_;
    // Per-slot payload. NOTE(review): presumably elements_[k] pairs with tag
    // byte k; this struct never reads it, so confirm against callers.
    uintptr_t elements_[16];

    // Returns a mask in which bit k is set exactly when tag byte k equals
    // `type`; only the low sixteen bits can be set.
    inline uint32_t mask_for_type_xmm(uint8_t type)
    {
        const __m128i needle = _mm_set1_epi8(type);
        const __m128i tags = _mm_loadu_si128(&type_bits_);
        // cmpeq yields 0xFF in every matching lane; movemask packs the top
        // bit of each lane into one integer.
        return _mm_movemask_epi8(_mm_cmpeq_epi8(tags, needle));
    }

    // Defined out of line: print the index of every slot whose tag is `type`.
    void forall_xmm(uint8_t type);
};
#include <stdio.h>
// Prints one line per slot whose tag equals `type`, lowest slot first,
// using the SWAR match mask.
void ResourceTable8::forall(uint8_t type)
{
    // Each match is flagged at the top bit of its byte lane (bit 8*k + 7), so
    // dividing the bit position by eight recovers the slot number k.
    // `remaining &= remaining - 1` clears the lowest set bit each iteration.
    for (uint64_t remaining = mask_for_type(type); remaining != 0; remaining &= remaining - 1)
    {
        const int lowest_bit = __builtin_ctzll(remaining);
        const int slot = lowest_bit / 8;
        printf("ResourceTable8::forall %d\n", slot);
    }
}
// Prints one line per slot whose tag equals `type`, lowest slot first,
// using the MMX match mask (one bit per slot).
void ResourceTable8::forall_mm(uint8_t type)
{
    // `pending &= pending - 1` clears the lowest set bit each iteration.
    for (uint32_t pending = mask_for_type_mm(type); pending != 0; pending &= pending - 1)
    {
        // The position of the lowest set bit is the slot number itself.
        const int slot = __builtin_ctz(pending);
        printf("ResourceTable8::forall_mm %d\n", slot);
    }
}
// Prints one line per slot whose tag equals `type`, lowest slot first,
// using the SSE2 match mask (one bit per slot).
void ResourceTable16::forall_xmm(uint8_t type)
{
    // `pending &= pending - 1` clears the lowest set bit each iteration.
    for (uint32_t pending = mask_for_type_xmm(type); pending != 0; pending &= pending - 1)
    {
        // The position of the lowest set bit is the slot number itself.
        const int slot = __builtin_ctz(pending);
        printf("ResourceTable16::forall_xmm %d\n", slot);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.