Created
April 26, 2016 06:06
-
-
Save Nexuapex/6fa2d2659c875534f88486e9c087cc15 to your computer and use it in GitHub Desktop.
Sketch of a few ways to store 8 or 16 elements indexed by a packed byte array.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdint.h>

#include <emmintrin.h>
#include <xmmintrin.h>
// Table of eight elements whose one-byte type tags are packed into a single
// 64-bit word: byte i of type_bits_ (counting from the least significant
// byte) is the tag that the lookup helpers below report as slot i.
struct ResourceTable8
{
    uint64_t type_bits_;     // eight packed type tags, one byte per slot
    uintptr_t elements_[8];  // per-slot storage; not read by the code in this struct

    // Portable (SWAR) lookup.
    // Returns a mask in which bit 7 of byte i is set exactly when byte i of
    // type_bits_ equals `type`; every other bit is clear.
    inline uint64_t mask_for_type(uint8_t type) const
    {
        const uint64_t low7 = 0x7F7F7F7F7F7F7F7Full;
        const uint64_t high1 = 0x8080808080808080ull;

        // Broadcast `type` into every byte; after the XOR a matching slot is
        // a zero byte.
        const uint64_t diff = type_bits_ ^ (0x0101010101010101ull * type);

        // Zero-byte detection, see
        // http://www.azillionmonkeys.com/qed/asmexample.html
        // Adding 0x7F to the low seven bits of a byte carries into bit 7 iff
        // any of those bits is set; OR-ing `diff` back in accounts for bit 7
        // itself. The per-byte sum is at most 0xFE, so no carry ever crosses
        // into the neighbouring byte.
        const uint64_t nonzero = (((diff & low7) + low7) | diff) & high1;

        // Flip the flags: set means "byte was zero", i.e. the tag matched.
        return nonzero ^ high1;
    }

    // MMX/SSE lookup (x86-64 only because of _mm_cvtsi64_m64).
    // Returns a mask in which bit i is set exactly when byte i of type_bits_
    // equals `type`; only the low eight bits can be set.
    inline uint32_t mask_for_type_mm(uint8_t type) const
    {
        const __m64 tags = _mm_cvtsi64_m64(static_cast<long long>(type_bits_));
        const __m64 splat = _mm_set1_pi8(static_cast<char>(type));
        const uint32_t matches =
            static_cast<uint32_t>(_mm_movemask_pi8(_mm_cmpeq_pi8(tags, splat)));

        // MMX registers alias the x87 register stack. Clear the MMX state
        // before returning so later floating-point code is not corrupted.
        _mm_empty();
        return matches;
    }

    // Print the index of every slot whose tag equals `type` (SWAR variant).
    void forall(uint8_t type);

    // Print the index of every slot whose tag equals `type` (MMX variant).
    void forall_mm(uint8_t type);
};
// Table of sixteen elements whose one-byte type tags are packed into a single
// 128-bit SSE register image: byte lane i of type_bits_ is the tag that the
// lookup helper below reports as slot i.
struct ResourceTable16
{
    __m128i type_bits_;       // sixteen packed type tags, one byte per slot
    uintptr_t elements_[16];  // per-slot storage; not read by the code in this struct

    // SSE2 lookup.
    // Returns a mask in which bit i is set exactly when byte lane i of
    // type_bits_ equals `type`; only the low sixteen bits can be set.
    inline uint32_t mask_for_type_xmm(uint8_t type) const
    {
        const __m128i tags = _mm_loadu_si128(&type_bits_);
        const __m128i splat = _mm_set1_epi8(static_cast<char>(type));

        // cmpeq yields 0xFF in every matching lane; movemask gathers the top
        // bit of each lane into the low sixteen bits of an int.
        return static_cast<uint32_t>(_mm_movemask_epi8(_mm_cmpeq_epi8(tags, splat)));
    }

    // Print the index of every slot whose tag equals `type`.
    void forall_xmm(uint8_t type);
};
#include <stdio.h> | |
void ResourceTable8::forall(uint8_t type) | |
{ | |
uint64_t mask = mask_for_type(type); | |
while (mask) | |
{ | |
const int bit_index = __builtin_ctzll(mask); | |
const int index = (bit_index - 7) >> 3; | |
printf("ResourceTable8::forall %d\n", index); | |
mask &= mask - 1; | |
} | |
} | |
void ResourceTable8::forall_mm(uint8_t type) | |
{ | |
uint32_t mask = mask_for_type_mm(type); | |
while (mask) | |
{ | |
const int index = __builtin_ctz(mask); | |
printf("ResourceTable8::forall_mm %d\n", index); | |
mask &= mask - 1; | |
} | |
} | |
void ResourceTable16::forall_xmm(uint8_t type) | |
{ | |
uint32_t mask = mask_for_type_xmm(type); | |
while (mask) | |
{ | |
const int index = __builtin_ctz(mask); | |
printf("ResourceTable16::forall_xmm %d\n", index); | |
mask &= mask - 1; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment