Last active
April 10, 2020 00:32
-
-
Save aqrit/f666e05362bc75376ac59a34879fad4f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stddef.h> | |
#include <stdint.h> | |
/// flags | |
#define SVB_NORMAL 0 | |
#define SVB_SPECIALCASEZERO 1 | |
#define SVB_DELTAASCENDING 2 | |
#define SVB_DELTADESCENDING 4 | |
#define SVB_VARSIGNEXTEND 8 | |
#define SVB_ZIGZAG 16 | |
/* | |
SVB_NORMAL: | |
| key | cb | ranges | level shifted ranges | | |
|------|----|---------------|------------------------| | |
| 0b00 | 1 | 0..0x000000FF | 0xFFFFFF80..0x0000007F | | |
| 0b01 | 2 | 0..0x0000FFFF | 0xFFFF8000..0x00007FFF | | |
| 0b10 | 3 | 0..0x00FFFFFF | 0xFF800000..0x007FFFFF | | |
| 0b11 | 4 | 0..0xFFFFFFFF | 0x80000000..0x7FFFFFFF | | |
SVB_SPECIALCASEZERO: | |
| key | cb | ranges | level shifted ranges | | |
|------|----|---------------|------------------------| | |
| 0b00 | 0 | 0..0x00000000 | 0x00000000..0x00000000 | | |
| 0b01 | 1 | 0..0x000000FF | 0xFFFFFF80..0x0000007F | | |
| 0b10 | 2 | 0..0x0000FFFF | 0xFFFF8000..0x00007FFF | | |
| 0b11 | 4 | 0..0xFFFFFFFF | 0x80000000..0x7FFFFFFF | | |
SVB_DELTAASCENDING: | |
delta encoding for input that is mostly ordered | |
from the smallest to the largest number | |
SVB_DELTADESCENDING: | |
delta encoding for input that is mostly ordered | |
from the largest to the smallest number | |
SVB_VARSIGNEXTEND: | |
level shifted the ranges | |
(similar to zigzag encoding but it uses a different strategy) | |
SVB_ZIGZAG: | |
level shifted the ranges | |
*/ | |
/** | |
* STREAMVBYTE_ENCODE... | |
* | |
* Macro Pitfalls: | |
* All [in,out] arguments must NOT be an expression. | |
* `flags` should be an integer constant expression. | |
* | |
* @param [in,out] dest - uint8_t array | |
* @param [in,out] src - const uint32_t array | |
* @param [in] count - number of elements in src array | |
* @param [in] flags | |
* @param [in,out] prev - u32 base to start delta ops (if delta flag) | |
*/ | |
#define STREAMVBYTE_ENCODE(dest, src, count, flags, prev) \ | |
do { \ | |
uint8_t* key_ptr = dest; \ | |
size_t cnt = (count); \ | |
const uint32_t* end; \ | |
dest += SVB_KEY_BLOCK_SIZE(cnt); /* start of data block */ \ | |
\ | |
/* encode 4 elements and produce 1 key byte per iteration */ \ | |
for (end = &src[(cnt | 3) ^ 3]; src != end; src += 4) { \ | |
uint32_t v0, v1, v2, v3; /* values */ \ | |
uint32_t k0, k1, k2, k3; /* keys */ \ | |
uint32_t l0, l1, l2, l3; /* lengths */ \ | |
\ | |
if (SVB_DELTAASCENDING & (flags)) { \ | |
v0 = src[0] - prev; \ | |
v1 = src[1] - src[0]; \ | |
v2 = src[2] - src[1]; \ | |
v3 = src[3] - src[2]; \ | |
prev = src[3]; \ | |
} else { \ | |
if (SVB_DELTADESCENDING & (flags)) { \ | |
v0 = prev; \ | |
v1 = src[0]; \ | |
v2 = src[1]; \ | |
v3 = src[2]; \ | |
prev = src[3]; \ | |
\ | |
v0 -= v1; \ | |
v1 -= v2; \ | |
v2 -= v3; \ | |
v3 -= prev; \ | |
} else { \ | |
v0 = src[0]; \ | |
v1 = src[1]; \ | |
v2 = src[2]; \ | |
v3 = src[3]; \ | |
} \ | |
} \ | |
\ | |
if (SVB_ZIGZAG & (flags)) { \ | |
v0 = (v0 + v0) ^ -(v0 >> 31); \ | |
v1 = (v1 + v1) ^ -(v1 >> 31); \ | |
v2 = (v2 + v2) ^ -(v2 >> 31); \ | |
v3 = (v3 + v3) ^ -(v3 >> 31); \ | |
} \ | |
k0 = v0; \ | |
k1 = v1; \ | |
k2 = v2; \ | |
k3 = v3; \ | |
\ | |
if (SVB_VARSIGNEXTEND & (flags)) { \ | |
/* find where the "extended" sign bits can be truncated */ \ | |
/* aka. find the bit flip nearest to the MSB */ \ | |
/* aka. the first part of __builtin_clrsb() */ \ | |
k0 ^= k0 + k0; \ | |
k1 ^= k1 + k1; \ | |
k2 ^= k2 + k2; \ | |
k3 ^= k3 + k3; \ | |
} \ | |
\ | |
if (SVB_SPECIALCASEZERO & (flags)) { \ | |
/* smear left... before SVB_KEY op */ \ | |
k0 |= (k0 << 8); \ | |
k1 |= (k1 << 8); \ | |
k2 |= (k2 << 8); \ | |
k3 |= (k3 << 8); \ | |
} \ | |
SVB_KEY(k0); \ | |
SVB_KEY(k1); \ | |
SVB_KEY(k2); \ | |
SVB_KEY(k3); \ | |
if (SVB_SPECIALCASEZERO & (flags)) { \ | |
l0 = k0 + (k0 >= 3); /* (k*5 + 1) >> 2 */ \ | |
l1 = k1 + (k1 >= 3); /* ((k >> 1) & k) + k */ \ | |
l2 = k2 + (k2 >= 3); \ | |
l3 = k3 + (k3 >= 3); \ | |
} else { \ | |
l0 = k0 + 1; \ | |
l1 = k1 + 1; \ | |
l2 = k2 + 1; \ | |
l3 = k3 + 1; \ | |
} \ | |
\ | |
SVB_PUT_LE32(dest, v0); \ | |
dest += l0; \ | |
SVB_PUT_LE32(dest, v1); \ | |
dest += l1; \ | |
SVB_PUT_LE32(dest, v2); \ | |
dest += l2; \ | |
SVB_PUT_LE32(dest, v3); \ | |
dest += l3; \ | |
\ | |
*key_ptr++ = (k3 << 6) | (k2 << 4) | ((k1 * 4) + k0); \ | |
} \ | |
\ | |
/* tail loop */ \ | |
cnt &= 3; \ | |
if (cnt != 0) { \ | |
uint32_t key = 0; \ | |
uint32_t shift = 0; \ | |
end = &src[cnt]; \ | |
do { \ | |
uint32_t v; \ | |
if (SVB_DELTAASCENDING & (flags)) { \ | |
v = src[0] - prev; \ | |
prev = src[0]; \ | |
} else { \ | |
if (SVB_DELTADESCENDING & (flags)) { \ | |
v = prev; \ | |
prev = src[0]; \ | |
v -= prev; \ | |
} else { \ | |
v = src[0]; \ | |
} \ | |
} \ | |
if (SVB_ZIGZAG & (flags)) v = (v + v) ^ -(v >> 31); \ | |
SVB_PUT_LE32(dest, v); \ | |
if (SVB_VARSIGNEXTEND & (flags)) v ^= v + v; \ | |
if (SVB_SPECIALCASEZERO & (flags)) v |= (v << 8); \ | |
SVB_KEY(v); \ | |
if (SVB_SPECIALCASEZERO & (flags)) { \ | |
dest += v + (v >= 3); \ | |
} else { \ | |
dest += v + 1; \ | |
} \ | |
key |= (v << shift); \ | |
shift += 2; \ | |
} while (++src != end); \ | |
*key_ptr = key; \ | |
} \ | |
} while (0) | |
/** | |
* STREAMVBYTE_DECODE... | |
* | |
* Macro Pitfalls: | |
* All [in,out] arguments must NOT be an expression. | |
* `flags` should be an integer constant expression. | |
* | |
* @param [in,out] dest - uint32_t array | |
* @param [in,out] src - const uint8_t array | |
* @param [in] count - number of elements encoded into src stream | |
* @param [in] flags | |
* @param [in,out] prev - u32 base to start delta ops (if delta flag) | |
*/ | |
#define STREAMVBYTE_DECODE(dest, src, count, flags, prev) \ | |
do { \ | |
const uint8_t* key_ptr = src; \ | |
size_t cnt = (count); \ | |
src += SVB_KEY_BLOCK_SIZE(cnt); /* start of data block */ \ | |
\ | |
if (cnt > 12) { /* use key chunk as padding */ \ | |
const uint32_t* end = &dest[(cnt | 3) ^ 3]; \ | |
src -= 4; \ | |
do { \ | |
const uint32_t mask = 0x401041; \ | |
uint32_t v0, v1, v2, v3; /* values */ \ | |
uint32_t s0, s1, s2, s3; /* shift amounts */ \ | |
uint32_t l0, l1, l2, l3; /* lengths */ \ | |
uint32_t k = *key_ptr++; /* key bits */ \ | |
\ | |
/* calc packed sizes and shift amounts (using swar) */ \ | |
if (SVB_SPECIALCASEZERO & (flags)) { \ | |
if (SVB_VARSIGNEXTEND & (flags)) { \ | |
k = (k * 0x00101004) & mask*12; \ | |
k = k*5 + mask*4; \ | |
k = ((k + mask*4) & mask*24) | ((k >> 4) & mask*7);\ | |
} else { \ | |
k = k * 0x00040401 & mask*3; \ | |
k = (k*9 + mask*5 ) & mask*0x38; \ | |
k = (mask*32 - k) | (k >> 3); \ | |
} \ | |
} else { \ | |
k = k * 0x00040401 & mask*3; \ | |
k = (k*9 + mask) ^ mask*24; \ | |
} \ | |
\ | |
/* extract */ \ | |
l0 = k & 7; \ | |
l1 = (k >> 12) & 7; \ | |
l2 = (k >> 22) & 7; \ | |
l3 = (k >> 6) & 7; \ | |
s0 = k & 0x38; \ | |
s1 = (k >> 12) & 0x38; \ | |
s2 = (k >> 22) & 0x38; \ | |
s3 = (k >> 6) & 0x38; \ | |
\ | |
/* overlapping reads */ \ | |
src += l0; \ | |
SVB_GET_LE32(src, v0); \ | |
src += l1; \ | |
SVB_GET_LE32(src, v1); \ | |
src += l2; \ | |
SVB_GET_LE32(src, v2); \ | |
src += l3; \ | |
SVB_GET_LE32(src, v3); \ | |
\ | |
if (SVB_SPECIALCASEZERO & (flags)) { \ | |
if (SVB_VARSIGNEXTEND & (flags)) { \ | |
/* if length is zero then zero the upper bits */ \ | |
v0 &= -l0 | 7; \ | |
v1 &= -l1 | 7; \ | |
v2 &= -l2 | 7; \ | |
v3 &= -l3 | 7; \ | |
} \ | |
} \ | |
\ | |
/* shift-off unwanted bytes */ \ | |
if (SVB_VARSIGNEXTEND & (flags)) { \ | |
/* implementation-defined behavior (sticky-sign-bit) */ \ | |
v0 = (int32_t)v0 >> s0; \ | |
v1 = (int32_t)v1 >> s1; \ | |
v2 = (int32_t)v2 >> s2; \ | |
v3 = (int32_t)v3 >> s3; \ | |
} else { \ | |
if (SVB_SPECIALCASEZERO & (flags)) { \ | |
/* may shift right by 32 to get a zero value */ \ | |
v0 = (uint64_t)v0 >> s0; \ | |
v1 = (uint64_t)v1 >> s1; \ | |
v2 = (uint64_t)v2 >> s2; \ | |
v3 = (uint64_t)v3 >> s3; \ | |
} else { \ | |
v0 >>= s0; \ | |
v1 >>= s1; \ | |
v2 >>= s2; \ | |
v3 >>= s3; \ | |
} \ | |
} \ | |
\ | |
/* recover */ \ | |
if (SVB_ZIGZAG & (flags)) { \ | |
v0 = (v0 >> 1) ^ -(v0 & 1); \ | |
v1 = (v1 >> 1) ^ -(v1 & 1); \ | |
v2 = (v2 >> 1) ^ -(v2 & 1); \ | |
v3 = (v3 >> 1) ^ -(v3 & 1); \ | |
} \ | |
if (SVB_DELTAASCENDING & (flags)) { \ | |
v0 += prev; \ | |
v1 += v0; \ | |
v2 += v1; \ | |
v3 += v2; \ | |
prev = v3; \ | |
} else { \ | |
if (SVB_DELTADESCENDING & (flags)) { \ | |
prev -= v0; v0 = prev; \ | |
prev -= v1; v1 = prev; \ | |
prev -= v2; v2 = prev; \ | |
prev -= v3; v3 = prev; \ | |
} \ | |
} \ | |
\ | |
/* store */ \ | |
dest[0] = v0; \ | |
dest[1] = v1; \ | |
dest[2] = v2; \ | |
dest[3] = v3; \ | |
dest += 4; \ | |
} while (dest != end); \ | |
\ | |
cnt &= 3; \ | |
src += 4; \ | |
} \ | |
\ | |
/* tail loop */ \ | |
if (cnt != 0) { \ | |
uint32_t keys = key_ptr[0]; \ | |
if (cnt > 4) { \ | |
keys |= key_ptr[1] << 8; \ | |
if (cnt > 8) { \ | |
keys |= key_ptr[2] << 16; \ | |
} \ | |
} \ | |
do { \ | |
uint32_t v; \ | |
uint32_t k = keys & 3; \ | |
keys >>= 2; \ | |
if (SVB_SPECIALCASEZERO & (flags)) { \ | |
if (SVB_VARSIGNEXTEND & (flags)) { \ | |
switch (k) { \ | |
case 3: \ | |
SVB_GET_LE32(src, v); \ | |
src += 4; \ | |
break; \ | |
case 2: \ | |
SVB_GET_LE16(src, v); \ | |
v = (int16_t)v; \ | |
src += 2; \ | |
break; \ | |
case 1: \ | |
v = (int8_t)*src++; \ | |
break; \ | |
case 0: v = 0; \ | |
} \ | |
} else { \ | |
switch (k) { \ | |
case 3: \ | |
SVB_GET_LE32(src, v); \ | |
src += 4; \ | |
break; \ | |
case 2: \ | |
SVB_GET_LE16(src, v); \ | |
src += 2; \ | |
break; \ | |
case 1: \ | |
v = *src++; \ | |
break; \ | |
case 0: \ | |
v = 0; \ | |
} \ | |
} \ | |
} else { \ | |
if (SVB_VARSIGNEXTEND & (flags)) { \ | |
switch (k) { \ | |
case 3: \ | |
SVB_GET_LE32(src, v); \ | |
src += 4; \ | |
break; \ | |
case 2: \ | |
SVB_GET_LE32(src - 1, v); \ | |
v = (int32_t)v >> 8; \ | |
src += 3; \ | |
break; \ | |
case 1: \ | |
SVB_GET_LE16(src, v); \ | |
v = (int16_t)v; \ | |
src += 2; \ | |
break; \ | |
case 0: \ | |
v = (int8_t)*src++; \ | |
} \ | |
} else { \ | |
switch (k) { \ | |
case 3: \ | |
SVB_GET_LE32(src, v); \ | |
src += 4; \ | |
break; \ | |
case 2: \ | |
SVB_GET_LE32(src - 1, v); \ | |
v >>= 8; \ | |
src += 3; \ | |
break; \ | |
case 1: \ | |
SVB_GET_LE16(src, v); \ | |
src += 2; \ | |
break; \ | |
case 0: \ | |
v = *src++; \ | |
} \ | |
} \ | |
} \ | |
\ | |
if (SVB_ZIGZAG & (flags)) v = (v >> 1) ^ -(v & 1); \ | |
\ | |
if (SVB_DELTAASCENDING & (flags)) { \ | |
prev += v; v = prev; \ | |
} else { \ | |
if (SVB_DELTADESCENDING & (flags)) { \ | |
prev -= v; v = prev; \ | |
} \ | |
} \ | |
\ | |
*dest++ = v; \ | |
} while(--cnt); \ | |
} \ | |
} while (0) | |
/** | |
* Every encoded element is represented by 2-bit key. All keys are | |
* packed into a block that precedes the variable sized data block. | |
* The key block is padded out to a byte boundry. | |
*/ | |
#define SVB_KEY_BLOCK_SIZE(count) ((count >> 2) + ((count & 3) != 0)) | |
/** | |
* Return the index of the highest set byte. | |
* (If no bytes are set then return zero) | |
* | |
* If the cpu supports a FAST way to count leading zero bits | |
* then it should be used. Elsewise emulating a __builtin_clz() | |
* operations is slower than necessary because only byte precision | |
* is required. | |
* | |
* Note: BSR is slow on AMD, LZCNT requires BMI1, and who knows w/ARM. | |
*/ | |
#if HAVE_FAST_CLZ && defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) | |
# define SVB_KEY(v) \ | |
v = (((uint32_t)__builtin_clz(v | 1) ^ 31) >> 3) | |
#else | |
# define SVB_KEY(v) \ | |
v >>= 8; \ | |
v = ((((v + 0xFFFF00) | 0xFFFFFF) + v) | (v + 0xFF0000)) >> 24 | |
#endif | |
/// Load 16-bits from an unaligned location in little endian byte order | |
#define SVB_GET_LE16(ptr, v) \ | |
do { \ | |
uint8_t* p_ = (uint8_t*)(ptr); \ | |
uint16_t v_ = (((uint16_t)p_[0])) | (((uint16_t)p_[1]) << 8); \ | |
(v) = v_; \ | |
} while (0) | |
/// Load 32-bits from an unaligned location in little endian byte order | |
#define SVB_GET_LE32(ptr, v) \ | |
do { \ | |
uint8_t* p_ = (uint8_t*)(ptr); \ | |
uint32_t v_ = \ | |
(((uint32_t)p_[0])) | \ | |
(((uint32_t)p_[1]) << 8) | \ | |
(((uint32_t)p_[2]) << 16) | \ | |
(((uint32_t)p_[3]) << 24); \ | |
(v) = v_; \ | |
} while (0) | |
/// Store 16-bits to an unaligned location in little endian byte order | |
#define SVB_PUT_LE16(ptr, v) \ | |
do { \ | |
uint16_t v_ = (v); \ | |
uint8_t* p_ = (uint8_t*)(ptr); \ | |
p_[0] = v_; \ | |
p_[1] = v_ >> 8; \ | |
} while (0) | |
/// Store 32-bits to an unaligned location in little endian byte order | |
#define SVB_PUT_LE32(ptr, v) \ | |
do { \ | |
uint32_t v_ = (v); \ | |
uint8_t* p_ = (uint8_t*)(ptr); \ | |
p_[0] = v_; \ | |
p_[1] = v_ >> 8; \ | |
p_[2] = v_ >> 16; \ | |
p_[3] = v_ >> 24; \ | |
} while (0) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment