Skip to content

Instantly share code, notes, and snippets.

@chriso
Last active March 5, 2021 00:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chriso/79dd8009265e69b7c5644aa9a0920c7f to your computer and use it in GitHub Desktop.
Save chriso/79dd8009265e69b7c5644aa9a0920c7f to your computer and use it in GitHub Desktop.
/**
* This is free and unencumbered software released into the public domain.
*
* Anyone is free to copy, modify, publish, use, compile, sell, or
* distribute this software, either in source code form or as a compiled
* binary, for any purpose, commercial or non-commercial, and by any
* means.
*
* In jurisdictions that recognize copyright laws, the author or authors
* of this software dedicate any and all copyright interest in the
* software to the public domain. We make this dedication for the benefit
* of the public at large and to the detriment of our heirs and
* successors. We intend this dedication to be an overt act of
* relinquishment in perpetuity of all present and future rights to this
* software under copyright law.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* For more information, please refer to <http://unlicense.org/>
*/
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#ifdef VARINT_SMALL_BIAS
# define LIKELY(e) __builtin_expect((e), 1)
#endif
const size_t varint_max_len = 9;
size_t varint_write_len(uint64_t n) {
int bits = 64 - __builtin_clzll(n | 1);
return bits > 56 ? varint_max_len : (size_t)(1 + (bits - 1) / 7);
}
size_t varint_read_len(const uint8_t *buf) {
return 1 + (size_t)__builtin_ctz((uint32_t)*buf | 0x100);
}
uint8_t *varint_encode(uint8_t *buf, size_t limit, uint64_t n) {
#ifdef VARINT_SMALL_BIAS
if (LIKELY(limit && n < 0x80)) {
*buf = (uint8_t)(2 * n + 1);
return buf + 1;
}
#endif
size_t len = varint_write_len(n);
if (len > limit)
return NULL;
if (len == varint_max_len) {
*buf++ = 0;
len = 8;
} else {
n = (2 * n + 1) << (len - 1);
}
// when encoding a sequence of varints, the branch predictor
// will see/predict limit>=8 until the end of the sequence
memcpy(buf, &n, limit >= 8 ? 8 : len);
return buf + len;
}
const uint8_t *varint_decode(const uint8_t *buf, size_t limit, uint64_t *n) {
#ifdef VARINT_SMALL_BIAS
if (LIKELY(limit && (*buf & 1))) {
*n = *buf >> 1;
return buf + 1;
}
#endif
size_t len = varint_read_len(buf);
if (len > limit)
return NULL;
if (len == varint_max_len) {
memcpy(n, buf + 1, 8);
} else {
// when decoding a sequence of varints, the branch predictor
// will see/predict limit>=8 until the end of the sequence
memcpy(n, buf, limit >= 8 ? 8 : len);
*n &= ~0ULL >> (8 * (8 - len));
*n >>= len;
}
return buf + len;
}
size_t varint_array_max_size(size_t len) {
return len * varint_max_len;
}
uint8_t *varint_array_encode(const uint64_t *in, size_t len, uint8_t *out,
size_t out_len) {
const uint8_t *end = out + out_len;
for (size_t i = 0; out && i < len; i++)
out = varint_encode(out, (size_t)(end - out), in[i]);
return out;
}
const uint8_t *varint_array_decode(const uint8_t *in, size_t len, uint64_t *out,
size_t out_len, size_t *written) {
const uint8_t *end = in + len;
size_t i = 0;
for (; in && in < end && i < out_len; i++)
in = varint_decode(in, (size_t)(end - in), &out[i]);
*written = i;
return in;
}
int64_t varint_array_count(const uint8_t *in, size_t len) {
int64_t count = 0;
const uint8_t *end = in + len;
for (; in < end; in += varint_read_len(in), count++)
;
return in == end ? count : -1;
}
#include <assert.h>
#include <stdlib.h>
#include "prefix_varint.h"
static struct {
uint64_t in;
uint64_t out1;
uint64_t out2;
size_t len;
} fixtures[] = {
{0, 1, 0, 1},
{1, 3, 0, 1},
{2, 5, 0, 1},
{0x7F, 0xFF, 0, 1},
{0x80, 0x202, 0, 2},
{0x81, 0x206, 0, 2},
{0x3FFF, 0xFFFE, 0, 2},
{0x4000, 0x20004, 0, 3},
{0x4001, 0x2000C, 0, 3},
{0x1FFFFF, 0xFFFFFC, 0, 3},
{0x200000, 0x2000008, 0, 4},
{0x200001, 0x2000018, 0, 4},
{0xFFFFFFF, 0xFFFFFFF8, 0, 4},
{0x10000000, 0x200000010, 0, 5},
{0x10000001, 0x200000030, 0, 5},
{0x7FFFFFFFF, 0xFFFFFFFFF0, 0, 5},
{0x800000000, 0x20000000020, 0, 6},
{0x800000001, 0x20000000060, 0, 6},
{0x3FFFFFFFFFF, 0xFFFFFFFFFFE0, 0, 6},
{0x40000000000, 0x2000000000040, 0, 7},
{0x40000000001, 0x20000000000C0, 0, 7},
{0x1FFFFFFFFFFFF, 0xFFFFFFFFFFFFC0, 0, 7},
{0x2000000000000, 0x200000000000080, 0, 8},
{0x2000000000001, 0x200000000000180, 0, 8},
{0xFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFF80, 0, 8},
{0x100000000000000, 0, 1, 9},
{0x100000000000001, 0x100, 1, 9},
{0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFF00, 0xFF, 9},
};
int main() {
for (size_t i = 0; i < sizeof(fixtures) / sizeof(fixtures[0]); i++) {
uint64_t n = 0, out[] = {0, 0};
uint8_t *buf = (uint8_t *)out;
// encode
for (size_t j = 0; j <= 16; j++) {
uint8_t *pos = varint_encode(buf, j, fixtures[i].in);
assert((pos == NULL) == (j < fixtures[i].len));
if (j >= fixtures[i].len) {
assert(out[0] == fixtures[i].out1 &&
out[1] == fixtures[i].out2);
assert((size_t)(pos - buf) == fixtures[i].len);
}
}
// decode
for (size_t j = 0; j <= 16; j++) {
const uint8_t *pos = varint_decode(buf, j, &n);
assert((pos == NULL) == (j < fixtures[i].len));
if (j >= fixtures[i].len) {
assert(n == fixtures[i].in);
assert((size_t)(pos - buf) == fixtures[i].len);
}
}
}
size_t mb = 1;
size_t size = mb * 1024 * 1024;
size_t count = size / sizeof(uint64_t);
uint64_t *input = calloc(1, size);
uint64_t *copy = calloc(1, size);
uint8_t *packed = calloc(count, varint_max_len);
assert(input && copy && packed);
size_t max_bytes = 8;
size_t packed_len = 0;
for (size_t i = 0; i < count; i++) {
size_t bytes = (size_t)rand() % max_bytes;
for (size_t j = 0; j < bytes; j++) {
input[i] <<= 8;
input[i] |= (uint64_t)rand() & 0xFF;
}
packed_len += varint_write_len(input[i]);
}
uint8_t *pos =
varint_array_encode(input, count, packed, count * varint_max_len);
assert(pos && pos == packed + packed_len);
assert(varint_array_count(packed, packed_len) == (int64_t)count);
size_t out_len = 0;
const uint8_t *decode_pos =
varint_array_decode(packed, packed_len, copy, count, &out_len);
assert(decode_pos && decode_pos == packed + packed_len);
assert(out_len == count);
assert(memcmp(input, copy, size) == 0);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment