Skip to content

Instantly share code, notes, and snippets.

@cloudwu
Created June 21, 2023 06:15
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cloudwu/cc3cb244323f3bc6f506e4bc0772d057 to your computer and use it in GitHub Desktop.
Save cloudwu/cc3cb244323f3bc6f506e4bc0772d057 to your computer and use it in GitHub Desktop.
Compress monotonic 48bits id array, version 2
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#define CHUNK_N 64
struct eid_chunk {
uint64_t id[2];
uint8_t v[CHUNK_N];
};
struct eid_hibits {
// uint8_t b[5]; // 40bits
uint64_t x64;
};
struct eid_array {
int n;
int discrete;
int size;
int discrete_top;
union {
struct eid_chunk c[1];
struct eid_hibits d[1];
} u;
};
#define DEFAULT_SIZE 64
struct eid_array * eid_append(struct eid_array *E, uint64_t eid);
void eid_free(struct eid_array *E);
uint64_t eid_index(struct eid_array *E, int n);
int eid_count(struct eid_array *E);
static inline size_t
eid_size_(int size) {
size = (size - 1) * sizeof(struct eid_chunk);
return sizeof(struct eid_array) + size;
}
static struct eid_array *
eid_new_(int size) {
struct eid_array *E = (struct eid_array *)malloc(eid_size_(size));
E->n = 0;
E->discrete = 0;
E->size = size;
E->discrete_top = size * sizeof(struct eid_chunk) / sizeof(struct eid_hibits) - 1;
return E;
}
static inline uint64_t
make_index(uint64_t eid, uint8_t c[3]) {
// c[0] : number of section 1 (use id[0] as hibits of eid)
// c[1] : number of section 1+2 (use id[1] as hibits of eid)
// c[2] : number of section 1+2+3 (use id[1]+1 as hibits of eid)
return (eid >> 8) << 24 | (c[0] << 16) | (c[1] << 8) | c[2];
}
static void
copy(struct eid_array *dst, const struct eid_array *src) {
int n = dst->n = src->n;
int discrete = dst->discrete = src->discrete;
int chunk_n = (n + CHUNK_N - 1) / CHUNK_N;
if (n > 0) {
memcpy(dst->u.c, src->u.c, chunk_n * sizeof(struct eid_chunk));
memcpy(&dst->u.d[dst->discrete_top - discrete + 1], &src->u.d[src->discrete_top - discrete + 1], discrete * sizeof(struct eid_hibits));
}
}
static inline void
init_chunk(struct eid_chunk *c, uint64_t eid) {
uint8_t t[3] = { 1, 1, 1 };
c->id[0] = make_index(eid, t);
c->v[0] = eid & 0xff;
}
static inline int
same_hibits(uint64_t eid, uint64_t index) {
return (eid >> 8) == (index >> 24);
}
static inline void
set_hibits(struct eid_hibits *h, uint64_t eid) {
// int i;
// for (i=0;i<5;i++) {
// eid >>= 8;
// h->b[i] = eid & 0xff;
// }
h->x64 = eid;
}
static inline uint64_t
get_hibits(struct eid_hibits *h) {
// int i;
// uint64_t r = 0;
// for (i=0;i<5;i++) {
// r |= h->b[i] << ((i+1) * 8);
// }
// return r;
return h->x64;
}
struct eid_array *
eid_append(struct eid_array *E, uint64_t eid) {
if (E == NULL) {
E = eid_new_(DEFAULT_SIZE);
}
if (E->n == 0) {
E->n = 1;
E->discrete = 0;
init_chunk(&E->u.c[0], eid);
return E;
}
int n = E->n++;
int chunk_n = n / CHUNK_N;
int need_size = chunk_n + (E->discrete + 1) * sizeof(struct eid_hibits) / sizeof(struct eid_chunk) + 2;
if (need_size > E->size) {
int new_size = E->size * 3 / 2;
assert(new_size >= need_size);
struct eid_array * tmp = eid_new_(new_size);
copy(tmp, E);
free(E);
E = tmp;
}
struct eid_chunk * c = &E->u.c[chunk_n];
int idx = n % CHUNK_N;
if (idx == 0) {
// new chunk
init_chunk(c, eid);
return E;
}
c->v[idx] = eid & 0xff;
if (same_hibits(eid, c->id[0])) {
c->id[0] += 0x010101;
return E;
}
uint8_t sec1 = (c->id[0] >> 16) & 0xff;
if (idx == sec1) {
// All elements in this chunk is in section 1
c->id[0] += 0x0101;
c->id[1] = (eid >> 8) << 24;
return E;
}
assert(idx > sec1);
if (same_hibits(eid, c->id[1])) {
// In section 2
c->id[0] += 0x0101;
return E;
}
if (same_hibits(eid - 0x100, c->id[1])) {
// In section 3
c->id[0] += 0x01;
return E;
}
// In section 4
int discrete = E->discrete++;
if (idx == (c->id[0] & 0xff)) {
// The first one of section 4
assert(discrete < 0x1000000); // 24bits
c->id[1] |= discrete;
}
set_hibits(&E->u.d[E->discrete_top - discrete], eid);
return E;
}
void
eid_free(struct eid_array *E) {
free(E);
}
static inline uint64_t
make_eid(uint64_t hi, uint8_t low) {
return (hi >> 24) << 8 | low;
}
uint64_t
eid_index(struct eid_array *E, int n) {
if (n >= E->n)
return 0;
int chunk_n = n / CHUNK_N;
struct eid_chunk *c = &E->u.c[chunk_n];
int idx = n % CHUNK_N;
uint8_t sec3 = c->id[0] & 0xff;
if (idx >= sec3) {
int offset = c->id[1] & 0xffffff;
struct eid_hibits * d = &E->u.d[E->discrete_top - offset - (idx - sec3)];
// return get_hibits(d) | low;
return get_hibits(d);
}
uint8_t low = c->v[idx];
uint8_t sec1 = (c->id[0] >> 16) & 0xff;
if (idx < sec1)
return make_eid(c->id[0], low);
uint8_t sec2 = (c->id[0] >> 8) & 0xff;
uint64_t r = make_eid(c->id[1], low);
if (idx >= sec2)
return r + 0x100;
return r;
}
int
eid_count(struct eid_array *E) {
return E->n;
}
#include <stdio.h>
void
eid_dump(struct eid_array *E, int chunk_id) {
struct eid_chunk *c = &E->u.c[chunk_id];
uint8_t t[3] = { (uint8_t)(c->id[0] >> 16) & 0xff, (uint8_t)(c->id[0] >> 8) & 0xff, (uint8_t)c->id[0] & 0xff };
printf("%llx, %llx, [%d %d %d]\n", c->id[0] >> 24, c->id[1] >> 24,
t[0], t[1], t[2]);
int i;
for (i = 0; i< 64; i++) {
printf("%02x ", c->v[i]);
}
printf("\n");
int index = c->id[1] & 0xffffff;
printf("Index = %d discrete = %d\n", index, E->discrete);
struct eid_hibits * d = &E->u.d[E->discrete_top - index];
for (i = 0; i< 64 - t[2];i++) {
printf("%llx ", get_hibits(d - i));
}
printf("\n");
}
static void
test(struct eid_array *E) {
int n = eid_count(E);
int i;
for (i = 0; i < n; i++) {
uint64_t id = eid_index(E, i);
if (id != (uint64_t)i * 15 + 1) {
printf("%d : %lld\n", i, id);
}
}
}
struct eid_array2 {
int n;
uint64_t *v;
};
static uint64_t
eid_index2(struct eid_array2 *E, int n) {
if (n >= E->n)
return 0;
return E->v[n];
}
static void
test2(struct eid_array2 *E) {
int n = E->n;
int i;
for (i = 0; i < n; i++) {
uint64_t id = eid_index2(E, i);
if (id != (uint64_t)i * 15 + 1) {
printf("%d : %lld\n", i, id);
}
}
}
#include <time.h>
int
main() {
struct eid_array *E = NULL;
int i;
int n = 10000000;
for (i = 0; i < n; i++) {
uint64_t eid = (uint64_t)i*15+1;
E = eid_append(E, eid);
}
eid_dump(E, 0);
clock_t c = clock();
for (i=0;i<10;i++)
test(E);
c = clock() - c;
printf("time = %d\n", (int)c);
eid_free(E);
struct eid_array2 E2;
E2.n = n;
E2.v = (uint64_t *)malloc(n * sizeof(uint64_t));
for (i = 0; i < n; i++) {
uint64_t eid = (uint64_t)i*15+1;
E2.v[i] = eid;
}
c = clock();
for (i=0;i<10;i++)
test2(&E2);
c = clock() - c;
printf("time = %d\n", (int)c);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment