Skip to content

Instantly share code, notes, and snippets.

@cloudwu
Created June 21, 2023 02:16
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cloudwu/44fd12a14aa77db0b3de1ec2839e1031 to your computer and use it in GitHub Desktop.
Save cloudwu/44fd12a14aa77db0b3de1ec2839e1031 to your computer and use it in GitHub Desktop.
Compress monotonic 48bits id array
#include <stdio.h>
#include <stdint.h>
#include <assert.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
// .size is size of sec[]
// .size * 8 >= .section_n * 8 + .n
// 64bit section :
// eid is a 48bits id (base 1, 0 is invalid id)
// [0 , 24) offset of section (24bits)
// [40, 64) high bits of eid (40bits)
struct eid_array {
int n;
int section_n;
int size;
uint32_t last_position;
uint64_t sec[1];
};
#define DEFAULT_SIZE 64
struct eid_array * eid_append(struct eid_array *E, uint64_t eid);
void eid_free(struct eid_array *E);
uint64_t eid_lookup(struct eid_array *E, int n);
int eid_index(struct eid_array *E, uint64_t eid); // todo
int eid_index_hint(struct eid_array *E, uint64_t eid, int *hint); // todo
int eid_count(struct eid_array *E);
size_t eid_size(struct eid_array *E);
void eid_clear(struct eid_array *E);
void eid_mark_remove(struct eid_array *E, uint64_t eid); // todo
void eid_remove(struct eid_array *E, uint64_t from); // todo
static inline size_t
eid_size_(int size) {
size *= sizeof(uint64_t);
return sizeof(struct eid_array) - sizeof(uint64_t) + size;
}
static struct eid_array *
eid_new_(int size) {
struct eid_array *E = (struct eid_array *)malloc(eid_size_(size));
E->n = 0;
E->section_n = 0;
E->size = size;
E->last_position = 0;
return E;
}
static inline uint8_t *
address(const struct eid_array *E, int offset) {
uint8_t * top = (uint8_t *)&E->sec[E->size];
return top - offset - 1;
}
static void
copy(struct eid_array *dst, const struct eid_array *src) {
dst->n = src->n;
dst->section_n = src->section_n;
dst->last_position = src->last_position;
if (dst->n > 0) {
memcpy(dst->sec, src->sec, (src->section_n + 1) * sizeof(uint64_t));
memcpy(address(dst, dst->n-1), address(src, src->n-1), src->n);
}
}
struct eid_array *
eid_append(struct eid_array *E, uint64_t eid) {
if (E == NULL) {
E = eid_new_(DEFAULT_SIZE);
}
if (E->n == 0) {
E->n = 1;
E->section_n = 1;
E->sec[0] = (eid >> 8) << 24;
E->sec[1] = 1;
*address(E, 0) = eid & 0xff;
return E;
}
int n = E->n++;
if (n >= (E->size - E->section_n - 2) * sizeof(uint64_t)) {
struct eid_array * tmp = eid_new_(E->size * 3 / 2);
copy(tmp, E);
free(E);
E = tmp;
}
uint8_t low = eid & 0xff;
uint64_t sec = E->sec[E->section_n-1] >> 24;
uint8_t *ptr = address(E, n);
*ptr = low;
if ((eid >> 8) == sec) {
// the same section
assert(low > ptr[1]);
++E->sec[E->section_n];
} else {
// new section
uint64_t eid_high = eid >> 8;
int sn = E->section_n++;
E->sec[sn] = (eid_high << 24) | n;
E->sec[sn+1] = n+1;
}
return E;
}
void eid_free(struct eid_array *E) {
free(E);
}
static int
lookup_section(struct eid_array *E, int from_section, int n) {
int begin = from_section;
int end = E->section_n;
while (begin < end) {
int mid = (begin + end) / 2;
int offset = E->sec[mid] & 0xffffff;
if (offset == n)
return mid;
else if (offset < n) {
begin = mid + 1;
} else {
end = mid;
}
}
return begin - 1;
}
static inline uint64_t
combine_eid(uint64_t sec, uint8_t low) {
return ((sec >> 24) << 8) | low;
}
uint64_t
eid_lookup(struct eid_array *E, int n) {
if (n >= E->n)
return 0;
int sn = E->last_position >> 8;
uint64_t sec = E->sec[sn];
int offset = sec & 0xffffff;
if (n < offset) {
// before last_position
E->last_position = 0;
return eid_lookup(E, n);
}
int next = E->sec[sn+1] & 0xffffff;
if (n < next) {
E->last_position = (sn << 8) | (n - offset);
return combine_eid(sec, *address(E, n));
} else {
++sn;
if (n > next) {
sn = lookup_section(E, sn, n);
E->last_position = sn << 8 | (n - (E->sec[sn] & 0xffffff));
} else {
E->last_position = sn << 8;
}
return combine_eid(E->sec[sn], *address(E, n));
}
}
int
eid_count(struct eid_array *E) {
return E->n;
}
size_t
eid_size(struct eid_array *E) {
return eid_size_(E->size);
}
void
eid_clear(struct eid_array *E) {
E->n = 0;
E->section_n = 0;
}
#include <stdio.h>
static void
test(struct eid_array *E) {
int n = eid_count(E);
int i;
for (i = 0; i < n; i++) {
uint64_t id = eid_lookup(E, i);
if (id != (uint64_t)i * 2 + 1) {
printf("%d : %lld\n", i, id);
}
}
}
int
main() {
struct eid_array *E = NULL;
int i;
int n = 10000000;
for (i = 0; i < n; i++) {
uint64_t eid = (uint64_t)i*2+1;
E = eid_append(E, eid);
}
test(E);
eid_free(E);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment