Skip to content

Instantly share code, notes, and snippets.

@miura1729
Created September 30, 2023 08:22
Show Gist options
  • Save miura1729/637139f119ace29bbf39a87cd55fcac4 to your computer and use it in GitHub Desktop.
Save miura1729/637139f119ace29bbf39a87cd55fcac4 to your computer and use it in GitHub Desktop.
Can store 8192 datas in 8192 buckets.
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
typedef int mh_key_t;
typedef int mh_value_t;
typedef struct mhash {
uint64_t *exists;
mh_key_t *keys;
mh_value_t *values;
uint16_t *same_hash;
int n_buckets;
} mhash;
const int same_hash_size = 16;
uint64_t ror(uint64_t *base, int sn, int basesize)
{
if (sn < 0) sn += basesize;
int snf = sn + 31;
if (snf > basesize) snf -= basesize;
int snfpos = snf / 64;
int snfoff = snf & 63;
uint64_t snfbit = (1ull << (snfoff + 1)) - 1;
if (snfoff == 63) {
snfbit = -1;
}
int snb = sn - 32;
if (snb < 0) snb += basesize;
int snbpos = snb / 64;
int snboff = snb & 63;
uint64_t snbbit = (1ull << (64 - snboff + 1)) - 1;;
if (snboff == 1) {
snbbit = -1;
}
if (snfpos == snbpos) {
/* Same bitmap */
uint64_t bitmap = base[snfpos];
return bitmap;
}
else {
uint64_t fbit = base[snfpos];
uint64_t bbit = base[snbpos];
fbit = (fbit & snfbit) << (64 - snboff);
bbit = (bbit >> snboff) & snbbit;
return fbit | bbit;
}
}
mhash *mh_init(int size) {
mhash *h = malloc(sizeof(mhash));
h->n_buckets = size;
h->exists = calloc((size + 63) / 64, sizeof(uint64_t));
h->keys = calloc(size, sizeof(mh_key_t));
h->values = calloc(size, sizeof(mh_key_t));
h->same_hash = calloc(size, sizeof(uint16_t));
return h;
}
#define hash_func(hash) (unsigned int)((key)^((key)<<2)^((key)>>2))
int mh_put(mhash *h, mh_key_t key) {
int hv = hash_func(key) % h->n_buckets;
uint64_t hv_bitmap = (1ull << (hv & 63));
if ((h->exists[hv / 64] & hv_bitmap) == 0) {
/* no confilct hash value and entry is fresh */
h->exists[hv / 64] |= hv_bitmap;
h->keys[hv] = key;
h->same_hash[hv] = 1;
return hv;
}
else if (h->same_hash[hv] & 1) {
/* useing entry for same hash value */
uint64_t albitmap = ror(h->exists, hv, h->n_buckets);
uint64_t alorigin = albitmap >> 16;
uint64_t alexits = albitmap >> 32;
int empoff;
empoff = 31 - __builtin_clz((~(uint32_t)alexits) & ((1 << same_hash_size) - 1));
if (empoff > 0 && empoff < same_hash_size) {
int newent = (empoff + hv) % h->n_buckets;
h->same_hash[hv] |= (1 << empoff);
h->exists[newent / 64] |= (1ull << (newent & 63));
h->same_hash[newent] = (empoff << 1);
h->keys[newent] = key;
return newent;
}
else {
for (int i = 1; i < same_hash_size; i++) {
int srcent = (hv + i) % h->n_buckets;
uint16_t csh = h->same_hash[srcent];
if ((csh & 1) == 0 && csh != 0) {
csh >>= 1;
int hv2 = (srcent - csh) % h->n_buckets;
if (hv2 < 0) {
hv2 += h->n_buckets;
}
uint64_t alexits2 = ror(h->exists, hv2, h->n_buckets) >> 32;
int empoff = __builtin_ctzll(alexits2 + 1);
if (empoff < same_hash_size) {
int newent = (empoff + hv2) % h->n_buckets;
uint64_t hv_bitmap = 1ull << (newent & 63);
h->same_hash[hv2] ^= (1 << csh); /* Clear old entry */
h->same_hash[hv2] |= (1 << empoff); /* Set new entry */
h->keys[newent] = h->keys[srcent];
h->values[newent] = h->values[srcent];
h->exists[newent / 64] |= hv_bitmap;
h->same_hash[srcent] = i << 1;
h->same_hash[hv] |= (1 << i);
h->keys[srcent] = key;
return srcent;
}
}
}
}
/* Oh! fill my entry. Must still from other entry */
abort();
}
else {
/* using entry for other hash value. */
int eoff = h->same_hash[hv] >> 1;
int hv2 = (hv - eoff) % h->n_buckets;
if (hv2 < 0) {
hv2 += h->n_buckets;
}
uint64_t alexits = ror(h->exists, hv2, h->n_buckets) >> 32;
int empoff = __builtin_ctzll(alexits + 1);
if (empoff < same_hash_size) {
int newent = (empoff + hv2) % h->n_buckets;
uint64_t hv_bitmap = 1ull << (newent & 63);
h->same_hash[hv2] ^= (1 << eoff); /* Clear old entry */
h->same_hash[hv2] |= (1 << empoff); /* Set new entry */
h->keys[newent] = h->keys[hv];
h->values[newent] = h->values[hv];
h->same_hash[newent] = empoff << 1;
h->exists[newent / 64] |= hv_bitmap;
h->same_hash[hv] = 1;
h->keys[hv] = key;
return hv;
}
abort();
}
return h->n_buckets;
}
int mh_get(mhash *h, mh_key_t key) {
int hv = hash_func(key) % h->n_buckets;
uint16_t hbit = h->same_hash[hv];
if (hbit & 1) {
if (h->keys[hv] == key) {
return hv;
}
hbit ^= 1;
for (int i = __builtin_ctz(hbit); i > 0 && i < 16;i = __builtin_ctz(hbit)) {
int ent = (i + hv) % h->n_buckets;
if (h->keys[ent] == key) {
return ent;
}
hbit ^= (1 << i);
}
}
return -1;
}
main(){
const ndata = 8192;
mhash *h = mh_init(ndata);
for (int i = 0; i < ndata; i++) {
int ent = mh_put(h, i);
printf("%d %d \n", i, ent);
h->values[ent] = i + 1;
}
printf("fin\n");
for (int i = 0; i < ndata; i++) {
int ent = mh_get(h, i);
printf("%d %d \n", h->values[ent], ent);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment