Skip to content

Instantly share code, notes, and snippets.

@miura1729
Last active October 13, 2023 09:23
Show Gist options
  • Save miura1729/ed0d060f5ba7ebb0bc9092acc5702525 to your computer and use it in GitHub Desktop.
Save miura1729/ed0d060f5ba7ebb0bc9092acc5702525 to your computer and use it in GitHub Desktop.
Can store 64000 data in 65536 buckets
#include "wyhash32.h"
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
typedef int mh_key_t;
typedef int mh_value_t;
int pure;
int crash;
int crash2;
int move;
int para;
int mpara;
unsigned
hash_func(int key) {
return wy32x32(key, 0x9E3779B9ul);
}
typedef struct mhash {
uint64_t *exists;
mh_key_t *keys;
mh_value_t *values;
uint32_t *same_hash;
int *prev_hv;
int n_buckets;
int current_prev_hv;
} mhash;
const int same_hash_size = 16;
uint64_t ror(uint64_t *base, int sn, int basesize)
{
if (sn < 0) sn += basesize;
int snf = sn + 31;
if (snf > basesize) snf -= basesize;
int snfpos = snf / 64;
int snfoff = snf & 63;
uint64_t snfbit = (snfoff == 63) ? -1 : (1ull << (snfoff + 1)) - 1;
int snb = sn - 32;
if (snb < 0) snb += basesize;
int snbpos = snb / 64;
int snboff = snb & 63;
uint64_t snbbit = (1ull << (64 - snboff + 1)) - 1;;
if (snboff == 1) {
snbbit = -1;
}
if (0 && snfpos == snbpos) {
/* Same bitmap */
uint64_t bitmap = base[snfpos];
uint64_t snbit = (1ll << (snb + 1)) - 1;
return (((bitmap & snbit) << (64 - snboff)) | (bitmap >> snboff));
}
else {
uint64_t fbit = base[snfpos];
uint64_t bbit = base[snbpos];
fbit = (fbit & snfbit) << (64 - snboff);
bbit = (bbit >> snboff) & snbbit;
return fbit | bbit;
}
}
mhash *mh_init(int size) {
mhash *h = malloc(sizeof(mhash));
h->n_buckets = size;
h->exists = calloc((size + 63) / 64, sizeof(uint64_t));
h->keys = calloc(size, sizeof(mh_key_t));
h->values = calloc(size, sizeof(mh_key_t));
h->same_hash = calloc(size, sizeof(uint32_t));
h->current_prev_hv = size;
h->prev_hv = calloc(size, sizeof(int));
for (int i = 0; i < size; i++) {
h->prev_hv[i] = -size;
}
return h;
}
int mh_put(mhash *h, mh_key_t key) {
int hv = hash_func(key) % h->n_buckets;
retry0:
uint64_t hv_bitmap = (1ull << (hv & 63));
if ((h->exists[hv / 64] & hv_bitmap) == 0) {
/* no confilct hash value and entry is fresh */
h->exists[hv / 64] |= hv_bitmap;
h->keys[hv] = key;
h->same_hash[hv] = 1;
h->prev_hv[hv] = -h->current_prev_hv;
h->current_prev_hv = hv;
pure++;
return hv;
}
else if (h->same_hash[hv] & 1) {
/* useing entry for same hash value */
uint64_t albitmap = ror(h->exists, hv, h->n_buckets);
uint64_t alorigin = albitmap >> 16;
uint64_t alexits = albitmap >> 32;
int empoff;
int lopop = __builtin_popcount(alorigin & ((1ull << 17) - 1));
int hipop = __builtin_popcount(alorigin >> 32);
if (lopop > hipop) {
empoff = 31 - __builtin_clz((~(uint32_t)alexits) & ((1ul << same_hash_size) - 1));
}
else {
empoff = __builtin_ctzll(alexits + 1);
}
if (empoff > 0 && empoff < same_hash_size) {
int newent = (empoff + hv) % h->n_buckets;
h->same_hash[hv] |= (1 << empoff);
h->exists[newent / 64] |= (1ull << (newent & 63));
h->same_hash[newent] = (empoff << 1);
h->keys[newent] = key;
crash++;
return newent;
}
else {
for (int i = 1; i < same_hash_size; i++) {
int srcent = (hv + i) % h->n_buckets;
uint16_t csh = h->same_hash[srcent];
if ((csh & 1) == 0 && csh != 0) {
csh >>= 1;
int hv2 = (srcent - csh) % h->n_buckets;
if (hv2 < 0) {
hv2 += h->n_buckets;
}
uint64_t alexits2 = ror(h->exists, hv2, h->n_buckets) >> 32;
int empoff = __builtin_ctzll(alexits2 + 1);
if (empoff < same_hash_size) {
int newent = (empoff + hv2) % h->n_buckets;
uint64_t hv_bitmap = 1ull << (newent & 63);
h->same_hash[hv2] ^= (1 << csh); /* Clear old entry */
h->same_hash[hv2] |= (1 << empoff); /* Set new entry */
h->keys[newent] = h->keys[srcent];
h->values[newent] = h->values[srcent];
h->exists[newent / 64] |= hv_bitmap;
h->same_hash[srcent] = i << 1;
h->same_hash[newent] = empoff << 1;
h->same_hash[hv] |= (1 << i);
h->keys[srcent] = key;
crash2++;
return srcent;
}
}
}
}
if (h->prev_hv[hv] < 0) {
int nhv;
nhv = -h->prev_hv[hv];
h->prev_hv[hv] = nhv;
hv = nhv;
}
else {
hv = h->prev_hv[hv];
}
para++;
goto retry0;
/* Oh! fill my entry. Must still from other entry */
abort();
}
else {
/* using entry for other hash value. */
int eoff = h->same_hash[hv] >> 1;
int hv2 = (hv - eoff) % h->n_buckets;
if (hv2 < 0) {
hv2 += h->n_buckets;
}
int ohv2 = hv2;
retry1:
uint64_t alexits = ror(h->exists, hv2, h->n_buckets) >> 32;
int empoff = __builtin_ctzll(alexits + 1);
if (empoff < same_hash_size) {
int newent = (empoff + hv2) % h->n_buckets;
uint64_t hv_bitmap = 1ull << (newent & 63);
h->same_hash[ohv2] ^= (1 << eoff); /* Clear old entry */
h->same_hash[hv2] |= (1 << empoff); /* Set new entry */
h->keys[newent] = h->keys[hv];
h->values[newent] = h->values[hv];
h->same_hash[newent] = empoff << 1;
h->exists[newent / 64] |= hv_bitmap;
h->same_hash[hv] = 1;
h->keys[hv] = key;
h->prev_hv[hv] = -h->current_prev_hv;
h->current_prev_hv = hv;
move++;
return hv;
}
if (h->prev_hv[hv2] < 0) {
int nhv2;
nhv2 = -h->prev_hv[hv2];
h->prev_hv[hv2] = nhv2;
hv2 = nhv2;
}
else {
hv2 = h->prev_hv[hv2];
}
mpara++;
goto retry1;
abort();
}
return h->n_buckets;
}
int mh_get(mhash *h, mh_key_t key) {
int hv = hash_func(key) % h->n_buckets;
uint16_t hbit = h->same_hash[hv];
if (hbit & 1) {
if (h->keys[hv] == key) {
return hv;
}
hbit ^= 1;
for (int i = __builtin_ctz(hbit); i > 0 && i < 16;i = __builtin_ctz(hbit)) {
int ent = (i + hv) % h->n_buckets;
if (h->keys[ent] == key) {
return ent;
}
hbit ^= (1 << i);
}
}
while (h->prev_hv[hv] >= 0) {
hv = h->prev_hv[hv];
hbit = h->same_hash[hv];
hbit ^= 1;
for (int i = __builtin_ctz(hbit); i > 0 && i < 16;i = __builtin_ctz(hbit)) {
int ent = (i + hv) % h->n_buckets;
if (h->keys[ent] == key) {
return ent;
}
hbit ^= (1 << i);
}
}
return -1;
}
int main(){
const int ndata = 65536;
mhash *h = mh_init(ndata);
for (int i = 0; i < ndata - 1000; i++) {
int ent = mh_put(h, i*i+i*2);
//printf("%d %d %d\n", i, ent, hash_func(i*i+i*2) % h->n_buckets);
h->values[ent] = i;
if ((i % 1000) == 0) {
//printf("%d %d %d %d %d %d %d \n", i, pure, crash, crash2, move, para, mpara);
}
}
// printf("fin\n");
for (int i = 0; i < ndata - 1000; i++) {
int ent = mh_get(h, i*i+i*2);
//printf("%d %d %d \n", i, h->values[ent], ent);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment