Created
September 28, 2023 09:10
-
-
Save miura1729/5e9194a72d3e300f906d8a7f9f03073c to your computer and use it in GitHub Desktop.
Can large hash table
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdint.h> | |
#include <stdlib.h> | |
#include <string.h> | |
typedef int mh_key_t; | |
typedef int mh_value_t; | |
typedef struct mhash { | |
uint64_t *exists; | |
mh_key_t *keys; | |
mh_value_t *values; | |
uint16_t *same_hash; | |
int n_buckets; | |
} mhash; | |
const int same_hash_size = 16; | |
uint64_t ror(uint64_t *base, int sn, int basesize) | |
{ | |
if (sn < 0) sn += basesize; | |
int snf = sn + 31; | |
if (snf > basesize) snf -= basesize; | |
int snfpos = snf / 64; | |
int snfoff = snf & 63; | |
uint64_t snfbit = (1ull << (snfoff + 1)) - 1; | |
if (snfoff == 63) { | |
snfbit = -1; | |
} | |
int snb = sn - 32; | |
if (snb < 0) snb += basesize; | |
int snbpos = snb / 64; | |
int snboff = snb & 63; | |
uint64_t snbbit = (1ull << (64 - snboff + 1)) - 1;; | |
if (snboff == 1) { | |
snbbit = -1; | |
} | |
if (snfpos == snbpos) { | |
/* Same bitmap */ | |
uint64_t bitmap = base[snfpos]; | |
return bitmap; | |
} | |
else { | |
uint64_t fbit = base[snfpos]; | |
uint64_t bbit = base[snbpos]; | |
fbit = (fbit & snfbit) << (64 - snboff); | |
bbit = (bbit >> snboff) & snbbit; | |
return fbit | bbit; | |
} | |
} | |
mhash *mh_init(int size) { | |
mhash *h = malloc(sizeof(mhash)); | |
h->n_buckets = size; | |
h->exists = calloc((size + 63) / 64, sizeof(uint64_t)); | |
h->keys = calloc(size, sizeof(mh_key_t)); | |
h->values = calloc(size, sizeof(mh_key_t)); | |
h->same_hash = calloc(size, sizeof(uint16_t)); | |
return h; | |
} | |
#define hash_func(hash) (unsigned int)((key)^((key)<<2)^((key)>>2)) | |
int mh_put(mhash *h, mh_key_t key) { | |
int hv = hash_func(key) % h->n_buckets; | |
uint64_t hv_bitmap = (1ull << (hv & 63)); | |
if ((h->exists[hv / 64] & hv_bitmap) == 0) { | |
/* no confilct hash value and entry is fresh */ | |
h->exists[hv / 64] |= hv_bitmap; | |
h->keys[hv] = key; | |
h->same_hash[hv] = 1; | |
return hv; | |
} | |
if (h->same_hash[hv] & 1) { | |
/* useing entry for same hash value */ | |
uint64_t albitmap = ror(h->exists, hv, h->n_buckets); | |
uint64_t alorigin = albitmap >> 16; | |
uint64_t alexits = albitmap >> 32; | |
int empoff; | |
int lopop = __builtin_popcount(alorigin & ((1ull << 25) - 1)); | |
int hipop = __builtin_popcount(alorigin >> 24); | |
if (lopop > hipop) { | |
empoff = 31 - __builtin_clz((~(uint32_t)alexits) & ((1 << same_hash_size) - 1)); | |
printf("bar %d %d %x\n", hv, empoff, alorigin); | |
} | |
else { | |
empoff = __builtin_ctzll(alexits + 1); | |
} | |
if (empoff > 0 && empoff < same_hash_size) { | |
int newent = (empoff + hv) % h->n_buckets; | |
h->same_hash[hv] |= (1 << empoff); | |
h->exists[newent / 64] |= (1ull << (newent & 63)); | |
h->same_hash[newent] = (empoff << 1); | |
h->keys[newent] = key; | |
return newent; | |
} | |
/* Oh! fill my entry. Must still from other entry */ | |
abort(); | |
} | |
/* using entry for other hash value. */ | |
{ | |
int eoff = h->same_hash[hv] >> 1; | |
int hv2 = (hv - eoff) % h->n_buckets; | |
if (hv2 < 0) { | |
hv2 += h->n_buckets; | |
} | |
uint64_t alexits = ror(h->exists, hv2, h->n_buckets); | |
int empoff = __builtin_ctzll(alexits + 1); | |
if (empoff < same_hash_size) { | |
int newent = (empoff + hv2) % h->n_buckets; | |
uint64_t hv_bitmap = 1ull << (newent & 63); | |
h->same_hash[hv2] ^= (1 << eoff); /* Clear old entry */ | |
h->same_hash[hv2] |= (1 << empoff); /* Set new entry */ | |
h->keys[newent] = h->keys[hv]; | |
h->values[newent] = h->values[hv]; | |
h->same_hash[newent] = empoff << 1; | |
h->exists[newent / 64] |= hv_bitmap; | |
h->same_hash[hv] = 1; | |
h->keys[hv] = key; | |
return hv; | |
} | |
abort(); | |
} | |
return h->n_buckets; | |
} | |
int mh_get(mhash *h, mh_key_t key) { | |
int hv = hash_func(key) % h->n_buckets; | |
uint16_t hbit = h->same_hash[hv]; | |
if (hbit & 1) { | |
for (int i = 0; i >= 0 && i < 16;i = __builtin_ctz(hbit)) { | |
int ent = (i + hv) % h->n_buckets; | |
if (h->keys[ent] == key) { | |
return ent; | |
} | |
hbit ^= (1 << i); | |
} | |
} | |
return -1; | |
} | |
main(){ | |
mhash *h = mh_init(1024); | |
for (int i = 0; i < 1024; i++) { | |
int ent = mh_put(h, i); | |
printf("%d %d \n", i, ent); | |
h->values[ent] = i + 1; | |
} | |
printf("fin\n"); | |
for (int i = 0; i < 1024; i++) { | |
int ent = mh_get(h, i); | |
printf("%d %d \n", h->values[ent], ent); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment