Blog 2020/8/20
<- previous | index | next ->
Let's learn how to implement a hash table in C! (See also part 1, part 2, and part 3).
Each time the population changes, we check if the array size should be adjusted.
void HashTableSet(HashTable* htp, void* keyp, size_t keyLength,
size_t keyHash, void* valuep
) {
+ size_t origPop = htp->population;
size_t slotIndex = keyHash % htp->capacity;
HTSlot* cursorp = htp->slotspp[slotIndex];
while (cursorp != NULL) {
if (cursorp->keyHash == keyHash
&& cursorp->keyLength == keyLength
&& memcmp(cursorp->keyp, keyp, keyLength) == 0
) {
if (cursorp->valuep == NULL && valuep != NULL) {
htp->population++;
} else if (cursorp->valuep != NULL && valuep == NULL) {
htp->population--;
}
cursorp->valuep = valuep;
- return;
+ goto adjust;
} else {
cursorp = cursorp->nextp;
continue;
}
}
// no existing match? append a new slot to the list.
if (valuep != NULL) {
HTSlot* newSlotp = _HTSlotMake();
newSlotp->keyp = keyp;
newSlotp->keyLength = keyLength;
newSlotp->keyHash = keyHash;
newSlotp->valuep = valuep;
if (htp->slotspp[slotIndex] == NULL) {
htp->slotspp[slotIndex] = newSlotp;
} else {
HTSlot* lastp = htp->slotspp[slotIndex];
while (lastp->nextp != NULL) {
lastp = lastp->nextp;
continue;
}
lastp->nextp = newSlotp;
}
htp->population++;
}
+
+adjust:
+ if (origPop != htp->population) {
+ _adjustCapacityIfNeeded(htp);
+ }
+}
To grow or shrink the array, a new array is allocated and all of the existing slots are migrated to their new positions in the new array, determined by modding the key hash by the new array size.
static void _adjustCapacityIfNeeded(HashTable* htp) {
size_t newCapacity;
if (_shouldShrink(htp)) {
newCapacity = htp->capacity / 2;
} else if (_shouldGrow(htp)) {
newCapacity = htp->capacity * 2;
} else {
return;
}
if (newCapacity < htp->minCapacity) {
return;
}
HTSlot** slots2pp = dmalloc(sizeof(void*) * newCapacity);
for (size_t i=0; i < htp->capacity; i++) {
HTSlot* cursorp = htp->slotspp[i];
while (cursorp != NULL) {
HTSlot* tmp = cursorp;
cursorp = cursorp->nextp;
if (tmp->valuep != NULL) {
tmp->nextp = NULL;
size_t slotIndex = tmp->keyHash % newCapacity;
if (slots2pp[slotIndex] == NULL) {
slots2pp[slotIndex] = tmp;
} else {
HTSlot* lastp = slots2pp[slotIndex];
while (lastp->nextp != NULL) {
lastp = lastp->nextp;
continue;
}
lastp->nextp = tmp;
}
}
continue;
}
}
free(htp->slotspp);
htp->slotspp = slots2pp;
htp->capacity = newCapacity;
}
static bool _shouldShrink(HashTable* htp) {
return htp->capacity >= (htp->population * 2);
}
static bool _shouldGrow(HashTable* htp) {
return htp->population >= (htp->capacity * 2);
}
We also store the capacity with which the hash table was initially created, and treat that as a minimum capacity which the hash table will not shrink below.
struct _HashTable {
HTSlot** slotspp;
size_t capacity;
size_t population;
+ size_t minCapacity;
};
typedef struct _HashTable HashTable;
HashTable* HashTableMake(size_t capacity) {
HashTable* htp = dmalloc(sizeof(HashTable));
htp->capacity = capacity;
+ htp->minCapacity = capacity;
htp->slotspp = dmalloc(sizeof(void*) * capacity);
return htp;
}
We add a couple of tests around resizing.
void testGrow() {
printf("%s... ", __FUNCTION__);
size_t capacity = 1;
HashTable* htp = HashTableMake(capacity);
assert(htp->population == 0);
assert(htp->capacity == capacity);
char* key1p = "hello";
size_t key1Length = strlen(key1p) + 1;
size_t key1Hash = terribleHash(key1p, key1Length);
char* value1p = "world";
HashTableSet(htp, key1p, key1Length, key1Hash, value1p);
assert(htp->population == 1);
assert(htp->capacity == capacity);
char* key2p = "foo";
size_t key2Length = strlen(key2p) + 1;
size_t key2Hash = terribleHash(key2p, key2Length);
char* value2p = "bar";
HashTableSet(htp, key2p, key2Length, key2Hash, value2p);
assert(htp->population == 2);
assert(htp->capacity == capacity*2);
printf("OK\n");
}
void testShrink() {
printf("%s... ", __FUNCTION__);
size_t capacity = 1;
HashTable* htp = HashTableMake(capacity);
assert(htp->population == 0);
assert(htp->capacity == capacity);
char* key1p = "hello";
size_t key1Length = strlen(key1p) + 1;
size_t key1Hash = terribleHash(key1p, key1Length);
char* value1p = "world";
HashTableSet(htp, key1p, key1Length, key1Hash, value1p);
assert(htp->population == 1);
assert(htp->capacity == capacity);
char* key2p = "foo";
size_t key2Length = strlen(key2p) + 1;
size_t key2Hash = terribleHash(key2p, key2Length);
char* value2p = "bar";
HashTableSet(htp, key2p, key2Length, key2Hash, value2p);
assert(htp->population == 2);
assert(htp->capacity == capacity*2);
HashTableSet(htp, key2p, key2Length, key2Hash, NULL);
assert(htp->population == 1);
assert(htp->capacity == capacity);
printf("OK\n");
}
./tests
testMake... OK
testFree... OK
testSet... OK
testGet... OK
testSlotCollision... OK
testHashCollision... OK
testGrow... OK
testShrink... OK
In part 5, we'll take a look at the importance of a good hash function.