Skip to content

Instantly share code, notes, and snippets.

@cellularmitosis
Last active August 22, 2020 02:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cellularmitosis/fdac60efe210d1cee26391b4a3beb891 to your computer and use it in GitHub Desktop.
Save cellularmitosis/fdac60efe210d1cee26391b4a3beb891 to your computer and use it in GitHub Desktop.
Hash table in C, part 4: automatic resizing

Blog 2020/8/20

<- previous | index | next ->

Hash table in C, part 4: automatic resizing

Let's learn how to implement a hash table in C! (See also part 1, part 2, and part 3).

Each time the population changes, we check if the array size should be adjusted.

hash4

 void HashTableSet(HashTable* htp, void* keyp, size_t keyLength,
     size_t keyHash, void* valuep
 ) {
+    size_t origPop = htp->population;
     size_t slotIndex = keyHash % htp->capacity;
     HTSlot* cursorp = htp->slotspp[slotIndex];
     while (cursorp != NULL) {
         if (cursorp->keyHash == keyHash
             && cursorp->keyLength == keyLength
             && memcmp(cursorp->keyp, keyp, keyLength) == 0
         ) {
             if (cursorp->valuep == NULL && valuep != NULL) {
                 htp->population++;
             } else if (cursorp->valuep != NULL && valuep == NULL) {
                 htp->population--;
             }
             cursorp->valuep = valuep;
-            return;
+            goto adjust;
         } else {
             cursorp = cursorp->nextp;
             continue;
         }
     }
 
     // no existing match?  append a new slot to the list.
     if (valuep != NULL) {
         HTSlot* newSlotp = _HTSlotMake();
         newSlotp->keyp = keyp;
         newSlotp->keyLength = keyLength;
         newSlotp->keyHash = keyHash;
         newSlotp->valuep = valuep;
         if (htp->slotspp[slotIndex] == NULL) {
             htp->slotspp[slotIndex] = newSlotp;
         } else {
             HTSlot* lastp = htp->slotspp[slotIndex];
             while (lastp->nextp != NULL) {
                 lastp = lastp->nextp;
                 continue;
             }
             lastp->nextp = newSlotp;
         }
         htp->population++;
     }
+
+adjust:
+    if (origPop != htp->population) {
+        _adjustCapacityIfNeeded(htp);
+    }
+}

To grow or shrink the array, a new array is allocated and all of the existing slots are migrated to their new positions in the new array, determined by modding the key hash by the new array size.

static void _adjustCapacityIfNeeded(HashTable* htp) {
    size_t newCapacity;
    if (_shouldShrink(htp)) {
        newCapacity = htp->capacity / 2;
    } else if (_shouldGrow(htp)) {
        newCapacity = htp->capacity * 2;
    } else {
        return;
    }

    if (newCapacity < htp->minCapacity) {
        return;
    }

    HTSlot** slots2pp = dmalloc(sizeof(void*) * newCapacity);
    for (size_t i=0; i < htp->capacity; i++) {
        HTSlot* cursorp = htp->slotspp[i];
        while (cursorp != NULL) {
            HTSlot* tmp = cursorp;
            cursorp = cursorp->nextp;
            if (tmp->valuep != NULL) {
                tmp->nextp = NULL;
                size_t slotIndex = tmp->keyHash % newCapacity;
                if (slots2pp[slotIndex] == NULL) {
                    slots2pp[slotIndex] = tmp;
                } else {
                    HTSlot* lastp = slots2pp[slotIndex];
                    while (lastp->nextp != NULL) {
                        lastp = lastp->nextp;
                        continue;
                    }
                    lastp->nextp = tmp;
                }
            }
            continue;
        }
    }
    free(htp->slotspp);
    htp->slotspp = slots2pp;
    htp->capacity = newCapacity;
}
static bool _shouldShrink(HashTable* htp) {
    return htp->capacity >= (htp->population * 2);
}
static bool _shouldGrow(HashTable* htp) {
    return htp->population >= (htp->capacity * 2);
}

Minimum capacity

We also store the capacity with which the hash table was initially created, and treat that as a minimum capacity which the hash table will not shrink below.

 struct _HashTable {
     HTSlot** slotspp;
     size_t capacity;
     size_t population;
+    size_t minCapacity;
 };
 typedef struct _HashTable HashTable;
 HashTable* HashTableMake(size_t capacity) {
     HashTable* htp = dmalloc(sizeof(HashTable));
     htp->capacity = capacity;
+    htp->minCapacity = capacity;
     htp->slotspp = dmalloc(sizeof(void*) * capacity);
     return htp;
 }

testGrow, testShrink

We add a couple of tests around resizing.

void testGrow() {
    printf("%s... ", __FUNCTION__);
    size_t capacity = 1;
    HashTable* htp = HashTableMake(capacity);
    assert(htp->population == 0);
    assert(htp->capacity == capacity);

    char* key1p = "hello";
    size_t key1Length = strlen(key1p) + 1;
    size_t key1Hash = terribleHash(key1p, key1Length);
    char* value1p = "world";
    HashTableSet(htp, key1p, key1Length, key1Hash, value1p);
    assert(htp->population == 1);
    assert(htp->capacity == capacity);

    char* key2p = "foo";
    size_t key2Length = strlen(key2p) + 1;
    size_t key2Hash = terribleHash(key2p, key2Length);
    char* value2p = "bar";
    HashTableSet(htp, key2p, key2Length, key2Hash, value2p);
    assert(htp->population == 2);
    assert(htp->capacity == capacity*2);

    printf("OK\n");
}
void testShrink() {
    printf("%s... ", __FUNCTION__);
    size_t capacity = 1;
    HashTable* htp = HashTableMake(capacity);
    assert(htp->population == 0);
    assert(htp->capacity == capacity);

    char* key1p = "hello";
    size_t key1Length = strlen(key1p) + 1;
    size_t key1Hash = terribleHash(key1p, key1Length);
    char* value1p = "world";
    HashTableSet(htp, key1p, key1Length, key1Hash, value1p);
    assert(htp->population == 1);
    assert(htp->capacity == capacity);

    char* key2p = "foo";
    size_t key2Length = strlen(key2p) + 1;
    size_t key2Hash = terribleHash(key2p, key2Length);
    char* value2p = "bar";
    HashTableSet(htp, key2p, key2Length, key2Hash, value2p);
    assert(htp->population == 2);
    assert(htp->capacity == capacity*2);

    HashTableSet(htp, key2p, key2Length, key2Hash, NULL);
    assert(htp->population == 1);
    assert(htp->capacity == capacity);

    printf("OK\n");
}
./tests
testMake... OK
testFree... OK
testSet... OK
testGet... OK
testSlotCollision... OK
testHashCollision... OK
testGrow... OK
testShrink... OK

Next time

In part 5, we'll take a look at the importance of a good hash function.

#include "HashTable.h"
#include <stdio.h> // fprintf
#include <stdlib.h> // exit
#include <string.h> // memset
#include <stdint.h> // uint8_t
#include <stdbool.h> // true
#include <assert.h> // assert
void* dmalloc(size_t size) {
void* p = malloc(size);
if (p == NULL) {
perror("Error: malloc failed: ");
exit(1);
}
memset(p, 0, size);
return p;
}
#define malloc(x) dont_use_malloc_use_dmalloc
size_t terribleHash(void* datap, size_t length) {
size_t sum = 0;
for (size_t i=0; i < length; i++) {
uint8_t* byte = datap + i;
sum += *byte;
continue;
}
return sum;
}
static HTSlot* _HTSlotMake() {
HTSlot* sp = dmalloc(sizeof(HTSlot));
return sp;
}
HashTable* HashTableMake(size_t capacity) {
HashTable* htp = dmalloc(sizeof(HashTable));
htp->capacity = capacity;
htp->minCapacity = capacity;
htp->slotspp = dmalloc(sizeof(void*) * capacity);
return htp;
}
void HashTableFree(HashTable* htp) {
for (size_t i=0; i < htp->capacity; i++) {
HTSlot* slotp = htp->slotspp[i];
while (slotp != NULL) {
HTSlot* tmp = slotp;
free(slotp);
slotp = tmp->nextp;
continue;
}
}
free(htp->slotspp);
free(htp);
}
void* HashTableGet(HashTable* htp, void* keyp, size_t keyLength,
size_t keyHash
) {
size_t slotIndex = keyHash % htp->capacity;
HTSlot* cursorp = htp->slotspp[slotIndex];
while (cursorp != NULL) {
if (cursorp->keyHash == keyHash
&& cursorp->keyLength == keyLength
&& memcmp(cursorp->keyp, keyp, keyLength) == 0
) {
return cursorp->valuep;
} else {
cursorp = cursorp->nextp;
continue;
}
}
return NULL;
}
static void _adjustCapacityIfNeeded(HashTable* htp);
void HashTableSet(HashTable* htp, void* keyp, size_t keyLength,
size_t keyHash, void* valuep
) {
size_t origPop = htp->population;
size_t slotIndex = keyHash % htp->capacity;
HTSlot* cursorp = htp->slotspp[slotIndex];
while (cursorp != NULL) {
if (cursorp->keyHash == keyHash
&& cursorp->keyLength == keyLength
&& memcmp(cursorp->keyp, keyp, keyLength) == 0
) {
if (cursorp->valuep == NULL && valuep != NULL) {
htp->population++;
} else if (cursorp->valuep != NULL && valuep == NULL) {
htp->population--;
}
cursorp->valuep = valuep;
goto adjust;
} else {
cursorp = cursorp->nextp;
continue;
}
}
// no existing match? append a new slot to the list.
if (valuep != NULL) {
HTSlot* newSlotp = _HTSlotMake();
newSlotp->keyp = keyp;
newSlotp->keyLength = keyLength;
newSlotp->keyHash = keyHash;
newSlotp->valuep = valuep;
if (htp->slotspp[slotIndex] == NULL) {
htp->slotspp[slotIndex] = newSlotp;
} else {
HTSlot* lastp = htp->slotspp[slotIndex];
while (lastp->nextp != NULL) {
lastp = lastp->nextp;
continue;
}
lastp->nextp = newSlotp;
}
htp->population++;
}
adjust:
if (origPop != htp->population) {
_adjustCapacityIfNeeded(htp);
}
}
static bool _shouldShrink(HashTable* htp) {
return htp->capacity >= (htp->population * 2);
}
static bool _shouldGrow(HashTable* htp) {
return htp->population >= (htp->capacity * 2);
}
static void _adjustCapacityIfNeeded(HashTable* htp) {
size_t newCapacity;
if (_shouldShrink(htp)) {
newCapacity = htp->capacity / 2;
} else if (_shouldGrow(htp)) {
newCapacity = htp->capacity * 2;
} else {
return;
}
if (newCapacity < htp->minCapacity) {
return;
}
HTSlot** slots2pp = dmalloc(sizeof(void*) * newCapacity);
for (size_t i=0; i < htp->capacity; i++) {
HTSlot* cursorp = htp->slotspp[i];
while (cursorp != NULL) {
HTSlot* tmp = cursorp;
cursorp = cursorp->nextp;
if (tmp->valuep != NULL) {
tmp->nextp = NULL;
size_t slotIndex = tmp->keyHash % newCapacity;
if (slots2pp[slotIndex] == NULL) {
slots2pp[slotIndex] = tmp;
} else {
HTSlot* lastp = slots2pp[slotIndex];
while (lastp->nextp != NULL) {
lastp = lastp->nextp;
continue;
}
lastp->nextp = tmp;
}
}
continue;
}
}
free(htp->slotspp);
htp->slotspp = slots2pp;
htp->capacity = newCapacity;
}
#ifndef _HASHTABLE_H_
#define _HASHTABLE_H_
#include <unistd.h> // size_t
size_t terribleHash(void* datap, size_t length);
struct _HTSlot {
void* keyp;
size_t keyLength;
size_t keyHash;
void* valuep;
struct _HTSlot* nextp;
};
typedef struct _HTSlot HTSlot;
struct _HashTable {
HTSlot** slotspp;
size_t capacity;
size_t population;
size_t minCapacity;
};
typedef struct _HashTable HashTable;
HashTable* HashTableMake(size_t capacity);
void HashTableFree(HashTable* htp);
void* HashTableGet(HashTable* htp, void* keyp, size_t keyLength,
size_t keyHash
);
void HashTableSet(HashTable* htp, void* keyp, size_t keyLength, size_t keyHash,
void* valuep
);
#endif
default: tests
./tests
tests: HashTable.o tests.c
gcc -g -std=c99 -Wall -Werror HashTable.o tests.c -o tests
HashTable.o: HashTable.h HashTable.c
gcc -g -std=c99 -Wall -Werror -c HashTable.c
clean:
rm -f HashTable.o tests
.PHONY: default clean
#include "HashTable.h"
#include <assert.h> // assert
#include <stdio.h> // printf
#include <string.h> // strlen, strcmp
void testMake() {
printf("%s... ", __FUNCTION__);
size_t capacity = 3;
HashTable* htp = HashTableMake(capacity);
assert(htp != NULL);
assert(htp->capacity == 3);
printf("OK\n");
}
void testFree() {
printf("%s... ", __FUNCTION__);
size_t capacity = 3;
HashTable* htp = HashTableMake(capacity);
HashTableFree(htp);
printf("OK\n");
}
void testSet() {
printf("%s... ", __FUNCTION__);
size_t capacity = 3;
HashTable* htp = HashTableMake(capacity);
char* keyp = "hello";
size_t keyLength = strlen(keyp) + 1;
size_t keyHash = terribleHash(keyp, keyLength);
char* valuep = "world";
HashTableSet(htp, keyp, keyLength, keyHash, valuep);
assert(htp->population == 1);
printf("OK\n");
}
void testGet() {
printf("%s... ", __FUNCTION__);
size_t capacity = 3;
HashTable* htp = HashTableMake(capacity);
char* keyp = "hello";
size_t keyLength = strlen(keyp) + 1;
size_t keyHash = terribleHash(keyp, keyLength);
char* valuep = "world";
HashTableSet(htp, keyp, keyLength, keyHash, valuep);
char* gotp = HashTableGet(htp, keyp, keyLength, keyHash);
assert(strcmp(valuep, gotp) == 0);
printf("OK\n");
}
void testSlotCollision() {
printf("%s... ", __FUNCTION__);
size_t capacity = 1;
HashTable* htp = HashTableMake(capacity);
char* key1p = "hello";
size_t key1Length = strlen(key1p) + 1;
size_t key1Hash = terribleHash(key1p, key1Length);
char* value1p = "world";
HashTableSet(htp, key1p, key1Length, key1Hash, value1p);
assert(htp->population == 1);
char* key2p = "foo";
size_t key2Length = strlen(key2p) + 1;
size_t key2Hash = terribleHash(key2p, key2Length);
char* value2p = "bar";
HashTableSet(htp, key2p, key2Length, key2Hash, value2p);
assert(htp->population == 2);
char* got1p = HashTableGet(htp, key1p, key1Length, key1Hash);
assert(strcmp(value1p, got1p) == 0);
char* got2p = HashTableGet(htp, key2p, key2Length, key2Hash);
assert(strcmp(value2p, got2p) == 0);
printf("OK\n");
}
void testHashCollision() {
printf("%s... ", __FUNCTION__);
size_t capacity = 1;
HashTable* htp = HashTableMake(capacity);
char* key1p = "ab";
size_t key1Length = strlen(key1p) + 1;
size_t key1Hash = terribleHash(key1p, key1Length);
char* value1p = "cd";
HashTableSet(htp, key1p, key1Length, key1Hash, value1p);
assert(htp->population == 1);
char* key2p = "ba";
size_t key2Length = strlen(key2p) + 1;
size_t key2Hash = terribleHash(key2p, key2Length);
char* value2p = "dc";
HashTableSet(htp, key2p, key2Length, key2Hash, value2p);
assert(htp->population == 2);
char* got1p = HashTableGet(htp, key1p, key1Length, key1Hash);
assert(strcmp(value1p, got1p) == 0);
char* got2p = HashTableGet(htp, key2p, key2Length, key2Hash);
assert(strcmp(value2p, got2p) == 0);
printf("OK\n");
}
void testGrow() {
printf("%s... ", __FUNCTION__);
size_t capacity = 1;
HashTable* htp = HashTableMake(capacity);
assert(htp->population == 0);
assert(htp->capacity == capacity);
char* key1p = "hello";
size_t key1Length = strlen(key1p) + 1;
size_t key1Hash = terribleHash(key1p, key1Length);
char* value1p = "world";
HashTableSet(htp, key1p, key1Length, key1Hash, value1p);
assert(htp->population == 1);
assert(htp->capacity == capacity);
char* key2p = "foo";
size_t key2Length = strlen(key2p) + 1;
size_t key2Hash = terribleHash(key2p, key2Length);
char* value2p = "bar";
HashTableSet(htp, key2p, key2Length, key2Hash, value2p);
assert(htp->population == 2);
assert(htp->capacity == capacity*2);
printf("OK\n");
}
void testShrink() {
printf("%s... ", __FUNCTION__);
size_t capacity = 1;
HashTable* htp = HashTableMake(capacity);
assert(htp->population == 0);
assert(htp->capacity == capacity);
char* key1p = "hello";
size_t key1Length = strlen(key1p) + 1;
size_t key1Hash = terribleHash(key1p, key1Length);
char* value1p = "world";
HashTableSet(htp, key1p, key1Length, key1Hash, value1p);
assert(htp->population == 1);
assert(htp->capacity == capacity);
char* key2p = "foo";
size_t key2Length = strlen(key2p) + 1;
size_t key2Hash = terribleHash(key2p, key2Length);
char* value2p = "bar";
HashTableSet(htp, key2p, key2Length, key2Hash, value2p);
assert(htp->population == 2);
assert(htp->capacity == capacity*2);
HashTableSet(htp, key2p, key2Length, key2Hash, NULL);
assert(htp->population == 1);
assert(htp->capacity == capacity);
printf("OK\n");
}
int main(int argc, char** argv) {
testMake();
testFree();
testSet();
testGet();
testSlotCollision();
testHashCollision();
testGrow();
testShrink();
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment