Skip to content

Instantly share code, notes, and snippets.

@cellularmitosis
Last active August 20, 2020 22:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cellularmitosis/f466ab1fd404233796416aab8ad5b4f1 to your computer and use it in GitHub Desktop.
Save cellularmitosis/f466ab1fd404233796416aab8ad5b4f1 to your computer and use it in GitHub Desktop.
Hash table in C, part 2: chaining

Blog 2020/8/20

<- previous | index | next ->

Hash table in C, part 2: chaining

Let's learn how to implement a hash table in C! (See also part 1).

One technique to resolve the problem of "slot collisions" is the concept of chaining, which is a fancy way of saying "use linked lists".

hash2

The HTSlot linked list

We'll call our linked list node an HTSlot. It will contain:

  • a key's hash
  • a pointer to a value
  • a pointer to the next HTSlot
struct _HTSlot {
    size_t keyHash;
    void* valuep;
    struct _HTSlot* nextp;
};
typedef struct _HTSlot HTSlot;

Our HashTable needs to be updated.

 struct _HashTable {
-    void** slotspp;
+    HTSlot** slotspp;
     size_t capacity;
     size_t population;
 };

HashTableFree needs to walk and free each HTSlot list.

 void HashTableFree(HashTable* htp) {
+    for (size_t i=0; i < htp->capacity; i++) {
+        HTSlot* slotp = htp->slotspp[i];
+        while (slotp != NULL) {
+            HTSlot* tmp = slotp;
+            free(slotp);
+            slotp = tmp->nextp;
+            continue;
+        }
+    }
     free(htp->slotspp);
     free(htp);
 }

Get and set

HashTableGet needs to walk each slot list and look for an exact key hash match.

 void* HashTableGet(HashTable* htp, size_t keyHash) {
     size_t slotIndex = keyHash % htp->capacity;
-    return htp->slotspp[slotIndex];
+    HTSlot* cursorp = htp->slotspp[slotIndex];
+    while (cursorp != NULL) {
+        if (cursorp->keyHash == keyHash) {
+            return cursorp->valuep;
+        } else {
+            cursorp = cursorp->nextp;
+            continue;
+        }
+    }
+    return NULL;
 }

HashTableSet walks the slot list, looking for a match to update. If it doesn't find one, it appends a new slot onto the list.

void HashTableSet(HashTable* htp, size_t keyHash, void* valuep) {
    size_t slotIndex = keyHash % htp->capacity;
    HTSlot* cursorp = htp->slotspp[slotIndex];
    while (cursorp != NULL) {
        if (cursorp->keyHash == keyHash) {
            cursorp->valuep = valuep;
            if (cursorp->valuep == NULL && valuep != NULL) {
                htp->population++;
            } else if (cursorp->valuep != NULL && valuep == NULL) {
                htp->population--;
            }
            return;
        } else {
            cursorp = cursorp->nextp;
            continue;
        }
    }

    // no existing match?  append a new slot to the list.
    if (valuep != NULL) {
        HTSlot* newSlotp = _HTSlotMake();
        newSlotp->keyHash = keyHash;
        newSlotp->valuep = valuep;
        if (htp->slotspp[slotIndex] == NULL) {
            htp->slotspp[slotIndex] = newSlotp;
        } else {
            HTSlot* lastp = htp->slotspp[slotIndex];
            while (lastp->nextp != NULL) {
                lastp = lastp->nextp;
                continue;
            }
            lastp->nextp = newSlotp;
        }
        htp->population++;
    }
}

No more slot collisions

With these changes, our slot collision test passes!

./tests
testMake... OK
testFree... OK
testSet... OK
testGet... OK
testSlotCollision... OK

New problem: hash collisions

However, there is still a problem with this hash table: it doesn't handle the case where two different keys happen to produce the same hash.

We can demonstrate this with a test. This is an easy task for us, as terribleHash is so simplistic that intentionally creating a hash collision is as easy as rearranging the letters in a key.

void testHashCollision() {
    printf("%s... ", __FUNCTION__);
    size_t capacity = 1;
    HashTable* htp = HashTableMake(capacity);

    char* key1p = "ab";
    char* value1p = "cd";
    size_t key1Hash = terribleHash(key1p, strlen(key1p)+1);
    HashTableSet(htp, key1Hash, value1p);
    assert(htp->population == 1);

    char* key2p = "ba";
    char* value2p = "dc";
    size_t key2Hash = terribleHash(key2p, strlen(key2p)+1);
    HashTableSet(htp, key2Hash, value2p);
    assert(htp->population == 2);

    char* got1p = HashTableGet(htp, key1Hash);
    assert(strcmp(value1p, got1p) == 0);

    char* got2p = HashTableGet(htp, key2Hash);
    assert(strcmp(value2p, got2p) == 0);

    printf("OK\n");
}
tests: tests.c:90: testHashCollision: Assertion `htp->population == 2' failed.

Next time

In part 3, we account for hash collisions.

#include "HashTable.h"
#include <stdio.h> // fprintf
#include <stdlib.h> // exit
#include <string.h> // memset
#include <stdint.h> // uint8_t
#include <stdbool.h> // true
#include <assert.h> // assert
void* dmalloc(size_t size) {
void* p = malloc(size);
if (p == NULL) {
perror("Error: malloc failed: ");
exit(1);
}
memset(p, 0, size);
return p;
}
#define malloc(x) dont_use_malloc_use_dmalloc
size_t terribleHash(void* datap, size_t length) {
size_t sum = 0;
for (size_t i=0; i < length; i++) {
uint8_t* byte = datap + i;
sum += *byte;
continue;
}
return sum;
}
static HTSlot* _HTSlotMake() {
HTSlot* sp = dmalloc(sizeof(HTSlot));
return sp;
}
HashTable* HashTableMake(size_t capacity) {
HashTable* htp = dmalloc(sizeof(HashTable));
htp->capacity = capacity;
htp->slotspp = dmalloc(sizeof(void*) * capacity);
return htp;
}
void HashTableFree(HashTable* htp) {
for (size_t i=0; i < htp->capacity; i++) {
HTSlot* slotp = htp->slotspp[i];
while (slotp != NULL) {
HTSlot* tmp = slotp;
free(slotp);
slotp = tmp->nextp;
continue;
}
}
free(htp->slotspp);
free(htp);
}
void* HashTableGet(HashTable* htp, size_t keyHash) {
size_t slotIndex = keyHash % htp->capacity;
HTSlot* cursorp = htp->slotspp[slotIndex];
while (cursorp != NULL) {
if (cursorp->keyHash == keyHash) {
return cursorp->valuep;
} else {
cursorp = cursorp->nextp;
continue;
}
}
return NULL;
}
void HashTableSet(HashTable* htp, size_t keyHash, void* valuep) {
size_t slotIndex = keyHash % htp->capacity;
HTSlot* cursorp = htp->slotspp[slotIndex];
while (cursorp != NULL) {
if (cursorp->keyHash == keyHash) {
if (cursorp->valuep == NULL && valuep != NULL) {
htp->population++;
} else if (cursorp->valuep != NULL && valuep == NULL) {
htp->population--;
}
cursorp->valuep = valuep;
return;
} else {
cursorp = cursorp->nextp;
continue;
}
}
// no existing match? append a new slot to the list.
if (valuep != NULL) {
HTSlot* newSlotp = _HTSlotMake();
newSlotp->keyHash = keyHash;
newSlotp->valuep = valuep;
if (htp->slotspp[slotIndex] == NULL) {
htp->slotspp[slotIndex] = newSlotp;
} else {
HTSlot* lastp = htp->slotspp[slotIndex];
while (lastp->nextp != NULL) {
lastp = lastp->nextp;
continue;
}
lastp->nextp = newSlotp;
}
htp->population++;
}
}
#ifndef _HASHTABLE_H_
#define _HASHTABLE_H_
#include <unistd.h> // size_t
size_t terribleHash(void* datap, size_t length);
struct _HTSlot {
size_t keyHash;
void* valuep;
struct _HTSlot* nextp;
};
typedef struct _HTSlot HTSlot;
struct _HashTable {
HTSlot** slotspp;
size_t capacity;
size_t population;
};
typedef struct _HashTable HashTable;
HashTable* HashTableMake(size_t capacity);
void HashTableFree(HashTable* htp);
void* HashTableGet(HashTable* htp, size_t keyHash);
void HashTableSet(HashTable* htp, size_t keyHash, void* valuep);
#endif
default: tests
./tests
tests: HashTable.o tests.c
gcc -g -std=c99 -Wall -Werror HashTable.o tests.c -o tests
HashTable.o: HashTable.h HashTable.c
gcc -g -std=c99 -Wall -Werror -c HashTable.c
clean:
rm -f HashTable.o tests
.PHONY: default clean
#include "HashTable.h"
#include <assert.h> // assert
#include <stdio.h> // printf
#include <string.h> // strlen, strcmp
void testMake() {
printf("%s... ", __FUNCTION__);
size_t capacity = 3;
HashTable* htp = HashTableMake(capacity);
assert(htp != NULL);
assert(htp->capacity == 3);
printf("OK\n");
}
void testFree() {
printf("%s... ", __FUNCTION__);
size_t capacity = 3;
HashTable* htp = HashTableMake(capacity);
HashTableFree(htp);
printf("OK\n");
}
void testSet() {
printf("%s... ", __FUNCTION__);
size_t capacity = 3;
HashTable* htp = HashTableMake(capacity);
char* keyp = "hello";
char* valuep = "world";
size_t keyHash = terribleHash(keyp, strlen(keyp)+1);
HashTableSet(htp, keyHash, valuep);
assert(htp->population == 1);
printf("OK\n");
}
void testGet() {
printf("%s... ", __FUNCTION__);
size_t capacity = 3;
HashTable* htp = HashTableMake(capacity);
char* keyp = "hello";
char* valuep = "world";
size_t keyHash = terribleHash(keyp, strlen(keyp)+1);
HashTableSet(htp, keyHash, valuep);
char* gotp = HashTableGet(htp, keyHash);
assert(strcmp(valuep, gotp) == 0);
printf("OK\n");
}
void testSlotCollision() {
printf("%s... ", __FUNCTION__);
size_t capacity = 1;
HashTable* htp = HashTableMake(capacity);
char* key1p = "hello";
char* value1p = "world";
size_t key1Hash = terribleHash(key1p, strlen(key1p)+1);
HashTableSet(htp, key1Hash, value1p);
assert(htp->population == 1);
char* key2p = "foo";
char* value2p = "bar";
size_t key2Hash = terribleHash(key2p, strlen(key2p)+1);
HashTableSet(htp, key2Hash, value2p);
assert(htp->population == 2);
char* got1p = HashTableGet(htp, key1Hash);
assert(strcmp(value1p, got1p) == 0);
char* got2p = HashTableGet(htp, key2Hash);
assert(strcmp(value2p, got2p) == 0);
printf("OK\n");
}
void testHashCollision() {
printf("%s... ", __FUNCTION__);
size_t capacity = 1;
HashTable* htp = HashTableMake(capacity);
char* key1p = "ab";
char* value1p = "cd";
size_t key1Hash = terribleHash(key1p, strlen(key1p)+1);
HashTableSet(htp, key1Hash, value1p);
assert(htp->population == 1);
char* key2p = "ba";
char* value2p = "dc";
size_t key2Hash = terribleHash(key2p, strlen(key2p)+1);
HashTableSet(htp, key2Hash, value2p);
assert(htp->population == 2);
char* got1p = HashTableGet(htp, key1Hash);
assert(strcmp(value1p, got1p) == 0);
char* got2p = HashTableGet(htp, key2Hash);
assert(strcmp(value2p, got2p) == 0);
printf("OK\n");
}
int main(int argc, char** argv) {
testMake();
testFree();
testSet();
testGet();
testSlotCollision();
testHashCollision();
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment