Instantly share code, notes, and snippets.

# cellularmitosis/HashTable.c Secret

Last active August 20, 2020 22:01
Show Gist options
• Save cellularmitosis/f466ab1fd404233796416aab8ad5b4f1 to your computer and use it in GitHub Desktop.
Hash table in C, part 2: chaining

Blog 2020/8/20

# Hash table in C, part 2: chaining

Let's learn how to implement a hash table in C! (See also part 1).

One technique to resolve the problem of "slot collisions" is the concept of chaining, which is a fancy way of saying "use linked lists".

## The `HTSlot` linked list

We'll call our linked list node an `HTSlot`. It will contain:

• a key's hash
• a pointer to a value
• a pointer to the next `HTSlot`
```struct _HTSlot {
size_t keyHash;
void* valuep;
struct _HTSlot* nextp;
};
typedef struct _HTSlot HTSlot;```

Our `HashTable` needs to be updated.

``` struct _HashTable {
-    void** slotspp;
+    HTSlot** slotspp;
size_t capacity;
size_t population;
};```

`HashTableFree` needs to walk and free each `HTSlot` list.

``` void HashTableFree(HashTable* htp) {
+    for (size_t i=0; i < htp->capacity; i++) {
+        HTSlot* slotp = htp->slotspp[i];
+        while (slotp != NULL) {
+            HTSlot* tmp = slotp;
+            free(slotp);
+            slotp = tmp->nextp;
+            continue;
+        }
+    }
free(htp->slotspp);
free(htp);
}```

## Get and set

`HashTableGet` needs to walk each slot list and look for an exact key hash match.

``` void* HashTableGet(HashTable* htp, size_t keyHash) {
size_t slotIndex = keyHash % htp->capacity;
-    return htp->slotspp[slotIndex];
+    HTSlot* cursorp = htp->slotspp[slotIndex];
+    while (cursorp != NULL) {
+        if (cursorp->keyHash == keyHash) {
+            return cursorp->valuep;
+        } else {
+            cursorp = cursorp->nextp;
+            continue;
+        }
+    }
+    return NULL;
}```

`HashTableSet` walks the slot list, looking for a match to update. If it doesn't find one, it appends a new slot onto the list.

```void HashTableSet(HashTable* htp, size_t keyHash, void* valuep) {
size_t slotIndex = keyHash % htp->capacity;
HTSlot* cursorp = htp->slotspp[slotIndex];
while (cursorp != NULL) {
if (cursorp->keyHash == keyHash) {
cursorp->valuep = valuep;
if (cursorp->valuep == NULL && valuep != NULL) {
htp->population++;
} else if (cursorp->valuep != NULL && valuep == NULL) {
htp->population--;
}
return;
} else {
cursorp = cursorp->nextp;
continue;
}
}

// no existing match?  append a new slot to the list.
if (valuep != NULL) {
HTSlot* newSlotp = _HTSlotMake();
newSlotp->keyHash = keyHash;
newSlotp->valuep = valuep;
if (htp->slotspp[slotIndex] == NULL) {
htp->slotspp[slotIndex] = newSlotp;
} else {
HTSlot* lastp = htp->slotspp[slotIndex];
while (lastp->nextp != NULL) {
lastp = lastp->nextp;
continue;
}
lastp->nextp = newSlotp;
}
htp->population++;
}
}```

## No more slot collisions

With these changes, our slot collision test passes!

``````./tests
testMake... OK
testFree... OK
testSet... OK
testGet... OK
testSlotCollision... OK
``````

## New problem: hash collisions

However, there is still a problem with this hash table: it doesn't handle the case where two different keys happen to produce the same hash.

We can demonstrate this with a test. This is an easy task for us, as `terribleHash` is so simplistic that intentionally creating a hash collision is as easy as rearranging the letters in a key.

```void testHashCollision() {
printf("%s... ", __FUNCTION__);
size_t capacity = 1;
HashTable* htp = HashTableMake(capacity);

char* key1p = "ab";
char* value1p = "cd";
size_t key1Hash = terribleHash(key1p, strlen(key1p)+1);
HashTableSet(htp, key1Hash, value1p);
assert(htp->population == 1);

char* key2p = "ba";
char* value2p = "dc";
size_t key2Hash = terribleHash(key2p, strlen(key2p)+1);
HashTableSet(htp, key2Hash, value2p);
assert(htp->population == 2);

char* got1p = HashTableGet(htp, key1Hash);
assert(strcmp(value1p, got1p) == 0);

char* got2p = HashTableGet(htp, key2Hash);
assert(strcmp(value2p, got2p) == 0);

printf("OK\n");
}```
``````tests: tests.c:90: testHashCollision: Assertion `htp->population == 2' failed.
``````

## Next time

In part 3, we account for hash collisions.

This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
 #include "HashTable.h" #include // fprintf #include // exit #include // memset #include // uint8_t #include // true #include // assert void* dmalloc(size_t size) { void* p = malloc(size); if (p == NULL) { perror("Error: malloc failed: "); exit(1); } memset(p, 0, size); return p; } #define malloc(x) dont_use_malloc_use_dmalloc size_t terribleHash(void* datap, size_t length) { size_t sum = 0; for (size_t i=0; i < length; i++) { uint8_t* byte = datap + i; sum += *byte; continue; } return sum; } static HTSlot* _HTSlotMake() { HTSlot* sp = dmalloc(sizeof(HTSlot)); return sp; } HashTable* HashTableMake(size_t capacity) { HashTable* htp = dmalloc(sizeof(HashTable)); htp->capacity = capacity; htp->slotspp = dmalloc(sizeof(void*) * capacity); return htp; } void HashTableFree(HashTable* htp) { for (size_t i=0; i < htp->capacity; i++) { HTSlot* slotp = htp->slotspp[i]; while (slotp != NULL) { HTSlot* tmp = slotp; free(slotp); slotp = tmp->nextp; continue; } } free(htp->slotspp); free(htp); } void* HashTableGet(HashTable* htp, size_t keyHash) { size_t slotIndex = keyHash % htp->capacity; HTSlot* cursorp = htp->slotspp[slotIndex]; while (cursorp != NULL) { if (cursorp->keyHash == keyHash) { return cursorp->valuep; } else { cursorp = cursorp->nextp; continue; } } return NULL; } void HashTableSet(HashTable* htp, size_t keyHash, void* valuep) { size_t slotIndex = keyHash % htp->capacity; HTSlot* cursorp = htp->slotspp[slotIndex]; while (cursorp != NULL) { if (cursorp->keyHash == keyHash) { if (cursorp->valuep == NULL && valuep != NULL) { htp->population++; } else if (cursorp->valuep != NULL && valuep == NULL) { htp->population--; } cursorp->valuep = valuep; return; } else { cursorp = cursorp->nextp; continue; } } // no existing match? append a new slot to the list. if (valuep != NULL) { HTSlot* newSlotp = _HTSlotMake(); newSlotp->keyHash = keyHash; newSlotp->valuep = valuep; if (htp->slotspp[slotIndex] == NULL) { htp->slotspp[slotIndex] = newSlotp; } else { HTSlot* lastp = htp->slotspp[slotIndex]; while (lastp->nextp != NULL) { lastp = lastp->nextp; continue; } lastp->nextp = newSlotp; } htp->population++; } }
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
 #ifndef _HASHTABLE_H_ #define _HASHTABLE_H_ #include // size_t size_t terribleHash(void* datap, size_t length); struct _HTSlot { size_t keyHash; void* valuep; struct _HTSlot* nextp; }; typedef struct _HTSlot HTSlot; struct _HashTable { HTSlot** slotspp; size_t capacity; size_t population; }; typedef struct _HashTable HashTable; HashTable* HashTableMake(size_t capacity); void HashTableFree(HashTable* htp); void* HashTableGet(HashTable* htp, size_t keyHash); void HashTableSet(HashTable* htp, size_t keyHash, void* valuep); #endif
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
 default: tests ./tests tests: HashTable.o tests.c gcc -g -std=c99 -Wall -Werror HashTable.o tests.c -o tests HashTable.o: HashTable.h HashTable.c gcc -g -std=c99 -Wall -Werror -c HashTable.c clean: rm -f HashTable.o tests .PHONY: default clean
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
 #include "HashTable.h" #include // assert #include // printf #include // strlen, strcmp void testMake() { printf("%s... ", __FUNCTION__); size_t capacity = 3; HashTable* htp = HashTableMake(capacity); assert(htp != NULL); assert(htp->capacity == 3); printf("OK\n"); } void testFree() { printf("%s... ", __FUNCTION__); size_t capacity = 3; HashTable* htp = HashTableMake(capacity); HashTableFree(htp); printf("OK\n"); } void testSet() { printf("%s... ", __FUNCTION__); size_t capacity = 3; HashTable* htp = HashTableMake(capacity); char* keyp = "hello"; char* valuep = "world"; size_t keyHash = terribleHash(keyp, strlen(keyp)+1); HashTableSet(htp, keyHash, valuep); assert(htp->population == 1); printf("OK\n"); } void testGet() { printf("%s... ", __FUNCTION__); size_t capacity = 3; HashTable* htp = HashTableMake(capacity); char* keyp = "hello"; char* valuep = "world"; size_t keyHash = terribleHash(keyp, strlen(keyp)+1); HashTableSet(htp, keyHash, valuep); char* gotp = HashTableGet(htp, keyHash); assert(strcmp(valuep, gotp) == 0); printf("OK\n"); } void testSlotCollision() { printf("%s... ", __FUNCTION__); size_t capacity = 1; HashTable* htp = HashTableMake(capacity); char* key1p = "hello"; char* value1p = "world"; size_t key1Hash = terribleHash(key1p, strlen(key1p)+1); HashTableSet(htp, key1Hash, value1p); assert(htp->population == 1); char* key2p = "foo"; char* value2p = "bar"; size_t key2Hash = terribleHash(key2p, strlen(key2p)+1); HashTableSet(htp, key2Hash, value2p); assert(htp->population == 2); char* got1p = HashTableGet(htp, key1Hash); assert(strcmp(value1p, got1p) == 0); char* got2p = HashTableGet(htp, key2Hash); assert(strcmp(value2p, got2p) == 0); printf("OK\n"); } void testHashCollision() { printf("%s... ", __FUNCTION__); size_t capacity = 1; HashTable* htp = HashTableMake(capacity); char* key1p = "ab"; char* value1p = "cd"; size_t key1Hash = terribleHash(key1p, strlen(key1p)+1); HashTableSet(htp, key1Hash, value1p); assert(htp->population == 1); char* key2p = "ba"; char* value2p = "dc"; size_t key2Hash = terribleHash(key2p, strlen(key2p)+1); HashTableSet(htp, key2Hash, value2p); assert(htp->population == 2); char* got1p = HashTableGet(htp, key1Hash); assert(strcmp(value1p, got1p) == 0); char* got2p = HashTableGet(htp, key2Hash); assert(strcmp(value2p, got2p) == 0); printf("OK\n"); } int main(int argc, char** argv) { testMake(); testFree(); testSet(); testGet(); testSlotCollision(); testHashCollision(); return 0; }