cellularmitosis/HashTable.c Secret

## README.md

      
    Raw
  

              README.md
            
          
    Blog 2020/8/20
<- previous |
index |
next ->
Hash table in C, part 2: chaining

Let's learn how to implement a hash table in C!
(See also part 1).
One technique to resolve the problem of "slot collisions" is the concept of
chaining,
which is a fancy way of saying "use linked lists".

The HTSlot linked list

We'll call our linked list node an HTSlot.  It will contain:

a key's hash
a pointer to a value
a pointer to the next HTSlot

struct _HTSlot {
    size_t keyHash;
    void* valuep;
    struct _HTSlot* nextp;
};
typedef struct _HTSlot HTSlot;
Our HashTable needs to be updated.
 struct _HashTable {
-    void** slotspp;
+    HTSlot** slotspp;
     size_t capacity;
     size_t population;
 };
HashTableFree needs to walk and free each HTSlot list.
 void HashTableFree(HashTable* htp) {
+    for (size_t i=0; i < htp->capacity; i++) {
+        HTSlot* slotp = htp->slotspp[i];
+        while (slotp != NULL) {
+            HTSlot* tmp = slotp;
+            free(slotp);
+            slotp = tmp->nextp;
+            continue;
+        }
+    }
     free(htp->slotspp);
     free(htp);
 }
Get and set

HashTableGet needs to walk each slot list and look for an exact key hash match.
 void* HashTableGet(HashTable* htp, size_t keyHash) {
     size_t slotIndex = keyHash % htp->capacity;
-    return htp->slotspp[slotIndex];
+    HTSlot* cursorp = htp->slotspp[slotIndex];
+    while (cursorp != NULL) {
+        if (cursorp->keyHash == keyHash) {
+            return cursorp->valuep;
+        } else {
+            cursorp = cursorp->nextp;
+            continue;
+        }
+    }
+    return NULL;
 }
HashTableSet walks the slot list, looking for a match to update.
If it doesn't find one, it appends a new slot onto the list.
void HashTableSet(HashTable* htp, size_t keyHash, void* valuep) {
    size_t slotIndex = keyHash % htp->capacity;
    HTSlot* cursorp = htp->slotspp[slotIndex];
    while (cursorp != NULL) {
        if (cursorp->keyHash == keyHash) {
            cursorp->valuep = valuep;
            if (cursorp->valuep == NULL && valuep != NULL) {
                htp->population++;
            } else if (cursorp->valuep != NULL && valuep == NULL) {
                htp->population--;
            }
            return;
        } else {
            cursorp = cursorp->nextp;
            continue;
        }
    }

    // no existing match?  append a new slot to the list.
    if (valuep != NULL) {
        HTSlot* newSlotp = _HTSlotMake();
        newSlotp->keyHash = keyHash;
        newSlotp->valuep = valuep;
        if (htp->slotspp[slotIndex] == NULL) {
            htp->slotspp[slotIndex] = newSlotp;
        } else {
            HTSlot* lastp = htp->slotspp[slotIndex];
            while (lastp->nextp != NULL) {
                lastp = lastp->nextp;
                continue;
            }
            lastp->nextp = newSlotp;
        }
        htp->population++;
    }
}
No more slot collisions

With these changes, our slot collision test passes!
./tests
testMake... OK
testFree... OK
testSet... OK
testGet... OK
testSlotCollision... OK

New problem: hash collisions

However, there is still a problem with this hash table:
it doesn't handle the case where two different keys happen to produce the same hash.
We can demonstrate this with a test.
This is an easy task for us, as terribleHash is so simplistic
that intentionally creating a hash collision is as easy as rearranging the letters in a key.
void testHashCollision() {
    printf("%s... ", __FUNCTION__);
    size_t capacity = 1;
    HashTable* htp = HashTableMake(capacity);

    char* key1p = "ab";
    char* value1p = "cd";
    size_t key1Hash = terribleHash(key1p, strlen(key1p)+1);
    HashTableSet(htp, key1Hash, value1p);
    assert(htp->population == 1);

    char* key2p = "ba";
    char* value2p = "dc";
    size_t key2Hash = terribleHash(key2p, strlen(key2p)+1);
    HashTableSet(htp, key2Hash, value2p);
    assert(htp->population == 2);

    char* got1p = HashTableGet(htp, key1Hash);
    assert(strcmp(value1p, got1p) == 0);

    char* got2p = HashTableGet(htp, key2Hash);
    assert(strcmp(value2p, got2p) == 0);

    printf("OK\n");
}
tests: tests.c:90: testHashCollision: Assertion `htp->population == 2' failed.

Next time

In part 3, we account for hash collisions.

  
## HashTable.c
#include "HashTable.h"

#include <stdio.h>  // fprintf
#include <stdlib.h>  // exit
#include <string.h>  // memset
#include <stdint.h>  // uint8_t
#include <stdbool.h>  // true
#include <assert.h>  // assert

void* dmalloc(size_t size) {
    void* p = malloc(size);
    if (p == NULL) {
        perror("Error: malloc failed: ");
        exit(1);
    }
    memset(p, 0, size);
    return p;
}
#define malloc(x) dont_use_malloc_use_dmalloc

size_t terribleHash(void* datap, size_t length) {
    size_t sum = 0;
    for (size_t i=0; i < length; i++) {
        uint8_t* byte = datap + i;
        sum += *byte;
        continue;
    }
    return sum;
}

static HTSlot* _HTSlotMake() {
    HTSlot* sp = dmalloc(sizeof(HTSlot));
    return sp;
}

HashTable* HashTableMake(size_t capacity) {
    HashTable* htp = dmalloc(sizeof(HashTable));
    htp->capacity = capacity;
    htp->slotspp = dmalloc(sizeof(void*) * capacity);
    return htp;
}

void HashTableFree(HashTable* htp) {
    for (size_t i=0; i < htp->capacity; i++) {
        HTSlot* slotp = htp->slotspp[i];
        while (slotp != NULL) {
            HTSlot* tmp = slotp;
            free(slotp);
            slotp = tmp->nextp;
            continue;
        }
    }
    free(htp->slotspp);
    free(htp);
}

void* HashTableGet(HashTable* htp, size_t keyHash) {
    size_t slotIndex = keyHash % htp->capacity;
    HTSlot* cursorp = htp->slotspp[slotIndex];
    while (cursorp != NULL) {
        if (cursorp->keyHash == keyHash) {
            return cursorp->valuep;
        } else {
            cursorp = cursorp->nextp;
            continue;
        }
    }
    return NULL;
}

void HashTableSet(HashTable* htp, size_t keyHash, void* valuep) {
    size_t slotIndex = keyHash % htp->capacity;
    HTSlot* cursorp = htp->slotspp[slotIndex];
    while (cursorp != NULL) {
        if (cursorp->keyHash == keyHash) {
            if (cursorp->valuep == NULL && valuep != NULL) {
                htp->population++;
            } else if (cursorp->valuep != NULL && valuep == NULL) {
                htp->population--;
            }
            cursorp->valuep = valuep;
            return;
        } else {
            cursorp = cursorp->nextp;
            continue;
        }
    }

    // no existing match?  append a new slot to the list.
    if (valuep != NULL) {
        HTSlot* newSlotp = _HTSlotMake();
        newSlotp->keyHash = keyHash;
        newSlotp->valuep = valuep;
        if (htp->slotspp[slotIndex] == NULL) {
            htp->slotspp[slotIndex] = newSlotp;
        } else {
            HTSlot* lastp = htp->slotspp[slotIndex];
            while (lastp->nextp != NULL) {
                lastp = lastp->nextp;
                continue;
            }
            lastp->nextp = newSlotp;
        }
        htp->population++;
    }
}

## HashTable.h
#ifndef _HASHTABLE_H_
#define _HASHTABLE_H_

#include <unistd.h>  // size_t

size_t terribleHash(void* datap, size_t length);

struct _HTSlot {
    size_t keyHash;
    void* valuep;
    struct _HTSlot* nextp;
};
typedef struct _HTSlot HTSlot;

struct _HashTable {
    HTSlot** slotspp;
    size_t capacity;
    size_t population;
};
typedef struct _HashTable HashTable;

HashTable* HashTableMake(size_t capacity);
void HashTableFree(HashTable* htp);

void* HashTableGet(HashTable* htp, size_t keyHash);
void HashTableSet(HashTable* htp, size_t keyHash, void* valuep);

#endif

## Makefile
default: tests
	./tests

tests: HashTable.o tests.c
	gcc -g -std=c99 -Wall -Werror HashTable.o tests.c -o tests

HashTable.o: HashTable.h HashTable.c
	gcc -g -std=c99 -Wall -Werror -c HashTable.c

clean:
	rm -f HashTable.o tests

.PHONY: default clean

## tests.c
#include "HashTable.h"

#include <assert.h>  // assert
#include <stdio.h>  // printf
#include <string.h>  // strlen, strcmp

void testMake() {
    printf("%s... ", __FUNCTION__);
    size_t capacity = 3;
    HashTable* htp = HashTableMake(capacity);
    assert(htp != NULL);
    assert(htp->capacity == 3);
    printf("OK\n");
}

void testFree() {
    printf("%s... ", __FUNCTION__);
    size_t capacity = 3;
    HashTable* htp = HashTableMake(capacity);
    HashTableFree(htp);
    printf("OK\n");
}

void testSet() {
    printf("%s... ", __FUNCTION__);
    size_t capacity = 3;
    HashTable* htp = HashTableMake(capacity);
    char* keyp = "hello";
    char* valuep = "world";
    size_t keyHash = terribleHash(keyp, strlen(keyp)+1);
    HashTableSet(htp, keyHash, valuep);
    assert(htp->population == 1);
    printf("OK\n");
}

void testGet() {
    printf("%s... ", __FUNCTION__);
    size_t capacity = 3;
    HashTable* htp = HashTableMake(capacity);
    char* keyp = "hello";
    char* valuep = "world";
    size_t keyHash = terribleHash(keyp, strlen(keyp)+1);
    HashTableSet(htp, keyHash, valuep);
    char* gotp = HashTableGet(htp, keyHash);
    assert(strcmp(valuep, gotp) == 0);
    printf("OK\n");
}

void testSlotCollision() {
    printf("%s... ", __FUNCTION__);
    size_t capacity = 1;
    HashTable* htp = HashTableMake(capacity);

    char* key1p = "hello";
    char* value1p = "world";
    size_t key1Hash = terribleHash(key1p, strlen(key1p)+1);
    HashTableSet(htp, key1Hash, value1p);
    assert(htp->population == 1);

    char* key2p = "foo";
    char* value2p = "bar";
    size_t key2Hash = terribleHash(key2p, strlen(key2p)+1);
    HashTableSet(htp, key2Hash, value2p);
    assert(htp->population == 2);

    char* got1p = HashTableGet(htp, key1Hash);
    assert(strcmp(value1p, got1p) == 0);

    char* got2p = HashTableGet(htp, key2Hash);
    assert(strcmp(value2p, got2p) == 0);

    printf("OK\n");
}

void testHashCollision() {
    printf("%s... ", __FUNCTION__);
    size_t capacity = 1;
    HashTable* htp = HashTableMake(capacity);

    char* key1p = "ab";
    char* value1p = "cd";
    size_t key1Hash = terribleHash(key1p, strlen(key1p)+1);
    HashTableSet(htp, key1Hash, value1p);
    assert(htp->population == 1);

    char* key2p = "ba";
    char* value2p = "dc";
    size_t key2Hash = terribleHash(key2p, strlen(key2p)+1);
    HashTableSet(htp, key2Hash, value2p);
    assert(htp->population == 2);

    char* got1p = HashTableGet(htp, key1Hash);
    assert(strcmp(value1p, got1p) == 0);

    char* got2p = HashTableGet(htp, key2Hash);
    assert(strcmp(value2p, got2p) == 0);

    printf("OK\n");
}

int main(int argc, char** argv) {
    testMake();
    testFree();
    testSet();
    testGet();
    testSlotCollision();
    testHashCollision();
    return 0;
}
	#include "HashTable.h"

	#include <stdio.h> // fprintf
	#include <stdlib.h> // exit
	#include <string.h> // memset
	#include <stdint.h> // uint8_t
	#include <stdbool.h> // true
	#include <assert.h> // assert

	void* dmalloc(size_t size) {
	void* p = malloc(size);
	if (p == NULL) {
	perror("Error: malloc failed: ");
	exit(1);
	}
	memset(p, 0, size);
	return p;
	}
	#define malloc(x) dont_use_malloc_use_dmalloc

	size_t terribleHash(void* datap, size_t length) {
	size_t sum = 0;
	for (size_t i=0; i < length; i++) {
	uint8_t* byte = datap + i;
	sum += *byte;
	continue;
	}
	return sum;
	}

	static HTSlot* _HTSlotMake() {
	HTSlot* sp = dmalloc(sizeof(HTSlot));
	return sp;
	}

	HashTable* HashTableMake(size_t capacity) {
	HashTable* htp = dmalloc(sizeof(HashTable));
	htp->capacity = capacity;
	htp->slotspp = dmalloc(sizeof(void) capacity);
	return htp;
	}

	void HashTableFree(HashTable* htp) {
	for (size_t i=0; i < htp->capacity; i++) {
	HTSlot* slotp = htp->slotspp[i];
	while (slotp != NULL) {
	HTSlot* tmp = slotp;
	free(slotp);
	slotp = tmp->nextp;
	continue;
	}
	}
	free(htp->slotspp);
	free(htp);
	}

	void* HashTableGet(HashTable* htp, size_t keyHash) {
	size_t slotIndex = keyHash % htp->capacity;
	HTSlot* cursorp = htp->slotspp[slotIndex];
	while (cursorp != NULL) {
	if (cursorp->keyHash == keyHash) {
	return cursorp->valuep;
	} else {
	cursorp = cursorp->nextp;
	continue;
	}
	}
	return NULL;
	}

	void HashTableSet(HashTable* htp, size_t keyHash, void* valuep) {
	size_t slotIndex = keyHash % htp->capacity;
	HTSlot* cursorp = htp->slotspp[slotIndex];
	while (cursorp != NULL) {
	if (cursorp->keyHash == keyHash) {
	if (cursorp->valuep == NULL && valuep != NULL) {
	htp->population++;
	} else if (cursorp->valuep != NULL && valuep == NULL) {
	htp->population--;
	}
	cursorp->valuep = valuep;
	return;
	} else {
	cursorp = cursorp->nextp;
	continue;
	}
	}

	// no existing match? append a new slot to the list.
	if (valuep != NULL) {
	HTSlot* newSlotp = _HTSlotMake();
	newSlotp->keyHash = keyHash;
	newSlotp->valuep = valuep;
	if (htp->slotspp[slotIndex] == NULL) {
	htp->slotspp[slotIndex] = newSlotp;
	} else {
	HTSlot* lastp = htp->slotspp[slotIndex];
	while (lastp->nextp != NULL) {
	lastp = lastp->nextp;
	continue;
	}
	lastp->nextp = newSlotp;
	}
	htp->population++;
	}
	}
	#ifndef _HASHTABLE_H_
	#define _HASHTABLE_H_

	#include <unistd.h> // size_t

	size_t terribleHash(void* datap, size_t length);

	struct _HTSlot {
	size_t keyHash;
	void* valuep;
	struct _HTSlot* nextp;
	};
	typedef struct _HTSlot HTSlot;

	struct _HashTable {
	HTSlot** slotspp;
	size_t capacity;
	size_t population;
	};
	typedef struct _HashTable HashTable;

	HashTable* HashTableMake(size_t capacity);
	void HashTableFree(HashTable* htp);

	void* HashTableGet(HashTable* htp, size_t keyHash);
	void HashTableSet(HashTable* htp, size_t keyHash, void* valuep);

	#endif
	default: tests
	./tests

	tests: HashTable.o tests.c
	gcc -g -std=c99 -Wall -Werror HashTable.o tests.c -o tests

	HashTable.o: HashTable.h HashTable.c
	gcc -g -std=c99 -Wall -Werror -c HashTable.c

	clean:
	rm -f HashTable.o tests

	.PHONY: default clean
	#include "HashTable.h"

	#include <assert.h> // assert
	#include <stdio.h> // printf
	#include <string.h> // strlen, strcmp

	void testMake() {
	printf("%s... ", __FUNCTION__);
	size_t capacity = 3;
	HashTable* htp = HashTableMake(capacity);
	assert(htp != NULL);
	assert(htp->capacity == 3);
	printf("OK\n");
	}

	void testFree() {
	printf("%s... ", __FUNCTION__);
	size_t capacity = 3;
	HashTable* htp = HashTableMake(capacity);
	HashTableFree(htp);
	printf("OK\n");
	}

	void testSet() {
	printf("%s... ", __FUNCTION__);
	size_t capacity = 3;
	HashTable* htp = HashTableMake(capacity);
	char* keyp = "hello";
	char* valuep = "world";
	size_t keyHash = terribleHash(keyp, strlen(keyp)+1);
	HashTableSet(htp, keyHash, valuep);
	assert(htp->population == 1);
	printf("OK\n");
	}

	void testGet() {
	printf("%s... ", __FUNCTION__);
	size_t capacity = 3;
	HashTable* htp = HashTableMake(capacity);
	char* keyp = "hello";
	char* valuep = "world";
	size_t keyHash = terribleHash(keyp, strlen(keyp)+1);
	HashTableSet(htp, keyHash, valuep);
	char* gotp = HashTableGet(htp, keyHash);
	assert(strcmp(valuep, gotp) == 0);
	printf("OK\n");
	}

	void testSlotCollision() {
	printf("%s... ", __FUNCTION__);
	size_t capacity = 1;
	HashTable* htp = HashTableMake(capacity);

	char* key1p = "hello";
	char* value1p = "world";
	size_t key1Hash = terribleHash(key1p, strlen(key1p)+1);
	HashTableSet(htp, key1Hash, value1p);
	assert(htp->population == 1);

	char* key2p = "foo";
	char* value2p = "bar";
	size_t key2Hash = terribleHash(key2p, strlen(key2p)+1);
	HashTableSet(htp, key2Hash, value2p);
	assert(htp->population == 2);

	char* got1p = HashTableGet(htp, key1Hash);
	assert(strcmp(value1p, got1p) == 0);

	char* got2p = HashTableGet(htp, key2Hash);
	assert(strcmp(value2p, got2p) == 0);

	printf("OK\n");
	}

	void testHashCollision() {
	printf("%s... ", __FUNCTION__);
	size_t capacity = 1;
	HashTable* htp = HashTableMake(capacity);

	char* key1p = "ab";
	char* value1p = "cd";
	size_t key1Hash = terribleHash(key1p, strlen(key1p)+1);
	HashTableSet(htp, key1Hash, value1p);
	assert(htp->population == 1);

	char* key2p = "ba";
	char* value2p = "dc";
	size_t key2Hash = terribleHash(key2p, strlen(key2p)+1);
	HashTableSet(htp, key2Hash, value2p);
	assert(htp->population == 2);

	char* got1p = HashTableGet(htp, key1Hash);
	assert(strcmp(value1p, got1p) == 0);

	char* got2p = HashTableGet(htp, key2Hash);
	assert(strcmp(value2p, got2p) == 0);

	printf("OK\n");
	}

	int main(int argc, char** argv) {
	testMake();
	testFree();
	testSet();
	testGet();
	testSlotCollision();
	testHashCollision();
	return 0;
	}