Skip to content

Instantly share code, notes, and snippets.

@kuriringohankamehameha
Last active April 16, 2023 19:27
Show Gist options
  • Star 18 You must be signed in to star a gist
  • Fork 5 You must be signed in to fork a gist
  • Save kuriringohankamehameha/fceee2e3c126f37fd254ee0acf2b4633 to your computer and use it in GitHub Desktop.
Save kuriringohankamehameha/fceee2e3c126f37fd254ee0acf2b4633 to your computer and use it in GitHub Desktop.
An Implementation of a Hash Table using Separate Chaining
/**
Code for https://journaldev.com article
Purpose: A Hash Table Implementation
@author: Vijay Ramachandran
@date: 01-02-2020
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define CAPACITY 50000 // Size of the Hash Table
unsigned long hash_function(char* str) {
unsigned long i = 0;
for (int j=0; str[j]; j++)
i += str[j];
return i % CAPACITY;
}
typedef struct Ht_item Ht_item;
// Define the Hash Table Item here
struct Ht_item {
char* key;
char* value;
};
typedef struct LinkedList LinkedList;
// Define the Linkedlist here
struct LinkedList {
Ht_item* item;
LinkedList* next;
};
typedef struct HashTable HashTable;
// Define the Hash Table here
struct HashTable {
// Contains an array of pointers
// to items
Ht_item** items;
LinkedList** overflow_buckets;
int size;
int count;
};
static LinkedList* allocate_list () {
// Allocates memory for a Linkedlist pointer
LinkedList* list = (LinkedList*) calloc (1, sizeof(LinkedList));
return list;
}
static LinkedList* linkedlist_insert(LinkedList* list, Ht_item* item) {
// Inserts the item onto the Linked List
if (!list) {
LinkedList* head = allocate_list();
head->item = item;
head->next = NULL;
list = head;
return list;
}
else if (list->next == NULL) {
LinkedList* node = allocate_list();
node->item = item;
node->next = NULL;
list->next = node;
return list;
}
LinkedList* temp = list;
while (temp->next) {
temp = temp->next;
}
LinkedList* node = allocate_list();
node->item = item;
node->next = NULL;
temp->next = node;
return list;
}
static Ht_item* linkedlist_remove(LinkedList* list) {
// Removes the head from the linked list
// and returns the item of the popped element
if (!list)
return NULL;
if (!list->next)
return NULL;
LinkedList* node = list->next;
LinkedList* temp = list;
temp->next = NULL;
list = node;
Ht_item* it = NULL;
memcpy(temp->item, it, sizeof(Ht_item));
free(temp->item->key);
free(temp->item->value);
free(temp->item);
free(temp);
return it;
}
static void free_linkedlist(LinkedList* list) {
LinkedList* temp = list;
if (!list)
return;
while (list) {
temp = list;
list = list->next;
free(temp->item->key);
free(temp->item->value);
free(temp->item);
free(temp);
}
}
static LinkedList** create_overflow_buckets(HashTable* table) {
// Create the overflow buckets; an array of linkedlists
LinkedList** buckets = (LinkedList**) calloc (table->size, sizeof(LinkedList*));
for (int i=0; i<table->size; i++)
buckets[i] = NULL;
return buckets;
}
static void free_overflow_buckets(HashTable* table) {
// Free all the overflow bucket lists
LinkedList** buckets = table->overflow_buckets;
for (int i=0; i<table->size; i++)
free_linkedlist(buckets[i]);
free(buckets);
}
Ht_item* create_item(char* key, char* value) {
// Creates a pointer to a new hash table item
Ht_item* item = (Ht_item*) malloc (sizeof(Ht_item));
item->key = (char*) calloc (strlen(key) + 1, sizeof(char));
item->value = (char*) calloc (strlen(value) + 1, sizeof(char));
strcpy(item->key, key);
strcpy(item->value, value);
return item;
}
HashTable* create_table(int size) {
// Creates a new HashTable
HashTable* table = (HashTable*) malloc (sizeof(HashTable));
table->size = size;
table->count = 0;
table->items = (Ht_item**) calloc (table->size, sizeof(Ht_item*));
for (int i=0; i<table->size; i++)
table->items[i] = NULL;
table->overflow_buckets = create_overflow_buckets(table);
return table;
}
void free_item(Ht_item* item) {
// Frees an item
free(item->key);
free(item->value);
free(item);
}
void free_hashtable(HashTable* table) {
// Frees the table
for (int i=0; i<table->size; i++) {
Ht_item* item = table->items[i];
if (item != NULL)
free_item(item);
}
free_overflow_buckets(table);
free(table->items);
free(table);
}
void handle_collision(HashTable* table, unsigned long index, Ht_item* item) {
LinkedList* head = table->overflow_buckets[index];
if (head == NULL) {
// We need to create the list
head = allocate_list();
head->item = item;
table->overflow_buckets[index] = head;
return;
}
else {
// Insert to the list
table->overflow_buckets[index] = linkedlist_insert(head, item);
return;
}
}
void ht_insert(HashTable* table, char* key, char* value) {
// Create the item
Ht_item* item = create_item(key, value);
// Compute the index
int index = hash_function(key);
Ht_item* current_item = table->items[index];
if (current_item == NULL) {
// Key does not exist.
if (table->count == table->size) {
// Hash Table Full
printf("Insert Error: Hash Table is full\n");
// Remove the create item
free_item(item);
return;
}
// Insert directly
table->items[index] = item;
table->count++;
}
else {
// Scenario 1: We only need to update value
if (strcmp(current_item->key, key) == 0) {
free(table->items[index]->value);
table->items[index]->value = (char*) calloc (strlen(value) + 1, sizeof(char));
strcpy(table->items[index]->value, value);
free_item(item);
return;
}
else {
// Scenario 2: Collision
handle_collision(table, index, item);
return;
}
}
}
char* ht_search(HashTable* table, char* key) {
// Searches the key in the hashtable
// and returns NULL if it doesn't exist
int index = hash_function(key);
Ht_item* item = table->items[index];
LinkedList* head = table->overflow_buckets[index];
// Ensure that we move to items which are not NULL
while (item != NULL) {
if (strcmp(item->key, key) == 0)
return item->value;
if (head == NULL)
return NULL;
item = head->item;
head = head->next;
}
return NULL;
}
void ht_delete(HashTable* table, char* key) {
// Deletes an item from the table
int index = hash_function(key);
Ht_item* item = table->items[index];
LinkedList* head = table->overflow_buckets[index];
if (item == NULL) {
// Does not exist. Return
return;
}
else {
if (head == NULL && strcmp(item->key, key) == 0) {
// No collision chain. Remove the item
// and set table index to NULL
table->items[index] = NULL;
free_item(item);
table->count--;
return;
}
else if (head != NULL) {
// Collision Chain exists
if (strcmp(item->key, key) == 0) {
// Remove this item and set the head of the list
// as the new item
free_item(item);
LinkedList* node = head;
head = head->next;
node->next = NULL;
table->items[index] = create_item(node->item->key, node->item->value);
free_linkedlist(node);
table->overflow_buckets[index] = head;
return;
}
LinkedList* curr = head;
LinkedList* prev = NULL;
while (curr) {
if (strcmp(curr->item->key, key) == 0) {
if (prev == NULL) {
// First element of the chain. Remove the chain
free_linkedlist(head);
table->overflow_buckets[index] = NULL;
return;
}
else {
// This is somewhere in the chain
prev->next = curr->next;
curr->next = NULL;
free_linkedlist(curr);
table->overflow_buckets[index] = head;
return;
}
}
curr = curr->next;
prev = curr;
}
}
}
}
void print_search(HashTable* table, char* key) {
char* val;
if ((val = ht_search(table, key)) == NULL) {
printf("%s does not exist\n", key);
return;
}
else {
printf("Key:%s, Value:%s\n", key, val);
}
}
void print_hashtable(HashTable* table) {
printf("\n-------------------\n");
for (int i=0; i<table->size; i++) {
if (table->items[i]) {
printf("Index:%d, Key:%s, Value:%s", i, table->items[i]->key, table->items[i]->value);
if (table->overflow_buckets[i]) {
printf(" => Overflow Bucket => ");
LinkedList* head = table->overflow_buckets[i];
while (head) {
printf("Key:%s, Value:%s ", head->item->key, head->item->value);
head = head->next;
}
}
printf("\n");
}
}
printf("-------------------\n");
}
int main() {
HashTable* ht = create_table(CAPACITY);
ht_insert(ht, "1", "First address");
ht_insert(ht, "2", "Second address");
ht_insert(ht, "Hel", "Third address");
ht_insert(ht, "Cau", "Fourth address");
print_search(ht, "1");
print_search(ht, "2");
print_search(ht, "3");
print_search(ht, "Hel");
print_search(ht, "Cau"); // Collision!
print_hashtable(ht);
ht_delete(ht, "1");
ht_delete(ht, "Cau");
print_hashtable(ht);
free_hashtable(ht);
return 0;
}
@aryobimo17
Copy link

Hello I wanna ask why I got print error 'print_table' was not declared in this scope. line 369

@kuriringohankamehameha
Copy link
Author

kuriringohankamehameha commented Jun 11, 2020

Oops again. It's supposed to be print_hashtable(). Updated the gist

@kiran-kumar-thatikayala

line no 304 // First element of the chain. Remove the chain

why removing the chain if it is the first element? I assume before freeing head set table->overflow_buckets[index] =head->next;

@oschonrock
Copy link

Nice Job! Good explanation on the blog.

If I reduce CAPACITY to 3 to force some collisions I get a segmentation fault.

I traced it down with valgrind to accessing free'd memory, ultimately the bug is on L318 and L319.

These 2 lines need to be swapped, otherwise prev == curr always, and we lose our way ...

@oschonrock
Copy link

this does not make sense to me:

    // Compute the index
    int index = hash_function(key);

    Ht_item* current_item = table->items[index];

    if (current_item == NULL) {
        // Key does not exist.
        if (table->count == table->size) {
            // Hash Table Full
            printf("Insert Error: Hash Table is full\n");
            // Remove the create item
            free_item(item);
            return;
        }

if the hash value points to a slot which is empty ( current == NULL ) then the table cannot be full. So the entire "table full" if can be removed I think.

@oschonrock
Copy link

oschonrock commented Jan 17, 2021

After digging into the code some more, I found that your linked list code is quite "confused".

So I refactored/rewrote it. The fundamental change is that the HashTableItem now contains the ->next ptr for the linked list, This makes the amount of code collapse to around half of what it was. Also you seemed to have a redundant "head" node in your linked lists. That's not necessary.

Also I used strdup etc, to make the code more concise.

I also changed the value type to int, because that is what I needed. but it's quick to change via the typedef at top and few more things.

If you're interested, take a look:

https://gist.github.com/oschonrock/d0540347a5a6739a24c4648e724a3e96

@kuriringohankamehameha
Copy link
Author

Hi, thanks for writing your points out. It's almost been a year, and taking a look at the code now, I agree that the whole Linked List code is messed up, especially since half the code is simply unused.

Keeping a next pointer to a HashTableItem is definitely the way to go, since a pointer to a location is enough for insertion and deletion. And also, it would be more relevant to keep a memory limit for the "Table Full" error, so that's something you can consider. Maybe I'll rewrite this again and link a separate gist which is more relevant.

Again, I apologize for the bugs and errors in the code when I wrote this. Looking through your gist, it definitely the much better API for the hash table, and it's how I'd write it now. But for the purpose of the post, I don't want to make too many breaking changes until it's reflected there too.

However, to avoid misleading people thinking that this is a good implementation, anyone who's reading this can take a look at @oschonrock's gist, as my gist above is poorly designed and complicated, which naturally leads to more bugs. Again, thanks for the feedback, and appreciate your comments.

@superbin1996
Copy link

I dont understand your code yet, but it doesnt work

@PrinterFranklin
Copy link

Hey!

Firstly, thanks for sharing your version of hash table in C.

Secondly, I find a bug when inserting two nodes with same key into the overflow_buckets. There's no key check there:

void handle_collision(HashTable* table, unsigned long index, Ht_item* item) {
    LinkedList* head = table->overflow_buckets[index];
 
    if (head == NULL) {
        head = allocate_list();
        head->item = item;
        table->overflow_buckets[index] = head;
        return;
    }
    else {
        // here we need to check if there are duplicated keys
        table->overflow_buckets[index] = linkedlist_insert(head, item);
        return;
    }
}

My solution is:

static void handle_collision(ht_table *table, unsigned long index, ht_item *item) {
        ListNode* head = table->overflow_buckets[index];

        if (head == NULL) {
                head = create_item_list();
                head->item = item;
                table->overflow_buckets[index] = head;
                return;
        } else {
                ListNode* curr = table->overflow_buckets[index];
                while (curr) {
                        if (strcmp(curr->item->key, item->key) == 0) {
                                free_item(curr->item);
                                curr->item = item;
                                return;
                        }
                        curr = curr->next;
                }
                table->overflow_buckets[index] = insert_item_list(head, item);
                return;
        }
}

@kuriringohankamehameha
Copy link
Author

kuriringohankamehameha commented Jun 4, 2021

Hey, thanks for bringing this up. That's a valid point. However, from your snippet, you could directly return when you found an exact match, instead of deleting and then inserting to the rear again.

@alex-tee
Copy link

please add a license

@JeffWang0325
Copy link

Thank you for sharing this code.
There were some bugs I found. Please refer to the tag "Jeff Revised!" for the detail in the program.
If you have any questions or suggestions about code, please feel free to contact me and discuss with me. 😄😄😄
Program link: https://gist.github.com/JeffWang0325/5bad679ec2c0e5034923d1bf8cb54367

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment