Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
An Implementation of a Hash Table using Separate Chaining
/**
Code for https://journaldev.com article
Purpose: A Hash Table Implementation
@author: Vijay Ramachandran
@date: 01-02-2020
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define CAPACITY 50000 // Size of the Hash Table
unsigned long hash_function(char* str) {
unsigned long i = 0;
for (int j=0; str[j]; j++)
i += str[j];
return i % CAPACITY;
}
typedef struct Ht_item Ht_item;
// Define the Hash Table Item here
struct Ht_item {
char* key;
char* value;
};
typedef struct LinkedList LinkedList;
// Define the Linkedlist here
struct LinkedList {
Ht_item* item;
LinkedList* next;
};
typedef struct HashTable HashTable;
// Define the Hash Table here
struct HashTable {
// Contains an array of pointers
// to items
Ht_item** items;
LinkedList** overflow_buckets;
int size;
int count;
};
static LinkedList* allocate_list () {
// Allocates memory for a Linkedlist pointer
LinkedList* list = (LinkedList*) calloc (1, sizeof(LinkedList));
return list;
}
static LinkedList* linkedlist_insert(LinkedList* list, Ht_item* item) {
// Inserts the item onto the Linked List
if (!list) {
LinkedList* head = allocate_list();
head->item = item;
head->next = NULL;
list = head;
return list;
}
else if (list->next == NULL) {
LinkedList* node = allocate_list();
node->item = item;
node->next = NULL;
list->next = node;
return list;
}
LinkedList* temp = list;
while (temp->next) {
temp = temp->next;
}
LinkedList* node = allocate_list();
node->item = item;
node->next = NULL;
temp->next = node;
return list;
}
static Ht_item* linkedlist_remove(LinkedList* list) {
// Removes the head from the linked list
// and returns the item of the popped element
if (!list)
return NULL;
if (!list->next)
return NULL;
LinkedList* node = list->next;
LinkedList* temp = list;
temp->next = NULL;
list = node;
Ht_item* it = NULL;
memcpy(temp->item, it, sizeof(Ht_item));
free(temp->item->key);
free(temp->item->value);
free(temp->item);
free(temp);
return it;
}
static void free_linkedlist(LinkedList* list) {
LinkedList* temp = list;
if (!list)
return;
while (list) {
temp = list;
list = list->next;
free(temp->item->key);
free(temp->item->value);
free(temp->item);
free(temp);
}
}
static LinkedList** create_overflow_buckets(HashTable* table) {
// Create the overflow buckets; an array of linkedlists
LinkedList** buckets = (LinkedList**) calloc (table->size, sizeof(LinkedList*));
for (int i=0; i<table->size; i++)
buckets[i] = NULL;
return buckets;
}
static void free_overflow_buckets(HashTable* table) {
// Free all the overflow bucket lists
LinkedList** buckets = table->overflow_buckets;
for (int i=0; i<table->size; i++)
free_linkedlist(buckets[i]);
free(buckets);
}
Ht_item* create_item(char* key, char* value) {
// Creates a pointer to a new hash table item
Ht_item* item = (Ht_item*) malloc (sizeof(Ht_item));
item->key = (char*) calloc (strlen(key) + 1, sizeof(char));
item->value = (char*) calloc (strlen(value) + 1, sizeof(char));
strcpy(item->key, key);
strcpy(item->value, value);
return item;
}
HashTable* create_table(int size) {
// Creates a new HashTable
HashTable* table = (HashTable*) malloc (sizeof(HashTable));
table->size = size;
table->count = 0;
table->items = (Ht_item**) calloc (table->size, sizeof(Ht_item*));
for (int i=0; i<table->size; i++)
table->items[i] = NULL;
table->overflow_buckets = create_overflow_buckets(table);
return table;
}
void free_item(Ht_item* item) {
// Frees an item
free(item->key);
free(item->value);
free(item);
}
void free_hashtable(HashTable* table) {
// Frees the table
for (int i=0; i<table->size; i++) {
Ht_item* item = table->items[i];
if (item != NULL)
free_item(item);
}
free_overflow_buckets(table);
free(table->items);
free(table);
}
void handle_collision(HashTable* table, unsigned long index, Ht_item* item) {
LinkedList* head = table->overflow_buckets[index];
if (head == NULL) {
// We need to create the list
head = allocate_list();
head->item = item;
table->overflow_buckets[index] = head;
return;
}
else {
// Insert to the list
table->overflow_buckets[index] = linkedlist_insert(head, item);
return;
}
}
void ht_insert(HashTable* table, char* key, char* value) {
// Create the item
Ht_item* item = create_item(key, value);
// Compute the index
int index = hash_function(key);
Ht_item* current_item = table->items[index];
if (current_item == NULL) {
// Key does not exist.
if (table->count == table->size) {
// Hash Table Full
printf("Insert Error: Hash Table is full\n");
// Remove the create item
free_item(item);
return;
}
// Insert directly
table->items[index] = item;
table->count++;
}
else {
// Scenario 1: We only need to update value
if (strcmp(current_item->key, key) == 0) {
free(table->items[index]->value);
table->items[index]->value = (char*) calloc (strlen(value) + 1, sizeof(char));
strcpy(table->items[index]->value, value);
free_item(item);
return;
}
else {
// Scenario 2: Collision
handle_collision(table, index, item);
return;
}
}
}
char* ht_search(HashTable* table, char* key) {
// Searches the key in the hashtable
// and returns NULL if it doesn't exist
int index = hash_function(key);
Ht_item* item = table->items[index];
LinkedList* head = table->overflow_buckets[index];
// Ensure that we move to items which are not NULL
while (item != NULL) {
if (strcmp(item->key, key) == 0)
return item->value;
if (head == NULL)
return NULL;
item = head->item;
head = head->next;
}
return NULL;
}
void ht_delete(HashTable* table, char* key) {
// Deletes an item from the table
int index = hash_function(key);
Ht_item* item = table->items[index];
LinkedList* head = table->overflow_buckets[index];
if (item == NULL) {
// Does not exist. Return
return;
}
else {
if (head == NULL && strcmp(item->key, key) == 0) {
// No collision chain. Remove the item
// and set table index to NULL
table->items[index] = NULL;
free_item(item);
table->count--;
return;
}
else if (head != NULL) {
// Collision Chain exists
if (strcmp(item->key, key) == 0) {
// Remove this item and set the head of the list
// as the new item
free_item(item);
LinkedList* node = head;
head = head->next;
node->next = NULL;
table->items[index] = create_item(node->item->key, node->item->value);
free_linkedlist(node);
table->overflow_buckets[index] = head;
return;
}
LinkedList* curr = head;
LinkedList* prev = NULL;
while (curr) {
if (strcmp(curr->item->key, key) == 0) {
if (prev == NULL) {
// First element of the chain. Remove the chain
free_linkedlist(head);
table->overflow_buckets[index] = NULL;
return;
}
else {
// This is somewhere in the chain
prev->next = curr->next;
curr->next = NULL;
free_linkedlist(curr);
table->overflow_buckets[index] = head;
return;
}
}
curr = curr->next;
prev = curr;
}
}
}
}
void print_search(HashTable* table, char* key) {
char* val;
if ((val = ht_search(table, key)) == NULL) {
printf("%s does not exist\n", key);
return;
}
else {
printf("Key:%s, Value:%s\n", key, val);
}
}
void print_hashtable(HashTable* table) {
printf("\n-------------------\n");
for (int i=0; i<table->size; i++) {
if (table->items[i]) {
printf("Index:%d, Key:%s, Value:%s", i, table->items[i]->key, table->items[i]->value);
if (table->overflow_buckets[i]) {
printf(" => Overflow Bucket => ");
LinkedList* head = table->overflow_buckets[i];
while (head) {
printf("Key:%s, Value:%s ", head->item->key, head->item->value);
head = head->next;
}
}
printf("\n");
}
}
printf("-------------------\n");
}
int main() {
HashTable* ht = create_table(CAPACITY);
ht_insert(ht, "1", "First address");
ht_insert(ht, "2", "Second address");
ht_insert(ht, "Hel", "Third address");
ht_insert(ht, "Cau", "Fourth address");
print_search(ht, "1");
print_search(ht, "2");
print_search(ht, "3");
print_search(ht, "Hel");
print_search(ht, "Cau"); // Collision!
print_hashtable(ht);
ht_delete(ht, "1");
ht_delete(ht, "Cau");
print_hashtable(ht);
free_hashtable(ht);
return 0;
}
@he141142

This comment has been minimized.

Copy link

@he141142 he141142 commented May 31, 2020

excuse me! I've a question that why in line "77" why ("while(temp->next->next) "instead of "while(temp->next)")?
I got crash in that question

@kuriringohankamehameha

This comment has been minimized.

Copy link
Owner Author

@kuriringohankamehameha kuriringohankamehameha commented May 31, 2020

You're right, that's a mistake. Don't know why I did that. Thanks! Updated the gist

@aryobimo17

This comment has been minimized.

Copy link

@aryobimo17 aryobimo17 commented Jun 11, 2020

Hello I wanna ask why I got print error 'print_table' was not declared in this scope. line 369

@kuriringohankamehameha

This comment has been minimized.

Copy link
Owner Author

@kuriringohankamehameha kuriringohankamehameha commented Jun 11, 2020

Oops again. It's supposed to be print_hashtable(). Updated the gist

@kiran-kumar-thatikayala

This comment has been minimized.

Copy link

@kiran-kumar-thatikayala kiran-kumar-thatikayala commented Sep 10, 2020

line no 304 // First element of the chain. Remove the chain

why removing the chain if it is the first element? I assume before freeing head set table->overflow_buckets[index] =head->next;

@oschonrock

This comment has been minimized.

Copy link

@oschonrock oschonrock commented Jan 16, 2021

Nice Job! Good explanation on the blog.

If I reduce CAPACITY to 3 to force some collisions I get a segmentation fault.

I traced it down with valgrind to accessing free'd memory, ultimately the bug is on L318 and L319.

These 2 lines need to be swapped, otherwise prev == curr always, and we lose our way ...

@oschonrock

This comment has been minimized.

Copy link

@oschonrock oschonrock commented Jan 16, 2021

this does not make sense to me:

    // Compute the index
    int index = hash_function(key);

    Ht_item* current_item = table->items[index];

    if (current_item == NULL) {
        // Key does not exist.
        if (table->count == table->size) {
            // Hash Table Full
            printf("Insert Error: Hash Table is full\n");
            // Remove the create item
            free_item(item);
            return;
        }

if the hash value points to a slot which is empty ( current == NULL ) then the table cannot be full. So the entire "table full" if can be removed I think.

@oschonrock

This comment has been minimized.

Copy link

@oschonrock oschonrock commented Jan 17, 2021

After digging into the code some more, I found that your linked list code is quite "confused".

So I refactored/rewrote it. The fundamental change is that the HashTableItem now contains the ->next ptr for the linked list, This makes the amount of code collapse to around half of what it was. Also you seemed to have a redundant "head" node in your linked lists. That's not necessary.

Also I used strdup etc, to make the code more concise.

I also changed the value type to int, because that is what I needed. but it's quick to change via the typedef at top and few more things.

If you're interested, take a look:

https://gist.github.com/oschonrock/d0540347a5a6739a24c4648e724a3e96

@kuriringohankamehameha

This comment has been minimized.

Copy link
Owner Author

@kuriringohankamehameha kuriringohankamehameha commented Jan 17, 2021

Hi, thanks for writing your points out. It's almost been a year, and taking a look at the code now, I agree that the whole Linked List code is messed up, especially since half the code is simply unused.

Keeping a next pointer to a HashTableItem is definitely the way to go, since a pointer to a location is enough for insertion and deletion. And also, it would be more relevant to keep a memory limit for the "Table Full" error, so that's something you can consider. Maybe I'll rewrite this again and link a separate gist which is more relevant.

Again, I apologize for the bugs and errors in the code when I wrote this. Looking through your gist, it definitely the much better API for the hash table, and it's how I'd write it now. But for the purpose of the post, I don't want to make too many breaking changes until it's reflected there too.

However, to avoid misleading people thinking that this is a good implementation, anyone who's reading this can take a look at @oschonrock's gist, as my gist above is poorly designed and complicated, which naturally leads to more bugs. Again, thanks for the feedback, and appreciate your comments.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment