/** | |
Code for https://journaldev.com article | |
Purpose: A Hash Table Implementation | |
@author: Vijay Ramachandran | |
@date: 01-02-2020 | |
*/ | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#define CAPACITY 50000 // Size of the Hash Table | |
unsigned long hash_function(char* str) { | |
unsigned long i = 0; | |
for (int j=0; str[j]; j++) | |
i += str[j]; | |
return i % CAPACITY; | |
} | |
typedef struct Ht_item Ht_item; | |
// Define the Hash Table Item here | |
struct Ht_item { | |
char* key; | |
char* value; | |
}; | |
typedef struct LinkedList LinkedList; | |
// Define the Linkedlist here | |
struct LinkedList { | |
Ht_item* item; | |
LinkedList* next; | |
}; | |
typedef struct HashTable HashTable; | |
// Define the Hash Table here | |
struct HashTable { | |
// Contains an array of pointers | |
// to items | |
Ht_item** items; | |
LinkedList** overflow_buckets; | |
int size; | |
int count; | |
}; | |
static LinkedList* allocate_list () { | |
// Allocates memory for a Linkedlist pointer | |
LinkedList* list = (LinkedList*) calloc (1, sizeof(LinkedList)); | |
return list; | |
} | |
static LinkedList* linkedlist_insert(LinkedList* list, Ht_item* item) { | |
// Inserts the item onto the Linked List | |
if (!list) { | |
LinkedList* head = allocate_list(); | |
head->item = item; | |
head->next = NULL; | |
list = head; | |
return list; | |
} | |
else if (list->next == NULL) { | |
LinkedList* node = allocate_list(); | |
node->item = item; | |
node->next = NULL; | |
list->next = node; | |
return list; | |
} | |
LinkedList* temp = list; | |
while (temp->next) { | |
temp = temp->next; | |
} | |
LinkedList* node = allocate_list(); | |
node->item = item; | |
node->next = NULL; | |
temp->next = node; | |
return list; | |
} | |
static Ht_item* linkedlist_remove(LinkedList* list) { | |
// Removes the head from the linked list | |
// and returns the item of the popped element | |
if (!list) | |
return NULL; | |
if (!list->next) | |
return NULL; | |
LinkedList* node = list->next; | |
LinkedList* temp = list; | |
temp->next = NULL; | |
list = node; | |
Ht_item* it = NULL; | |
memcpy(temp->item, it, sizeof(Ht_item)); | |
free(temp->item->key); | |
free(temp->item->value); | |
free(temp->item); | |
free(temp); | |
return it; | |
} | |
static void free_linkedlist(LinkedList* list) { | |
LinkedList* temp = list; | |
if (!list) | |
return; | |
while (list) { | |
temp = list; | |
list = list->next; | |
free(temp->item->key); | |
free(temp->item->value); | |
free(temp->item); | |
free(temp); | |
} | |
} | |
static LinkedList** create_overflow_buckets(HashTable* table) { | |
// Create the overflow buckets; an array of linkedlists | |
LinkedList** buckets = (LinkedList**) calloc (table->size, sizeof(LinkedList*)); | |
for (int i=0; i<table->size; i++) | |
buckets[i] = NULL; | |
return buckets; | |
} | |
static void free_overflow_buckets(HashTable* table) { | |
// Free all the overflow bucket lists | |
LinkedList** buckets = table->overflow_buckets; | |
for (int i=0; i<table->size; i++) | |
free_linkedlist(buckets[i]); | |
free(buckets); | |
} | |
Ht_item* create_item(char* key, char* value) { | |
// Creates a pointer to a new hash table item | |
Ht_item* item = (Ht_item*) malloc (sizeof(Ht_item)); | |
item->key = (char*) calloc (strlen(key) + 1, sizeof(char)); | |
item->value = (char*) calloc (strlen(value) + 1, sizeof(char)); | |
strcpy(item->key, key); | |
strcpy(item->value, value); | |
return item; | |
} | |
HashTable* create_table(int size) { | |
// Creates a new HashTable | |
HashTable* table = (HashTable*) malloc (sizeof(HashTable)); | |
table->size = size; | |
table->count = 0; | |
table->items = (Ht_item**) calloc (table->size, sizeof(Ht_item*)); | |
for (int i=0; i<table->size; i++) | |
table->items[i] = NULL; | |
table->overflow_buckets = create_overflow_buckets(table); | |
return table; | |
} | |
void free_item(Ht_item* item) { | |
// Frees an item | |
free(item->key); | |
free(item->value); | |
free(item); | |
} | |
void free_hashtable(HashTable* table) { | |
// Frees the table | |
for (int i=0; i<table->size; i++) { | |
Ht_item* item = table->items[i]; | |
if (item != NULL) | |
free_item(item); | |
} | |
free_overflow_buckets(table); | |
free(table->items); | |
free(table); | |
} | |
void handle_collision(HashTable* table, unsigned long index, Ht_item* item) { | |
LinkedList* head = table->overflow_buckets[index]; | |
if (head == NULL) { | |
// We need to create the list | |
head = allocate_list(); | |
head->item = item; | |
table->overflow_buckets[index] = head; | |
return; | |
} | |
else { | |
// Insert to the list | |
table->overflow_buckets[index] = linkedlist_insert(head, item); | |
return; | |
} | |
} | |
void ht_insert(HashTable* table, char* key, char* value) { | |
// Create the item | |
Ht_item* item = create_item(key, value); | |
// Compute the index | |
int index = hash_function(key); | |
Ht_item* current_item = table->items[index]; | |
if (current_item == NULL) { | |
// Key does not exist. | |
if (table->count == table->size) { | |
// Hash Table Full | |
printf("Insert Error: Hash Table is full\n"); | |
// Remove the create item | |
free_item(item); | |
return; | |
} | |
// Insert directly | |
table->items[index] = item; | |
table->count++; | |
} | |
else { | |
// Scenario 1: We only need to update value | |
if (strcmp(current_item->key, key) == 0) { | |
free(table->items[index]->value); | |
table->items[index]->value = (char*) calloc (strlen(value) + 1, sizeof(char)); | |
strcpy(table->items[index]->value, value); | |
free_item(item); | |
return; | |
} | |
else { | |
// Scenario 2: Collision | |
handle_collision(table, index, item); | |
return; | |
} | |
} | |
} | |
char* ht_search(HashTable* table, char* key) { | |
// Searches the key in the hashtable | |
// and returns NULL if it doesn't exist | |
int index = hash_function(key); | |
Ht_item* item = table->items[index]; | |
LinkedList* head = table->overflow_buckets[index]; | |
// Ensure that we move to items which are not NULL | |
while (item != NULL) { | |
if (strcmp(item->key, key) == 0) | |
return item->value; | |
if (head == NULL) | |
return NULL; | |
item = head->item; | |
head = head->next; | |
} | |
return NULL; | |
} | |
void ht_delete(HashTable* table, char* key) { | |
// Deletes an item from the table | |
int index = hash_function(key); | |
Ht_item* item = table->items[index]; | |
LinkedList* head = table->overflow_buckets[index]; | |
if (item == NULL) { | |
// Does not exist. Return | |
return; | |
} | |
else { | |
if (head == NULL && strcmp(item->key, key) == 0) { | |
// No collision chain. Remove the item | |
// and set table index to NULL | |
table->items[index] = NULL; | |
free_item(item); | |
table->count--; | |
return; | |
} | |
else if (head != NULL) { | |
// Collision Chain exists | |
if (strcmp(item->key, key) == 0) { | |
// Remove this item and set the head of the list | |
// as the new item | |
free_item(item); | |
LinkedList* node = head; | |
head = head->next; | |
node->next = NULL; | |
table->items[index] = create_item(node->item->key, node->item->value); | |
free_linkedlist(node); | |
table->overflow_buckets[index] = head; | |
return; | |
} | |
LinkedList* curr = head; | |
LinkedList* prev = NULL; | |
while (curr) { | |
if (strcmp(curr->item->key, key) == 0) { | |
if (prev == NULL) { | |
// First element of the chain. Remove the chain | |
free_linkedlist(head); | |
table->overflow_buckets[index] = NULL; | |
return; | |
} | |
else { | |
// This is somewhere in the chain | |
prev->next = curr->next; | |
curr->next = NULL; | |
free_linkedlist(curr); | |
table->overflow_buckets[index] = head; | |
return; | |
} | |
} | |
curr = curr->next; | |
prev = curr; | |
} | |
} | |
} | |
} | |
void print_search(HashTable* table, char* key) { | |
char* val; | |
if ((val = ht_search(table, key)) == NULL) { | |
printf("%s does not exist\n", key); | |
return; | |
} | |
else { | |
printf("Key:%s, Value:%s\n", key, val); | |
} | |
} | |
void print_hashtable(HashTable* table) { | |
printf("\n-------------------\n"); | |
for (int i=0; i<table->size; i++) { | |
if (table->items[i]) { | |
printf("Index:%d, Key:%s, Value:%s", i, table->items[i]->key, table->items[i]->value); | |
if (table->overflow_buckets[i]) { | |
printf(" => Overflow Bucket => "); | |
LinkedList* head = table->overflow_buckets[i]; | |
while (head) { | |
printf("Key:%s, Value:%s ", head->item->key, head->item->value); | |
head = head->next; | |
} | |
} | |
printf("\n"); | |
} | |
} | |
printf("-------------------\n"); | |
} | |
int main() { | |
HashTable* ht = create_table(CAPACITY); | |
ht_insert(ht, "1", "First address"); | |
ht_insert(ht, "2", "Second address"); | |
ht_insert(ht, "Hel", "Third address"); | |
ht_insert(ht, "Cau", "Fourth address"); | |
print_search(ht, "1"); | |
print_search(ht, "2"); | |
print_search(ht, "3"); | |
print_search(ht, "Hel"); | |
print_search(ht, "Cau"); // Collision! | |
print_hashtable(ht); | |
ht_delete(ht, "1"); | |
ht_delete(ht, "Cau"); | |
print_hashtable(ht); | |
free_hashtable(ht); | |
return 0; | |
} |
Hi, thanks for writing your points out. It's almost been a year, and taking a look at the code now, I agree that the whole Linked List code is messed up, especially since half the code is simply unused.
Keeping a next
pointer to a HashTableItem
is definitely the way to go, since a pointer to a location is enough for insertion and deletion. And also, it would be more relevant to keep a memory limit for the "Table Full" error, so that's something you can consider. Maybe I'll rewrite this again and link a separate gist which is more relevant.
Again, I apologize for the bugs and errors in the code when I wrote this. Looking through your gist, it definitely the much better API for the hash table, and it's how I'd write it now. But for the purpose of the post, I don't want to make too many breaking changes until it's reflected there too.
However, to avoid misleading people thinking that this is a good implementation, anyone who's reading this can take a look at @oschonrock's gist, as my gist above is poorly designed and complicated, which naturally leads to more bugs. Again, thanks for the feedback, and appreciate your comments.
I dont understand your code yet, but it doesnt work
Hey!
Firstly, thanks for sharing your version of hash table in C.
Secondly, I find a bug when inserting two nodes with same key into the overflow_buckets. There's no key check there:
void handle_collision(HashTable* table, unsigned long index, Ht_item* item) {
LinkedList* head = table->overflow_buckets[index];
if (head == NULL) {
head = allocate_list();
head->item = item;
table->overflow_buckets[index] = head;
return;
}
else {
// here we need to check if there are duplicated keys
table->overflow_buckets[index] = linkedlist_insert(head, item);
return;
}
}
My solution is:
static void handle_collision(ht_table *table, unsigned long index, ht_item *item) {
ListNode* head = table->overflow_buckets[index];
if (head == NULL) {
head = create_item_list();
head->item = item;
table->overflow_buckets[index] = head;
return;
} else {
ListNode* curr = table->overflow_buckets[index];
while (curr) {
if (strcmp(curr->item->key, item->key) == 0) {
free_item(curr->item);
curr->item = item;
return;
}
curr = curr->next;
}
table->overflow_buckets[index] = insert_item_list(head, item);
return;
}
}
Hey, thanks for bringing this up. That's a valid point. However, from your snippet, you could directly return when you found an exact match, instead of deleting and then inserting to the rear again.
please add a license
Thank you for sharing this code.
There were some bugs I found. Please refer to the tag "Jeff Revised!" for the detail in the program.
If you have any questions or suggestions about code, please feel free to contact me and discuss with me.
Program link: https://gist.github.com/JeffWang0325/5bad679ec2c0e5034923d1bf8cb54367
After digging into the code some more, I found that your linked list code is quite "confused".
So I refactored/rewrote it. The fundamental change is that the HashTableItem now contains the
->next
ptr for the linked list, This makes the amount of code collapse to around half of what it was. Also you seemed to have a redundant "head" node in your linked lists. That's not necessary.Also I used
strdup
etc, to make the code more concise.I also changed the value type to int, because that is what I needed. but it's quick to change via the typedef at top and few more things.
If you're interested, take a look:
https://gist.github.com/oschonrock/d0540347a5a6739a24c4648e724a3e96