Last active
June 5, 2023 20:30
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Basic hash example an example found on the internet | |
* | |
* | |
* I u/McUsr made it work flawlessly with the good help of | |
* r/C_programming: | |
* u/Bitwise_Gamgee, u/No-Adhesiveness-8125 and u/curiously-b2. | |
* I got insights and suggestions from u/t40, u/ThatsMyFavoriteThing. | |
* u/flyingiron, u/Daiktana, and last but not least u/skeeto! | |
* Thanks a bunch guys! | |
* | |
* Any errors that may turn up in this file as it is provided, | |
* are mine, the code is consciously tested, with both valgrind, and | |
* with cc -Wall -Wextra -Wpedantic -fsanitize=address,undefined htable.c | |
* | |
* (c) 2023 McUsr Gpl 2.0 https://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html | |
* | |
* Usage: | |
* | |
* As you see fit: the idea is to copy it into the folder you intend to use it. | |
* My idea, is to change the node structure when I need that, but keep something | |
* named key in there, which is a hash based on a nested structure named data. | |
* | |
* Of course I must go in and add free routines everywhere any data should be freed, | |
* but to me, that seems like a small price to pay for not having a lot of overhead. | |
* | |
* TODO: insert functions for measuring the number of collisions | |
* the fill grade, percentage wide, and other such stats. | |
* | |
* Rev 1. | |
* I think it works well enough with 320.000 words, for hobby | |
* use at least. | |
* | |
* Rev 2. | |
* I have followed every advice I got on reddit that I thought made sense | |
* https://www.reddit.com/r/C_Programming/comments/141cyzh/please_critque_the_hashtable_that_is_now_fixed/ | |
* | |
* Thank you all so much for your efforts, suggestions, questions and ideas. | |
* I feel it is kind of robust now, at least for home-use. | |
* | |
* And most important, I have learned alot! | |
* | |
* Thank you so much! | |
* | |
* Stats on my machine: | |
* | |
* time a.out <~/dict/web2 |wc -l (235.000 lines of text, on word on each) | |
* real 0m0.403s | |
* user 0m0.347s | |
* sys 0m0.069s* | |
* | |
* Absolutely commensurable for me! | |
* | |
* Version3: | |
* | |
* I ended up implementing a new FNV ihtable_hash just as u/skeeto made it, (I adapted it). | |
* I kept the intsize HASHSIZE value, but that can be replaced with a much bigger primenumber now, | |
* if you change the use of ints for lookups, with size_t. | |
* But for now, the hashkey I use is fine as it is, and I'll enjoy the improved distribution of keys. | |
* | |
*/ | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include<string.h> | |
/*----This is the start of where you snip it, if you make an include file | |
*----that you include into this file, and other files. | |
*/ | |
#define HASHSIZE 35317 | |
#define MAXLINE 1024 | |
typedef struct tnode { | |
char *data; | |
struct tnode *next; | |
} node; | |
/* htable_init: initialize hashtable: you still need to have declared | |
* your hash table and allocated memory for it in main() | |
* up front, (like in the main() routine of the driver. */ | |
void htable_init(node * hashtable); | |
void htable_insert(node * hashtable, char *str);/* insert data into hashtable */ | |
void htable_display(node * hashtable); /* display hashtable */ | |
int htable_delete(node * hashtable, char *str); /* delete an entry from hashtable */ | |
node * htable_get(node * hashtable, char *str); /* returns an entry from a hashtable */ | |
int htable_search(node * hashtable, char *str); /* searches for an entry */ | |
void htable_destroy(node * hashtable); | |
const char PNAME[]="htable_driver" ; | |
/*----This is the end of where you snip it, if you make an include file | |
*----that you include into this file and other files. | |
*/ | |
/* ihtable_resolve: | |
* resolve collisions in hashtable by inserting into linked in new node */ | |
static void ihtable_resolve(node * hashtable, | |
int loc, char *str); | |
/* ihtable_hash | |
* make hash key from string to insert into hashtable */ | |
static int ihtable_hash(char *str); | |
/* xstrdup: | |
* An strdup function to hide #defines and check for mem alloc | |
* errors on non-linux machines. | |
*/ | |
static char *xstrdup(char *str); | |
/* xmalloc: | |
* A malloc function to hide #defines and check for malloc | |
* errors on non-linux machines. | |
*/ | |
static node *xmalloc( size_t size); | |
#ifndef __linux__ | |
/* imem_err: | |
* print mem alloc err and die (for non-linux machines)*/ | |
static void imem_err(const char *progname, const char *libcall, const char *funcname); | |
#endif | |
#define DRIVER_FOR_HASHMAP | |
#ifdef DRIVER_FOR_HASHMAP | |
int main(void) | |
{ | |
char line[MAXLINE]; | |
node *hashtable; | |
node *mynode; | |
int res = 0; | |
hashtable = xmalloc(HASHSIZE * sizeof(node)); | |
htable_init(hashtable); | |
htable_insert(hashtable, "tan"); | |
if (htable_search(hashtable,"tan")) | |
printf("I found \"tan\"!\n"); | |
else | |
printf("I didn't find \"tan\"!\n"); | |
mynode = htable_get(hashtable,"tan"); | |
if (mynode != NULL) | |
printf("I found %s!\n",mynode->data); | |
else | |
printf("I didn't find \"tan\"\n"); | |
/* this code probably adds newlines after the words. */ | |
while ((fgets(line, MAXLINE, stdin)) != NULL) { | |
line[strlen(line)-1] = '\0' ; | |
htable_insert(hashtable, line); | |
} | |
res = htable_delete(hashtable, "tan"); | |
printf("result of deleting \"tan\" : %d\n", res); | |
htable_display(hashtable); | |
htable_destroy(hashtable); | |
return 0; | |
} | |
#endif | |
/* htable_destroy: | |
* Deletes every node and in the end the hashtable itself. | |
* OBSERVE: you'll need add your own code to this, if you | |
* add an extra struct within node. | |
*/ | |
void htable_destroy(node * hashtable) | |
{ | |
/* Thanks: u/Bitwise_Gamgee and u/daikatana */ | |
if (hashtable == NULL) { | |
fprintf(stderr, "Error: Null hashtable cannot be destroyed.\n"); | |
exit(2); | |
} | |
for (int i = 0; i < HASHSIZE; i++) | |
if (hashtable[i].data != NULL) { | |
node *target = &hashtable[i]; | |
while (target->next != NULL) { | |
node *node_to_delete = target->next; | |
target->next = node_to_delete->next; | |
free(node_to_delete->data); | |
node_to_delete->data=NULL; | |
free(node_to_delete); | |
node_to_delete=NULL; | |
} | |
free(hashtable[i].data); | |
hashtable[i].data = NULL; | |
} | |
free(hashtable); | |
hashtable = NULL; | |
} | |
/* htable_init: initialize hashtable: you still need to have declared | |
* your hash table and allocated memory for it in main() | |
* up front, (like in the main() routine of the driver. */ | |
void htable_init(node * hashtable) | |
{ | |
for (int i = 0; i < HASHSIZE; i++) { | |
hashtable[i].data = NULL; | |
hashtable[i].next = NULL; | |
} | |
} | |
/* insert data into hashtable */ | |
void htable_insert(node * hashtable, char *str) | |
{ | |
int index = 0; | |
/* determine hash key for str */ | |
index = ihtable_hash(str); | |
if (hashtable[index].data != NULL) { | |
/* A collision occurs - resolve by chaining */ | |
ihtable_resolve(hashtable, index, str); | |
} else { | |
hashtable[index].data = xstrdup(str); | |
} | |
} | |
/* ihtable_hash: FNV hashing. | |
* thanks to u/skeeto | |
* https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function#FNV-1a_hash | |
*/ | |
static int ihtable_hash(char *str) | |
{ | |
unsigned long long hash = 0xcbf29ce484222325; | |
for (; *str; str++) { | |
hash ^= (unsigned char)*str; | |
hash *= 0x00000100000001b3; | |
} | |
hash ^= hash >> 32; | |
return hash % HASHSIZE; | |
} | |
/* resolve collisions in hashtable */ | |
static void ihtable_resolve(node * hashtable, int loc, char *str) | |
{ | |
node *to_resolve = NULL; | |
to_resolve = hashtable + loc; | |
while (to_resolve->next != NULL) | |
to_resolve = to_resolve->next; | |
to_resolve->next = xmalloc(sizeof(node)); | |
to_resolve->next->data = xstrdup(str); | |
to_resolve->next->next = NULL; | |
to_resolve = NULL; | |
} | |
/* htable_display :print contents of hashtable to stdout. | |
* A "driver" routine to prove that it works. */ | |
void htable_display(node * hashtable) | |
{ | |
node *target; | |
for (int i = 0; i < HASHSIZE; i++) { | |
if (hashtable[i].data != NULL) { | |
target = hashtable + i; | |
while (target) | |
/* printf("location: %d, data: %s", i, target->data), target = target->next; */ | |
printf("%s\n", target->data), target = target->next; | |
} | |
} | |
} | |
/* returns an entry from hashtable ret NULL if NOT found */ | |
node * htable_get(node * hashtable, char *str) | |
{ | |
int index = ihtable_hash(str); | |
/* no item at this location */ | |
if (hashtable[index].data == NULL) | |
return NULL; | |
/* only one item at this location */ | |
if (hashtable[index].next == NULL) { | |
if (strcmp(hashtable[index].data, str) == 0) { | |
/* item found */ | |
return &hashtable[index]; | |
} | |
} else { | |
/* there is a chaining case */ | |
node *target = hashtable + index; | |
/* linked list similar */ | |
while (target->next != NULL) { | |
if (strcmp(target->next->data, str) == 0) { | |
return target->next; | |
} else | |
target=target->next; | |
} | |
} | |
return NULL; | |
} | |
/* searches an entry from hashtable ret 1 if found */ | |
int htable_search(node * hashtable, char *str) | |
{ | |
int index = ihtable_hash(str); | |
/* no item at this location */ | |
if (hashtable[index].data == NULL) | |
return 0; | |
/* only one item at this location */ | |
if (hashtable[index].next == NULL) { | |
if (strcmp(hashtable[index].data, str) == 0) { | |
/* item found */ | |
return 1; | |
} | |
} else { | |
/* there is a chaining case */ | |
node *target = hashtable + index; | |
/* linked list similar */ | |
while (target->next != NULL) { | |
if (strcmp(target->next->data, str) == 0) { | |
return 1; | |
} else | |
target=target->next; | |
} | |
} | |
return 0; | |
} | |
/* delete an entry from hashtable ret 1 if did */ | |
int htable_delete(node * hashtable, char *str) | |
{ | |
char *data_to_free = NULL; | |
int index = 0,found=0; | |
index = ihtable_hash(str); | |
/* no item at this location */ | |
if (hashtable[index].data == NULL) | |
return found; | |
/* only one item at this location */ | |
if (hashtable[index].next == NULL) { | |
if (strcmp(hashtable[index].data, str) == 0) { | |
found=1; | |
/* item found */ | |
data_to_free = hashtable[index].data; | |
hashtable[index].data = NULL; | |
free(data_to_free); | |
} | |
} else { | |
/* there is a chaining case */ | |
node *before_curnode = hashtable + index; | |
node *curnode_in_chain=NULL; | |
/* the chain of nodes sharing a hashky is a linked list */ | |
while (before_curnode->next != NULL) { | |
if (strcmp(before_curnode->next->data, str) == 0) { | |
found=1; | |
curnode_in_chain = before_curnode->next; | |
if (before_curnode->next->next != NULL) | |
before_curnode->next = before_curnode->next->next; | |
else | |
before_curnode->next = NULL; | |
/* we need to free the data in the node as well */ | |
data_to_free = curnode_in_chain->data; | |
curnode_in_chain->data = NULL; | |
free(data_to_free); | |
data_to_free=NULL; | |
/* then we free the node */ | |
free(curnode_in_chain); | |
curnode_in_chain=NULL; | |
} else | |
before_curnode=before_curnode->next; | |
} | |
} | |
return found; | |
} | |
/* xstrdup: | |
* an strdup function to hide #defines and check for malloc | |
* errors on non-linux machines. | |
*/ | |
static char *xstrdup(char *str) | |
{ | |
static char *strptr; | |
strptr=strdup(str); | |
#ifndef __linux__ | |
if(strptr == NULL ) | |
imem_err(PNAME,"strdup","xstrdup"); | |
#endif | |
return strptr; | |
} | |
static node *xmalloc( size_t size) | |
{ | |
#ifdef __linux__ | |
return (node *)malloc(size); | |
#else | |
static node *nodeptr; | |
if ((nodeptr = (node *) malloc(size)) == NULL) | |
imem_err(PNAME,"malloc","xmalloc"); | |
else | |
return nodeptr; | |
#endif | |
} | |
#ifndef __linux__ | |
/* imem_err: A special error handler for non-linux systems. */ | |
static void imem_err(const char *progname, const char *libcall, const char *funcname) | |
{ | |
fprintf(stderr,"%s: memory alloc error by %s in function: %s\n",progname, libcall, funcname); | |
exit(1); | |
} | |
#endif |
I have upgraded the ihtable_hash function to use the FNV-1A algorithm as supplied by u/skeeto.
If you now change the HASHSIZE to something bigger, and change the indexing data type from int to size_t, you should have a beast at hand.
Gpl 2.0 Free for all, and with a disclaimer of liability.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I have revised it, with the help of all the good people at r/C_programming.
Thank you a million times!