Skip to content

Instantly share code, notes, and snippets.

@McUsr
Last active June 5, 2023 20:30
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save McUsr/2b0376579f2b642cd1273115b840f6ba to your computer and use it in GitHub Desktop.
Save McUsr/2b0376579f2b642cd1273115b840f6ba to your computer and use it in GitHub Desktop.
/* Basic hash example an example found on the internet
*
*
* I u/McUsr made it work flawlessly with the good help of
* r/C_programming:
* u/Bitwise_Gamgee, u/No-Adhesiveness-8125 and u/curiously-b2.
* I got insights and suggestions from u/t40, u/ThatsMyFavoriteThing.
* u/flyingiron, u/Daiktana, and last but not least u/skeeto!
* Thanks a bunch guys!
*
* Any errors that may turn up in this file as it is provided,
* are mine, the code is consciously tested, with both valgrind, and
* with cc -Wall -Wextra -Wpedantic -fsanitize=address,undefined htable.c
*
* (c) 2023 McUsr Gpl 2.0 https://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html
*
* Usage:
*
* As you see fit: the idea is to copy it into the folder you intend to use it.
* My idea, is to change the node structure when I need that, but keep something
* named key in there, which is a hash based on a nested structure named data.
*
* Of course I must go in and add free routines everywhere any data should be freed,
* but to me, that seems like a small price to pay for not having a lot of overhead.
*
* TODO: insert functions for measuring the number of collisions
* the fill grade, percentage wide, and other such stats.
*
* Rev 1.
* I think it works well enough with 320.000 words, for hobby
* use at least.
*
* Rev 2.
* I have followed every advice I got on reddit that I thought made sense
* https://www.reddit.com/r/C_Programming/comments/141cyzh/please_critque_the_hashtable_that_is_now_fixed/
*
* Thank you all so much for your efforts, suggestions, questions and ideas.
* I feel it is kind of robust now, at least for home-use.
*
* And most important, I have learned alot!
*
* Thank you so much!
*
* Stats on my machine:
*
* time a.out <~/dict/web2 |wc -l (235.000 lines of text, on word on each)
* real 0m0.403s
* user 0m0.347s
* sys 0m0.069s*
*
* Absolutely commensurable for me!
*
* Version3:
*
* I ended up implementing a new FNV ihtable_hash just as u/skeeto made it, (I adapted it).
* I kept the intsize HASHSIZE value, but that can be replaced with a much bigger primenumber now,
* if you change the use of ints for lookups, with size_t.
* But for now, the hashkey I use is fine as it is, and I'll enjoy the improved distribution of keys.
*
*/
#include <stdio.h>
#include <stdlib.h>
#include<string.h>
/*----This is the start of where you snip it, if you make an include file
*----that you include into this file, and other files.
*/
#define HASHSIZE 35317
#define MAXLINE 1024
typedef struct tnode {
char *data;
struct tnode *next;
} node;
/* htable_init: initialize hashtable: you still need to have declared
* your hash table and allocated memory for it in main()
* up front, (like in the main() routine of the driver. */
void htable_init(node * hashtable);
void htable_insert(node * hashtable, char *str);/* insert data into hashtable */
void htable_display(node * hashtable); /* display hashtable */
int htable_delete(node * hashtable, char *str); /* delete an entry from hashtable */
node * htable_get(node * hashtable, char *str); /* returns an entry from a hashtable */
int htable_search(node * hashtable, char *str); /* searches for an entry */
void htable_destroy(node * hashtable);
const char PNAME[]="htable_driver" ;
/*----This is the end of where you snip it, if you make an include file
*----that you include into this file and other files.
*/
/* ihtable_resolve:
* resolve collisions in hashtable by inserting into linked in new node */
static void ihtable_resolve(node * hashtable,
int loc, char *str);
/* ihtable_hash
* make hash key from string to insert into hashtable */
static int ihtable_hash(char *str);
/* xstrdup:
* An strdup function to hide #defines and check for mem alloc
* errors on non-linux machines.
*/
static char *xstrdup(char *str);
/* xmalloc:
* A malloc function to hide #defines and check for malloc
* errors on non-linux machines.
*/
static node *xmalloc( size_t size);
#ifndef __linux__
/* imem_err:
* print mem alloc err and die (for non-linux machines)*/
static void imem_err(const char *progname, const char *libcall, const char *funcname);
#endif
#define DRIVER_FOR_HASHMAP
#ifdef DRIVER_FOR_HASHMAP
int main(void)
{
char line[MAXLINE];
node *hashtable;
node *mynode;
int res = 0;
hashtable = xmalloc(HASHSIZE * sizeof(node));
htable_init(hashtable);
htable_insert(hashtable, "tan");
if (htable_search(hashtable,"tan"))
printf("I found \"tan\"!\n");
else
printf("I didn't find \"tan\"!\n");
mynode = htable_get(hashtable,"tan");
if (mynode != NULL)
printf("I found %s!\n",mynode->data);
else
printf("I didn't find \"tan\"\n");
/* this code probably adds newlines after the words. */
while ((fgets(line, MAXLINE, stdin)) != NULL) {
line[strlen(line)-1] = '\0' ;
htable_insert(hashtable, line);
}
res = htable_delete(hashtable, "tan");
printf("result of deleting \"tan\" : %d\n", res);
htable_display(hashtable);
htable_destroy(hashtable);
return 0;
}
#endif
/* htable_destroy:
* Deletes every node and in the end the hashtable itself.
* OBSERVE: you'll need add your own code to this, if you
* add an extra struct within node.
*/
void htable_destroy(node * hashtable)
{
/* Thanks: u/Bitwise_Gamgee and u/daikatana */
if (hashtable == NULL) {
fprintf(stderr, "Error: Null hashtable cannot be destroyed.\n");
exit(2);
}
for (int i = 0; i < HASHSIZE; i++)
if (hashtable[i].data != NULL) {
node *target = &hashtable[i];
while (target->next != NULL) {
node *node_to_delete = target->next;
target->next = node_to_delete->next;
free(node_to_delete->data);
node_to_delete->data=NULL;
free(node_to_delete);
node_to_delete=NULL;
}
free(hashtable[i].data);
hashtable[i].data = NULL;
}
free(hashtable);
hashtable = NULL;
}
/* htable_init: initialize hashtable: you still need to have declared
* your hash table and allocated memory for it in main()
* up front, (like in the main() routine of the driver. */
void htable_init(node * hashtable)
{
for (int i = 0; i < HASHSIZE; i++) {
hashtable[i].data = NULL;
hashtable[i].next = NULL;
}
}
/* insert data into hashtable */
void htable_insert(node * hashtable, char *str)
{
int index = 0;
/* determine hash key for str */
index = ihtable_hash(str);
if (hashtable[index].data != NULL) {
/* A collision occurs - resolve by chaining */
ihtable_resolve(hashtable, index, str);
} else {
hashtable[index].data = xstrdup(str);
}
}
/* ihtable_hash: FNV hashing.
* thanks to u/skeeto
* https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function#FNV-1a_hash
*/
static int ihtable_hash(char *str)
{
unsigned long long hash = 0xcbf29ce484222325;
for (; *str; str++) {
hash ^= (unsigned char)*str;
hash *= 0x00000100000001b3;
}
hash ^= hash >> 32;
return hash % HASHSIZE;
}
/* resolve collisions in hashtable */
static void ihtable_resolve(node * hashtable, int loc, char *str)
{
node *to_resolve = NULL;
to_resolve = hashtable + loc;
while (to_resolve->next != NULL)
to_resolve = to_resolve->next;
to_resolve->next = xmalloc(sizeof(node));
to_resolve->next->data = xstrdup(str);
to_resolve->next->next = NULL;
to_resolve = NULL;
}
/* htable_display :print contents of hashtable to stdout.
* A "driver" routine to prove that it works. */
void htable_display(node * hashtable)
{
node *target;
for (int i = 0; i < HASHSIZE; i++) {
if (hashtable[i].data != NULL) {
target = hashtable + i;
while (target)
/* printf("location: %d, data: %s", i, target->data), target = target->next; */
printf("%s\n", target->data), target = target->next;
}
}
}
/* returns an entry from hashtable ret NULL if NOT found */
node * htable_get(node * hashtable, char *str)
{
int index = ihtable_hash(str);
/* no item at this location */
if (hashtable[index].data == NULL)
return NULL;
/* only one item at this location */
if (hashtable[index].next == NULL) {
if (strcmp(hashtable[index].data, str) == 0) {
/* item found */
return &hashtable[index];
}
} else {
/* there is a chaining case */
node *target = hashtable + index;
/* linked list similar */
while (target->next != NULL) {
if (strcmp(target->next->data, str) == 0) {
return target->next;
} else
target=target->next;
}
}
return NULL;
}
/* searches an entry from hashtable ret 1 if found */
int htable_search(node * hashtable, char *str)
{
int index = ihtable_hash(str);
/* no item at this location */
if (hashtable[index].data == NULL)
return 0;
/* only one item at this location */
if (hashtable[index].next == NULL) {
if (strcmp(hashtable[index].data, str) == 0) {
/* item found */
return 1;
}
} else {
/* there is a chaining case */
node *target = hashtable + index;
/* linked list similar */
while (target->next != NULL) {
if (strcmp(target->next->data, str) == 0) {
return 1;
} else
target=target->next;
}
}
return 0;
}
/* delete an entry from hashtable ret 1 if did */
int htable_delete(node * hashtable, char *str)
{
char *data_to_free = NULL;
int index = 0,found=0;
index = ihtable_hash(str);
/* no item at this location */
if (hashtable[index].data == NULL)
return found;
/* only one item at this location */
if (hashtable[index].next == NULL) {
if (strcmp(hashtable[index].data, str) == 0) {
found=1;
/* item found */
data_to_free = hashtable[index].data;
hashtable[index].data = NULL;
free(data_to_free);
}
} else {
/* there is a chaining case */
node *before_curnode = hashtable + index;
node *curnode_in_chain=NULL;
/* the chain of nodes sharing a hashky is a linked list */
while (before_curnode->next != NULL) {
if (strcmp(before_curnode->next->data, str) == 0) {
found=1;
curnode_in_chain = before_curnode->next;
if (before_curnode->next->next != NULL)
before_curnode->next = before_curnode->next->next;
else
before_curnode->next = NULL;
/* we need to free the data in the node as well */
data_to_free = curnode_in_chain->data;
curnode_in_chain->data = NULL;
free(data_to_free);
data_to_free=NULL;
/* then we free the node */
free(curnode_in_chain);
curnode_in_chain=NULL;
} else
before_curnode=before_curnode->next;
}
}
return found;
}
/* xstrdup:
* an strdup function to hide #defines and check for malloc
* errors on non-linux machines.
*/
static char *xstrdup(char *str)
{
static char *strptr;
strptr=strdup(str);
#ifndef __linux__
if(strptr == NULL )
imem_err(PNAME,"strdup","xstrdup");
#endif
return strptr;
}
static node *xmalloc( size_t size)
{
#ifdef __linux__
return (node *)malloc(size);
#else
static node *nodeptr;
if ((nodeptr = (node *) malloc(size)) == NULL)
imem_err(PNAME,"malloc","xmalloc");
else
return nodeptr;
#endif
}
#ifndef __linux__
/* imem_err: A special error handler for non-linux systems. */
static void imem_err(const char *progname, const char *libcall, const char *funcname)
{
fprintf(stderr,"%s: memory alloc error by %s in function: %s\n",progname, libcall, funcname);
exit(1);
}
#endif
@McUsr
Copy link
Author

McUsr commented Jun 5, 2023

I have revised it, with the help of all the good people at r/C_programming.

Thank you a million times!

@McUsr
Copy link
Author

McUsr commented Jun 5, 2023

I have upgraded the ihtable_hash function to use the FNV-1A algorithm as supplied by u/skeeto.
If you now change the HASHSIZE to something bigger, and change the indexing data type from int to size_t, you should have a beast at hand.

@McUsr
Copy link
Author

McUsr commented Jun 5, 2023

Gpl 2.0 Free for all, and with a disclaimer of liability.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment