Skip to content

Instantly share code, notes, and snippets.

@tonious tonious/hash.c
Last active Apr 3, 2019

Embed
What would you like to do?
A quick hashtable implementation in c.
#include <stdlib.h>
#include <stdio.h>
#include <limits.h>
#include <string.h>
struct entry_s {
char *key;
char *value;
struct entry_s *next;
};
typedef struct entry_s entry_t;
struct hashtable_s {
int size;
struct entry_s **table;
};
typedef struct hashtable_s hashtable_t;
/* Create a new hashtable. */
hashtable_t *ht_create( int size ) {
hashtable_t *hashtable = NULL;
int i;
if( size < 1 ) return NULL;
/* Allocate the table itself. */
if( ( hashtable = malloc( sizeof( hashtable_t ) ) ) == NULL ) {
return NULL;
}
/* Allocate pointers to the head nodes. */
if( ( hashtable->table = malloc( sizeof( entry_t * ) * size ) ) == NULL ) {
return NULL;
}
for( i = 0; i < size; i++ ) {
hashtable->table[i] = NULL;
}
hashtable->size = size;
return hashtable;
}
/* Hash a string for a particular hash table. */
int ht_hash( hashtable_t *hashtable, char *key ) {
unsigned long int hashval;
int i = 0;
/* Convert our string to an integer */
while( hashval < ULONG_MAX && i < strlen( key ) ) {
hashval = hashval << 8;
hashval += key[ i ];
i++;
}
return hashval % hashtable->size;
}
/* Create a key-value pair. */
entry_t *ht_newpair( char *key, char *value ) {
entry_t *newpair;
if( ( newpair = malloc( sizeof( entry_t ) ) ) == NULL ) {
return NULL;
}
if( ( newpair->key = strdup( key ) ) == NULL ) {
return NULL;
}
if( ( newpair->value = strdup( value ) ) == NULL ) {
return NULL;
}
newpair->next = NULL;
return newpair;
}
/* Insert a key-value pair into a hash table. */
void ht_set( hashtable_t *hashtable, char *key, char *value ) {
int bin = 0;
entry_t *newpair = NULL;
entry_t *next = NULL;
entry_t *last = NULL;
bin = ht_hash( hashtable, key );
next = hashtable->table[ bin ];
while( next != NULL && next->key != NULL && strcmp( key, next->key ) > 0 ) {
last = next;
next = next->next;
}
/* There's already a pair. Let's replace that string. */
if( next != NULL && next->key != NULL && strcmp( key, next->key ) == 0 ) {
free( next->value );
next->value = strdup( value );
/* Nope, could't find it. Time to grow a pair. */
} else {
newpair = ht_newpair( key, value );
/* We're at the start of the linked list in this bin. */
if( next == hashtable->table[ bin ] ) {
newpair->next = next;
hashtable->table[ bin ] = newpair;
/* We're at the end of the linked list in this bin. */
} else if ( next == NULL ) {
last->next = newpair;
/* We're in the middle of the list. */
} else {
newpair->next = next;
last->next = newpair;
}
}
}
/* Retrieve a key-value pair from a hash table. */
char *ht_get( hashtable_t *hashtable, char *key ) {
int bin = 0;
entry_t *pair;
bin = ht_hash( hashtable, key );
/* Step through the bin, looking for our value. */
pair = hashtable->table[ bin ];
while( pair != NULL && pair->key != NULL && strcmp( key, pair->key ) > 0 ) {
pair = pair->next;
}
/* Did we actually find anything? */
if( pair == NULL || pair->key == NULL || strcmp( key, pair->key ) != 0 ) {
return NULL;
} else {
return pair->value;
}
}
int main( int argc, char **argv ) {
hashtable_t *hashtable = ht_create( 65536 );
ht_set( hashtable, "key1", "inky" );
ht_set( hashtable, "key2", "pinky" );
ht_set( hashtable, "key3", "blinky" );
ht_set( hashtable, "key4", "floyd" );
printf( "%s\n", ht_get( hashtable, "key1" ) );
printf( "%s\n", ht_get( hashtable, "key2" ) );
printf( "%s\n", ht_get( hashtable, "key3" ) );
printf( "%s\n", ht_get( hashtable, "key4" ) );
return 0;
}
@tonious

This comment has been minimized.

Copy link
Owner Author

tonious commented Nov 18, 2011

I wouldn't use this for anything serious without adding some error checking -- this is really more of a code kata.

@tamsanh

This comment has been minimized.

Copy link

tamsanh commented Feb 25, 2013

"Time to grow a pair."

@owensss

This comment has been minimized.

Copy link

owensss commented Jun 3, 2013

line 53, hashval has not been initialized.

@drdickens

This comment has been minimized.

Copy link

drdickens commented Nov 27, 2013

does "hashval < ULONG_MAX" (line 57) make any sense ?

@mattdeboard

This comment has been minimized.

Copy link

mattdeboard commented Nov 30, 2013

@drdickens Yeah, ULONG_MAX is defined in limits.h, and it's the maximum value for a long unsigned int. This is (2^32 - 1). Had to look it up myself here. So basically you keep performing the hashval = hashval * 2^8 operation until you run out of characters in key or you bump against the 32-bit integer limit. That's my understanding, anyway.

@mattdeboard

This comment has been minimized.

Copy link

mattdeboard commented Nov 30, 2013

And thanks @tonious, this really cleared up some questions I had about hash tables. I understand it's not really a "serious" implementation but it's a really instructional one. Thanks again!

@ybouhjira

This comment has been minimized.

Copy link

ybouhjira commented Jan 18, 2014

It's a long unsigned int how is it going to exceed the limit for it's own type?

@ybouhjira

This comment has been minimized.

Copy link

ybouhjira commented Jan 18, 2014

Of course it's not going to exceed the limit just come back to 0

#include <stdio.h>
#include <limits.h>

int main()
{
    printf("%lu\n", ULONG_MAX); // 4294967295
    printf("%lu\n", ULONG_MAX + 1); // 0
    return 0;
}
@hypersoft

This comment has been minimized.

Copy link

hypersoft commented Feb 9, 2014

This is an Excellent Code Template. One of the best I have seen. You've managed to keep it simple, which allows for deterministic developer customizations.

Use Jenkins Hashing Algo...

    size_t hash, i;

#ifdef HASHTABLE_USE_SIMPLE_HASH
    for ( hash = i = 0; i < keyLength; hash = hash << 8, hash += key[ i++ ] );
#else /* Use: Jenkins' "One At a Time Hash" === Perl "Like" Hashing */
    // http://en.wikipedia.org/wiki/Jenkins_hash_function
    for ( hash = i = 0; i < keyLength; ++i ) {
        hash += key[i], hash += ( hash << 10 ), hash ^= ( hash >> 6 );
    }
    hash += ( hash << 3 ), hash ^= ( hash >> 11 ), hash += ( hash << 15 );
#endif

    return hash % hashTable->size;

Notice I used size_t instead of int. DO THIS for all integer variants. signed values are only useful when you need two sides of the same coin. Here we do not. It's a one sided coin in a single dimension, paradoxically speaking.

I don't know how it works out if size_t != 32 bits. Frankly I don't give a fuck. My concern is that the data lines up, not how stupid your compiler is.

Testing for overflow is irrelevant. The data will max out or loop over to 0 possibly +remainder depending on implementation specifics. You are also decreasing the 'self deterministic' characteristics of the algorithm by making logic determinations on output of the hash.

@hypersoft

This comment has been minimized.

Copy link

hypersoft commented Feb 9, 2014

The way you 'Set' new entries to your list is buggered. You should not be swapping out the first entry for the new entry. The list should stream in logical order unless you have applied a sorting algorithm or callback mechanism.

Get rid of the str* functions. Use memcmp instead. Cache the key length of the input as well as give the user a chance to define the length of the INPUTS.

struct entry_s {
  char * key;
  size_t keyLength;
  void * value;
  struct entry_s * successor;
};

Notice I changed your 'next' member to 'successor' which is altogether better english. As is, next could imply a verb or a noun which gives rise to much potential confusion. Same thing goes for that procedure with Set in its name. Use Put instead. Sets are another type of associative array. My version of 'Set' also allows the specification of a boolean overwrite, and returns a boolean status rather than void, which means we can set error codes for interested processes.

I've also done some more identifier changes:

pair --> entryItem // pair is not descriptive enough. Yet your grow a pair joke was hillarious.
newpair --> thisItem // now we really know what the fuck we are doing
last --> previousItem // wowee, almost ready to debug that code by linguistics alone.
next --> currentItem // looking sharp kid!
// I also added: primaryItem, to avoid a double lookup in list at the end of the Set function.
hashtable_s.table --> entry // It makes no sense to have a table in a table for our purpose.
bin --> list // yeah, that's what it "points" to a linked list.

// Small Preview:
    entry_t * currentItem = NULL, * previousItem = NULL,
    * primaryItem = currentItem = hashTable->entry[ list ], * thisItem;

// Hash Table Entry List? Fuck yeah, we are definitely cooking with GAS.

My version, (thanks to your contribution of course) Allows for binary keys and values by allowing the user to specify length of input. If the length given for an input is ZERO, then it is logically assumed that it MUST be a bona-fide char *, for which we CAN use strlen or some other inline byte scanner. Just to be sure it works out for other folks, I added compiler option: -fno-strict-aliasing. At this point since you have added a keyLength member to your entries... You can validate your keys by testing the key lengths FIRST, then if equal, perform a memcmp and check explicitly for ZERO return value. Saves much processing, and is absolutely imperative when supplying binary datum for keys and values.

For my next trick, I'm adding a logical order list, equal to the size of the allocated 'table' as you called it. In this ZERO Initialized unlinked offset list, I will store in SET OPERATION order, the offsets of the 'buckets' (or 'bins' as you called them), so that an enumeration call back, can be supplied with logical ordered lists, without having to muck around with searching algorithms, storing order properties on entries, or massive link dereferencing to enumerate the Hash Table Entries. The most immediate drawback to this approach, is that if a list is removed completely from the hash table, the entire array of offsets will need to be shifted to maintain logical order. Of course, since a user can't readily determine how to delete an entire list, this is a rare event. This process could be optimized by providing a varag function that takes a list of keys to delete, from there, its simply delete all targets and fill in the blanks.

OOPS, in the midst of all this self talk, I realized I overlooked the fact that I will indeed have to track the address of EACH new KVP to maintain order. Which, is not bad at all. When it comes time to destruct, its a single serial operation, AND, we can add a performance hit counter to each entry and add some automatic access tuning without getting all trussed up in the process.

You may also note, that with binary inputs... You could actually use this beast to implement a pseudo array that can have named properties. You also aren't held back by string encoding. Multiple encodings equates to multiple entries of course, so you'll have to settle for something or everything, which is much better than nothing!

Thanks for this great opportunity to stretch my legs. I'll come back later and post my repo. But I really do believe, if you are interested in this topic of computer science, as well as learning, and trying NEW things, the example code given is the best there is, because it IS NOT PERFECTED, but in its simplicity, it is PERFECT.

@hypersoft

This comment has been minimized.

Copy link

hypersoft commented Feb 9, 2014

Oh yeah, your enable strdup feature, (which I replaced with memcpy( malloc( length ), key, length ) because I have the data lengths) actually disables features on future platforms. The correct define would be:

/* Enable strdup */
#ifndef __USE_XOPEN2K8
    #define __USE_XOPEN2K8 1
#endif

Because that flag is boolean, rather than version dependent. I use netbeans. They beat the fuck out of GNU IDE Dev Tools, and IMHO its a piece of shit rat turd begging for an ass kicking. Certainly however, code refactoring, active source code analysis and hyperlinked navigation of code modules, does you much more justice than man bullshit(7), because you actually get to see what's defined and what is not, even in the system headers which is how I found this undocumented feature. It may be that it is undocumented, but in the least, we won't be disabling anything unless it is potentially very hazardous, unavailable, or simply restricted.

@JuliaDream

This comment has been minimized.

Copy link

JuliaDream commented Mar 21, 2014

I guess, "hashtable" should be NULL checked in ht_get and ht_set.

@JuliaDream

This comment has been minimized.

Copy link

JuliaDream commented Mar 21, 2014

And, a release function is missing...

@counterpro

This comment has been minimized.

Copy link

counterpro commented Apr 8, 2014

hi am new to this and I need someone to help me, my problem is how to avoid using coliciones see a tree in a list, thank you very much in advance
my email is counterpro09@gmail.com

@joaofnfernandes

This comment has been minimized.

Copy link

joaofnfernandes commented May 1, 2014

Hi,
Thanks for taking the time to write this implementation. I've learned a lot from it.

You could make the void ht_set( hashtable_t *hashtable, char *key, char *value ) function a bit simpler.
Currently your logic tries to:

  • Find if the key already exists in the table
  • If it doesn't exist, find here you are on the pairs linked list (beginning, middle, or end) and add your new key/value pair there.

Why not simply, find if the key already exists, and if it doesn't, add a new key/value pair as the first element of that bin?

@tonious

This comment has been minimized.

Copy link
Owner Author

tonious commented Jun 2, 2014

Hey folks! For some reason I didn't seen any notifications on the comment traffic for this gist. Thanks for taking a look.

There's some really excellent commentary here.

First off, it looks like I did bugger the range check in ht_hash. What it should be doing is checking to see if adding one more byte will roll it over, as opposed to trying to determine if it already has rolled over. The idea being that 'beef' and 'beefstake' should both end up in the same bin. Another way of doing this would be by using sizeof to determine how many characters really fit in our hash key. However, it's still a very naive hash. Hypersoft is right -- a better hashing algorithm should be used in real life.

Hypersoft is also right about using size_t for anything referring to length or size. And about using mem* functions rather than str* functions. I did say this was a naive implementation, right? ;)

Variable names are always worth doing better, and I have bounced bad names back in code review. That said, I think most devs would understand 'next' when looking at a linked list, while 'successor' may require some thought.

In retrospect, trying to keep insertions ordered is probably a refinement too far. Both Hypersoft and joaofnfernandes called that out. It adds complexity at insertion time, but does not save any complexity or time at retrieval time.

JuliaDream is right -- there should be additional null checks. I didn't write a release function, as this was never intended to be anything more than a toy.

And last but not least, owensss is indeed correct about an uninitialized variable. That's pretty dangerous in a hashing function.

Thanks again everybody. I'm not sure I'll update this, but I certainly do appreciate the commentary.

@tekknolagi

This comment has been minimized.

Copy link

tekknolagi commented Jul 27, 2014

Hi @tonious. I would like to use a modified implementation of this in a small project. Are you okay with that? Under what license?

Additionally, if this is not a fantastic implementation to use (needs more error checking, etc), what would you recommend that is similarly lightweight?

@xwiz

This comment has been minimized.

Copy link

xwiz commented Jul 28, 2014

@tekknolagi you might want to take a look at this :)

@tonious

This comment has been minimized.

Copy link
Owner Author

tonious commented Aug 1, 2014

Go ahead and just use it, I'd call this public domain. However, there are quite a few better implementations linked in this Stack Overflow post.

If you're going to use this in any kind of a production setting, I'd find something with a long usage history, or a complete set of unit tests, or ideally, both.

@nvurgaft

This comment has been minimized.

Copy link

nvurgaft commented Nov 16, 2014

Been looking for something like this and this is awesome. Thanks!

@ximik777

This comment has been minimized.

Copy link

ximik777 commented Dec 3, 2014

Hello! Thank you for your example! I'm just starting to learn C. You can write a proper example for the purification of memory sounding? And how can I make sure that all the memory that has been allocated during the program was cleared after its completion? Thanks!

@ateddy42

This comment has been minimized.

Copy link

ateddy42 commented May 6, 2015

@ximik777 Try using Valgrind. Install it, then check for leaks using valgrind -q --leak-check=full ./hash. That should give you a pretty detailed analysis of what memory was not freed up.

@tahir24434

This comment has been minimized.

Copy link

tahir24434 commented Jul 19, 2015

As @owenss mentioned, line 53, hashval has not been initialized.
Following would make more sense.
unsigned long hash = 5381;

@ntish

This comment has been minimized.

Copy link

ntish commented Jul 28, 2015

@tonious instead of a char *value i want to add a struct ....what i am suppose to do ??
will this algorithm work for structure ??

@arinal

This comment has been minimized.

Copy link

arinal commented Aug 17, 2015

@ntish have a look at my modifications:
ht.c
ht.h

To see how it is tested, look at testHashtable function inside test.c
What you're supposed to do is to learn something called void * :)

@tonious thanks for your quick implementation, I am using it on my project. I know your concern, its still in its infancy and just used as a POC project.

@jesusygeremias

This comment has been minimized.

Copy link

jesusygeremias commented Oct 20, 2015

@tonious You must initialize unsigned long int hashval=0; because you are generating different hashcodes to the same key.

@Arjunkalyan

This comment has been minimized.

Copy link

Arjunkalyan commented Nov 7, 2015

Hello tonious,
I didn't find limit.h file...where is it..?

@joe-skb7

This comment has been minimized.

Copy link

joe-skb7 commented Nov 7, 2015

@Arjunkalyan You mean limits.h? It should be at /usr/include/limits.h and it usually comes with libc6-dev package (contains header files for C Standard Library for GCC). If you install manpages-posix-dev package, you will see man page for it (with man limits.h command). If your compiler misses this file -- well, it's kinda sucks :)

@superjamie

This comment has been minimized.

Copy link

superjamie commented Dec 3, 2015

There's a potential memory leak on line 38. If table = malloc() fails, then hashtable is still allocated from line 33. Line 39 should free(hashtable) before returning NULL.

Of course if malloc() is failing you've probably got bigger problems...

@samveen

This comment has been minimized.

Copy link

samveen commented Dec 10, 2015

@tonious I couldn't find the source of the reference of the relation between (inky, pinky, blinky) and floyd.Where did you get the reference from? (Google gives just instances of your code (thank you for it) which is spread all over the place)

@gourao

This comment has been minimized.

Copy link

gourao commented Jan 4, 2016

In ht_hash line 53, you need to initialize hashval to 0.

@gourao

This comment has been minimized.

Copy link

gourao commented Jan 5, 2016

You also need to fix ht_remove:

        if (pair == hashtable->table[bin]) {
            hashtable->table[bin] = pair->next;
        }   
@maximgavrilov

This comment has been minimized.

Copy link

maximgavrilov commented Feb 26, 2016

This code makes terrible hash, because it is just first (as projected) or last (as implemented) 4-8 chars in string. Any strings set with the same prefix/suffix will trash your hash.

hashval = hashval << 8;
hashval += key[i];

Most popular way to calculate some simple hash for strings is something like hashval = key[i] + 31 * hashval, with possible modifications of some prime number instead of 31.

@CecilWard

This comment has been minimized.

Copy link

CecilWard commented May 26, 2016

Using signed arithmetic everywhere when should be using unsigned.

@ChangeMyName

This comment has been minimized.

Copy link

ChangeMyName commented Dec 19, 2016

can some one explain me this line of code please? hashval = hashval << 8; why bitshift??

@masein

This comment has been minimized.

Copy link

masein commented Dec 20, 2016

why do we use typedef ?

@nskforward

This comment has been minimized.

Copy link

nskforward commented Mar 25, 2017

How to implement "ht_free" function?

@lewisrobbins

This comment has been minimized.

Copy link

lewisrobbins commented Mar 29, 2017

@ChangMyName it's just the hashing function used. You're free to use any others.

@Eksapsy

This comment has been minimized.

Copy link

Eksapsy commented Jun 5, 2017

This piece of code has a memory leak I think.
When you allocate the HashTable array in the ht_create() function, and then you return NULL in case malloc() couldn't allocate any memory, you don't free first the hashTable_t.
Which I think results in a memory leak.

@ghost

This comment has been minimized.

Copy link

ghost commented Jul 19, 2017

You should use calloc in HT_create instade of malloc and then put null everywhere. It's cleaner and safer because malloc could return virtual memory even if memory is exhausted.

@msasad

This comment has been minimized.

Copy link

msasad commented Dec 1, 2017

Search operation in a hashtable must be O(1). In this implementation, it is O(n).

@dmcmax

This comment has been minimized.

Copy link

dmcmax commented Dec 4, 2017

there is a method/function to clear hast table or delete single key-value pair?

@rlegko

This comment has been minimized.

Copy link

rlegko commented Dec 5, 2017

I think the function ht_hash has some severe flaws.
First, as did owensss notice, the variable hashval is not initialized. On most architectures it will have the value that was left in the stack by the last function that used that location, maybe this one. This results in some random initialization on each call, which may return different values for identical keys.
Then the test hashval < ULONG_MAX is useless. As pointed above the purpose "the idea being that 'beef' and 'beefstake' should both end up in the same bin" by "checking to see if adding one more byte will roll it over" is completely missed. But why should 'beef' and 'beefstake' both end up in the same bin ? As much as possible, we want that different keys end up in different bins. It would be nice if we could make the computation of hashval with infinite precision, so that every character has its contribution to the result.
The good new is : you don't need infinite precision for that. This is because any way you take at the end the computed value modulo hashtable->size. The computations modulo some value M have an interesting property : as long as you do only additions and multiplications, the final value do not change if you replace any intermediate result by its value modulo M. Expressed in a more pedantic way, modulo M is an homomorphism. So you can take modulo hashtable->size at each step of the loop, which ensures that you will never roll over as long as hashtable->size is less than ULONG_MAX>>8, and the final result will be the same as if it was computed with infinite precision. Let's rewrite the function as follows :

int ht_hash( hashtable_t *hashtable, char *key ) {

unsigned long int hashval = 0;
int i = 0;

/* Convert our string to an integer */
while( i < strlen( key ) ) {
	hashval = ((hashval << 8) +  key[ i ]) %  hashtable->size;
	i++;
}
return hashval;

}

@tonious

This comment has been minimized.

Copy link
Owner Author

tonious commented Dec 6, 2017

Hey folks, original implementer here.

This was a quick code kata I wrote almost seven years ago. I've not updated it since then. And I'm not going to.

I'm happy to leave this here for the sake of retaining all the commentary, but I have some notes for coders that are new to this thread.

Do not use this in production.

It's public domain, it's just fundamentally buggy.

If you're writing code for a larger system, use a real data structures library. There's a perfectly servicable hash table implementation in glibc! These will be better implemented and better tested.

On the other hand if you're writing for an embedded system, you should probably not use a hash table. There are many better techniques for managing limited memory.

And on the gripping hand, if you're writing kernel code, well, you should be able to debug this with your eyes closed.

Read the thread before commenting.

This code has been hanging out for seven years. There are C programmers that are younger than this code sample.

You get the same points (zero) for not noticing that someone else has already found a given bug, as you would for copying their comment and submitting as your own.

I'd like to leave this thread here, but I don't want a stream of commenters rediscovering the same bugs.

This was not homework. This is not your homework.

This page is literally the first google result for 'Hash Table in C'. Do you think your markers don't know that? In fact, I've actually seen it handed to markers as example code that is evidence of plagiarism. It has some very distinctive bugs, and will not pass the dumbest of code similarity tests. Your only hope is that whoever marks this isn't paying attention and doesn't use automated plagiarism detection tools.

And whatever you're doing, do not submit it to my own grad advisor, at my own alma mater. That's the flirting-with-your-girlfriends-sister version of academic misconduct. Yes, this actually happened.

@tombustayoung

This comment has been minimized.

Copy link

tombustayoung commented Dec 6, 2017

big deal

@igor375

This comment has been minimized.

Copy link

igor375 commented Dec 27, 2017

working variant below:
a) we have no need for hash table size more than possible elements count
b) in case of duplicate hash, just store value to the next free slot
c) just for less details - fixed key/value length

#include <stdio.h>
#include <string.h>

struct entry_s {
	char not_empty;
	char key[7];
	char val[16];
};

typedef struct entry_s entry_t;

struct ht_s {
	int size;
	struct entry_s *tab;	
};

typedef struct ht_s ht_t;


/* Create a new ht. */
ht_t *ht_create(int size) {
	ht_t *ht = NULL;
	int i;

	if (size < 1) return NULL;

	/* Allocate the tab itself. */
	if ((ht = malloc(sizeof(ht_t))) == NULL )
		return NULL;
	/* Allocate pointers to the head nodes. */
	if ((ht->tab = malloc(sizeof( entry_t) * size)) == NULL)
		return NULL;
	for (i = 0; i < size; i++)
		memset(&ht->tab[i], 0, sizeof(entry_t));

	ht->size = size;
	return ht;	
}

/* Hash a string for a particular hash tab. */
int ht_hash( ht_t *ht, char *key ) {
	unsigned int hv;
	int i = 0;

	/* Convert our string to an integer */
	while (i < strlen(key)) {
		hv += key[i];
		i++;
	}
	return hv % ht->size;
}

/* Insert a key-val pair into a hash tab. */
void ht_set(ht_t *ht, char *key, char *val) {
	int ind = ht_hash(ht, key), i = ind;

	for (; i < ht -> size; i++)
		if (!ht->tab[i].not_empty) {
			ht->tab[i].not_empty = 1;
			strcpy(ht->tab[i].key, key);
			strcpy(ht->tab[i].val, val);
			return;
		}
	for (i = 0; i < ind; i++)
		if (!ht->tab[i].not_empty) {
			ht->tab[i].not_empty = 1;
			strcpy(ht->tab[i].key, key);
			strcpy(ht->tab[i].val, val);
			return;
		}
}

/* Retrieve a key-val pair from a hash tab. */
char *ht_get(ht_t *ht, char *key) {
	int ind = ht_hash(ht, key), i = ind;

	for (; i < ht->size; i++)
		if ((ht->tab[i].not_empty) && !strcmp(ht->tab[i].key, key))
			return ht->tab[i].val;
	for (i = 0; i < ind; i++)
		if ((ht->tab[i].not_empty) && !strcmp(ht->tab[i].key, key))
			return ht->tab[i].val;
	return "not found";	
}


int main(void) {

	ht_t *ht = ht_create(4);

	ht_set(ht, "key1", "inky");
	ht_set(ht, "key2", "pinky");
	ht_set(ht, "key3", "blinky");
	ht_set(ht, "kez2", "floyd");

	printf( "%s\n", ht_get( ht, "key1" ) );
	printf( "%s\n", ht_get( ht, "key2" ) );
	printf( "%s\n", ht_get( ht, "key3" ) );
	printf( "%s\n", ht_get( ht, "kez2" ) );
	printf( "%s\n", ht_get( ht, "key4" ) );

	return 0;
}```

 
@etale-cohomology

This comment has been minimized.

Copy link

etale-cohomology commented Feb 7, 2018

@igor375 I like your hash table implementation, but it doesn't ever free memory =(

@perlboy-emeritus

This comment has been minimized.

Copy link

perlboy-emeritus commented Mar 10, 2018

This is a hashing algorithm I implemented for a client in 1992. The client needed
to store hundreds of thousands of graphics files in an MRP system on AIX servers but
long before database BLOBs were released into the wild. (Full disclosure: I would
not have used BLOBs even if they had been available.) The idea was to create a
set of directories numbered 0..SOME NUMBER and find the image files by hashing a
normalized string that represented a filename. This one's signature has been
modified for use in hash.c. The actual implementation's return expression was:

return (hash % PRIME) % QUEUES;

where PRIME = 23017 and QUEUES = 503. Both are prime numbers, PRIME to encourage
a uniform pseudo-random distribution and QUEUES was the number of file system
directories deemed needed to hold the collection and its expected growth at the
time, as in 0..QUEUES-1. I tested the distribution of keys exhaustively for
uniformity before implementing. The client was pleased and when last I consulted
to them in 2004, the algorithm was still in use. Weinberger's algorithm was
published in Programming in C++, Dewhurst & Stark, PH, 1989, ISBM 0-13-723156-3;
Copyright © Peter Weinberger, AT&T Bell Laboratories, Inc., 1989. Obviously, you
may need permission to use it in a commercial distribution but not to study or as
I did, for a client who uses it only internally. It is so well-known that
anti-plagiarism software will detect it, but the point is, don't reinvent the wheel. Stand
on the shoulders of giants. If you're a student, your project might be to use this or some
other hashing algorithm in a novel and a unique to you, application, of course, citing
your sources, unless you're studying numerical programming, and then all bets are off.

/* Hash a string for a particular hash table. */
int ht_hash( size_t ht_size, char *key ) {
    unsigned int hash = 0, g;
    register char *p;   /* pointer used to scan and access string */

    /* Convert our string to an integer using Peter Weinberger's algorithm */
    for (p = key; *p; p++) {
        hash = (hash << 4) + *p;
        /* assumes a 32 bit int size */
        if ((g = hash & 0xf0000000)) {
            hash ^= g >> 24;
            hash ^= g;
        }
    }
    return hash % ht_size;
}
@tonious

This comment has been minimized.

Copy link
Owner Author

tonious commented Apr 27, 2018

Nice. I wish I knew about that hash when I wrote the my version 😆

I was more focused on memory management at the time. Being a little further on in my career, I think I'd start with a test suite and build backwards.

@OSTnm

This comment has been minimized.

Copy link

OSTnm commented May 11, 2018

hashval is not initialized to 0;

int ht_hash(hashtable_t *hashtable, char *key) {

**unsigned long int hashval = 0;**
int i = 0;

/* Convert our string to an integer */
while(hashval < ULONG_MAX && i < strlen(key)) {
	hashval = hashval << 8;
	hashval += key[i];
	i++;
}

return hashval % hashtable->size;

}

@Liron24

This comment has been minimized.

Copy link

Liron24 commented May 26, 2018

Hi,
This is very informative, thank you for sharing :)
I have a project in C which I need to use some kind of DB to store information which is basically a large table with a lot of fields for each entry. The code should work online and I'm always getting more entries and I don't know the hash table size in advance.
I also need to keep that DB sorted since I need to compare between the fields and make decision accordingly.
I want to use the hash table for having the field that I need to decide for as the key and a struct with all the other fields as values.
I was wondering about:
1.Can I dynamically enlarge the table and not set a predefined size to it? Any Ideas how? Would it make is much more compliacted in calculating the keys-to-hashes?
2.Is there a fast/efficient way to keeo the table sorted by the keys?
3.Can I make the key to be int? How the ht_hash would impact by that?

In case yo have other ideas for me regarding DBs I can use in C, I would love to hear them.

Thank you!

@fermosh

This comment has been minimized.

@elinux9000

This comment has been minimized.

Copy link

elinux9000 commented Jul 30, 2018

Memory leak if table allocation fails.

/* Allocate pointers to the head nodes. */
if ((ht->tab = malloc(sizeof( entry_t) * size)) == NULL)
	return NULL;

should be

/* Allocate pointers to the head nodes. */
if ((ht->tab = malloc(sizeof( entry_t) * size)) == NULL)
{ 
    free(ht);
    return NULL;
}
@yanichik

This comment has been minimized.

Copy link

yanichik commented Dec 3, 2018

@tonious what is the purpose of typedefing the entry_s and hashtable_s structs to entry_t & hashtable_t ?
thanks.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.