Skip to content

Instantly share code, notes, and snippets.

@tonious
Last active February 17, 2023 02:25
Show Gist options
  • Save tonious/1377667 to your computer and use it in GitHub Desktop.
Save tonious/1377667 to your computer and use it in GitHub Desktop.
A quick hashtable implementation in c.
/* Read this comment first: https://gist.github.com/tonious/1377667#gistcomment-2277101
* 2017-12-05
*
* -- T.
*/
#define _XOPEN_SOURCE 500 /* Enable certain library functions (strdup) on linux. See feature_test_macros(7) */
#include <stdlib.h>
#include <stdio.h>
#include <limits.h>
#include <string.h>
struct entry_s {
char *key;
char *value;
struct entry_s *next;
};
typedef struct entry_s entry_t;
struct hashtable_s {
int size;
struct entry_s **table;
};
typedef struct hashtable_s hashtable_t;
/* Create a new hashtable. */
hashtable_t *ht_create( int size ) {
hashtable_t *hashtable = NULL;
int i;
if( size < 1 ) return NULL;
/* Allocate the table itself. */
if( ( hashtable = malloc( sizeof( hashtable_t ) ) ) == NULL ) {
return NULL;
}
/* Allocate pointers to the head nodes. */
if( ( hashtable->table = malloc( sizeof( entry_t * ) * size ) ) == NULL ) {
return NULL;
}
for( i = 0; i < size; i++ ) {
hashtable->table[i] = NULL;
}
hashtable->size = size;
return hashtable;
}
/* Hash a string for a particular hash table. */
int ht_hash( hashtable_t *hashtable, char *key ) {
unsigned long int hashval;
int i = 0;
/* Convert our string to an integer */
while( hashval < ULONG_MAX && i < strlen( key ) ) {
hashval = hashval << 8;
hashval += key[ i ];
i++;
}
return hashval % hashtable->size;
}
/* Create a key-value pair. */
entry_t *ht_newpair( char *key, char *value ) {
entry_t *newpair;
if( ( newpair = malloc( sizeof( entry_t ) ) ) == NULL ) {
return NULL;
}
if( ( newpair->key = strdup( key ) ) == NULL ) {
return NULL;
}
if( ( newpair->value = strdup( value ) ) == NULL ) {
return NULL;
}
newpair->next = NULL;
return newpair;
}
/* Insert a key-value pair into a hash table. */
void ht_set( hashtable_t *hashtable, char *key, char *value ) {
int bin = 0;
entry_t *newpair = NULL;
entry_t *next = NULL;
entry_t *last = NULL;
bin = ht_hash( hashtable, key );
next = hashtable->table[ bin ];
while( next != NULL && next->key != NULL && strcmp( key, next->key ) > 0 ) {
last = next;
next = next->next;
}
/* There's already a pair. Let's replace that string. */
if( next != NULL && next->key != NULL && strcmp( key, next->key ) == 0 ) {
free( next->value );
next->value = strdup( value );
/* Nope, could't find it. Time to grow a pair. */
} else {
newpair = ht_newpair( key, value );
/* We're at the start of the linked list in this bin. */
if( next == hashtable->table[ bin ] ) {
newpair->next = next;
hashtable->table[ bin ] = newpair;
/* We're at the end of the linked list in this bin. */
} else if ( next == NULL ) {
last->next = newpair;
/* We're in the middle of the list. */
} else {
newpair->next = next;
last->next = newpair;
}
}
}
/* Retrieve a key-value pair from a hash table. */
char *ht_get( hashtable_t *hashtable, char *key ) {
int bin = 0;
entry_t *pair;
bin = ht_hash( hashtable, key );
/* Step through the bin, looking for our value. */
pair = hashtable->table[ bin ];
while( pair != NULL && pair->key != NULL && strcmp( key, pair->key ) > 0 ) {
pair = pair->next;
}
/* Did we actually find anything? */
if( pair == NULL || pair->key == NULL || strcmp( key, pair->key ) != 0 ) {
return NULL;
} else {
return pair->value;
}
}
int main( int argc, char **argv ) {
hashtable_t *hashtable = ht_create( 65536 );
ht_set( hashtable, "key1", "inky" );
ht_set( hashtable, "key2", "pinky" );
ht_set( hashtable, "key3", "blinky" );
ht_set( hashtable, "key4", "floyd" );
printf( "%s\n", ht_get( hashtable, "key1" ) );
printf( "%s\n", ht_get( hashtable, "key2" ) );
printf( "%s\n", ht_get( hashtable, "key3" ) );
printf( "%s\n", ht_get( hashtable, "key4" ) );
return 0;
}
@igor375
Copy link

igor375 commented Dec 27, 2017

working variant below:
a) we have no need for hash table size more than possible elements count
b) in case of duplicate hash, just store value to the next free slot
c) just for less details - fixed key/value length

#include <stdio.h>
#include <string.h>

struct entry_s {
	char not_empty;
	char key[7];
	char val[16];
};

typedef struct entry_s entry_t;

struct ht_s {
	int size;
	struct entry_s *tab;	
};

typedef struct ht_s ht_t;


/* Create a new ht. */
ht_t *ht_create(int size) {
	ht_t *ht = NULL;
	int i;

	if (size < 1) return NULL;

	/* Allocate the tab itself. */
	if ((ht = malloc(sizeof(ht_t))) == NULL )
		return NULL;
	/* Allocate pointers to the head nodes. */
	if ((ht->tab = malloc(sizeof( entry_t) * size)) == NULL)
		return NULL;
	for (i = 0; i < size; i++)
		memset(&ht->tab[i], 0, sizeof(entry_t));

	ht->size = size;
	return ht;	
}

/* Hash a string for a particular hash tab. */
int ht_hash( ht_t *ht, char *key ) {
	unsigned int hv;
	int i = 0;

	/* Convert our string to an integer */
	while (i < strlen(key)) {
		hv += key[i];
		i++;
	}
	return hv % ht->size;
}

/* Insert a key-val pair into a hash tab. */
void ht_set(ht_t *ht, char *key, char *val) {
	int ind = ht_hash(ht, key), i = ind;

	for (; i < ht -> size; i++)
		if (!ht->tab[i].not_empty) {
			ht->tab[i].not_empty = 1;
			strcpy(ht->tab[i].key, key);
			strcpy(ht->tab[i].val, val);
			return;
		}
	for (i = 0; i < ind; i++)
		if (!ht->tab[i].not_empty) {
			ht->tab[i].not_empty = 1;
			strcpy(ht->tab[i].key, key);
			strcpy(ht->tab[i].val, val);
			return;
		}
}

/* Retrieve a key-val pair from a hash tab. */
char *ht_get(ht_t *ht, char *key) {
	int ind = ht_hash(ht, key), i = ind;

	for (; i < ht->size; i++)
		if ((ht->tab[i].not_empty) && !strcmp(ht->tab[i].key, key))
			return ht->tab[i].val;
	for (i = 0; i < ind; i++)
		if ((ht->tab[i].not_empty) && !strcmp(ht->tab[i].key, key))
			return ht->tab[i].val;
	return "not found";	
}


int main(void) {

	ht_t *ht = ht_create(4);

	ht_set(ht, "key1", "inky");
	ht_set(ht, "key2", "pinky");
	ht_set(ht, "key3", "blinky");
	ht_set(ht, "kez2", "floyd");

	printf( "%s\n", ht_get( ht, "key1" ) );
	printf( "%s\n", ht_get( ht, "key2" ) );
	printf( "%s\n", ht_get( ht, "key3" ) );
	printf( "%s\n", ht_get( ht, "kez2" ) );
	printf( "%s\n", ht_get( ht, "key4" ) );

	return 0;
}```

 

@etale-cohomology
Copy link

@igor375 I like your hash table implementation, but it doesn't ever free memory =(

@perlboy-emeritus
Copy link

perlboy-emeritus commented Mar 10, 2018

This is a hashing algorithm I implemented for a client in 1992. The client needed
to store hundreds of thousands of graphics files in an MRP system on AIX servers but
long before database BLOBs were released into the wild. (Full disclosure: I would
not have used BLOBs even if they had been available.) The idea was to create a
set of directories numbered 0..SOME NUMBER and find the image files by hashing a
normalized string that represented a filename. This one's signature has been
modified for use in hash.c. The actual implementation's return expression was:

return (hash % PRIME) % QUEUES;

where PRIME = 23017 and QUEUES = 503. Both are prime numbers, PRIME to encourage
a uniform pseudo-random distribution and QUEUES was the number of file system
directories deemed needed to hold the collection and its expected growth at the
time, as in 0..QUEUES-1. I tested the distribution of keys exhaustively for
uniformity before implementing. The client was pleased and when last I consulted
to them in 2004, the algorithm was still in use. Weinberger's algorithm was
published in Programming in C++, Dewhurst & Stark, PH, 1989, ISBM 0-13-723156-3;
Copyright © Peter Weinberger, AT&T Bell Laboratories, Inc., 1989. Obviously, you
may need permission to use it in a commercial distribution but not to study or as
I did, for a client who uses it only internally. It is so well-known that
anti-plagiarism software will detect it, but the point is, don't reinvent the wheel. Stand
on the shoulders of giants. If you're a student, your project might be to use this or some
other hashing algorithm in a novel and a unique to you, application, of course, citing
your sources, unless you're studying numerical programming, and then all bets are off.

/* Hash a string for a particular hash table. */
int ht_hash( size_t ht_size, char *key ) {
    unsigned int hash = 0, g;
    register char *p;   /* pointer used to scan and access string */

    /* Convert our string to an integer using Peter Weinberger's algorithm */
    for (p = key; *p; p++) {
        hash = (hash << 4) + *p;
        /* assumes a 32 bit int size */
        if ((g = hash & 0xf0000000)) {
            hash ^= g >> 24;
            hash ^= g;
        }
    }
    return hash % ht_size;
}

@tonious
Copy link
Author

tonious commented Apr 27, 2018

Nice. I wish I knew about that hash when I wrote the my version :laughing:

I was more focused on memory management at the time. Being a little further on in my career, I think I'd start with a test suite and build backwards.

@OSTnm
Copy link

OSTnm commented May 11, 2018

hashval is not initialized to 0;

int ht_hash(hashtable_t *hashtable, char *key) {

**unsigned long int hashval = 0;**
int i = 0;

/* Convert our string to an integer */
while(hashval < ULONG_MAX && i < strlen(key)) {
	hashval = hashval << 8;
	hashval += key[i];
	i++;
}

return hashval % hashtable->size;

}

@Liron24
Copy link

Liron24 commented May 26, 2018

Hi,
This is very informative, thank you for sharing :)
I have a project in C which I need to use some kind of DB to store information which is basically a large table with a lot of fields for each entry. The code should work online and I'm always getting more entries and I don't know the hash table size in advance.
I also need to keep that DB sorted since I need to compare between the fields and make decision accordingly.
I want to use the hash table for having the field that I need to decide for as the key and a struct with all the other fields as values.
I was wondering about:
1.Can I dynamically enlarge the table and not set a predefined size to it? Any Ideas how? Would it make is much more compliacted in calculating the keys-to-hashes?
2.Is there a fast/efficient way to keeo the table sorted by the keys?
3.Can I make the key to be int? How the ht_hash would impact by that?

In case yo have other ideas for me regarding DBs I can use in C, I would love to hear them.

Thank you!

@fermosh
Copy link

fermosh commented Jul 17, 2018

@elinux9000
Copy link

Memory leak if table allocation fails.

/* Allocate pointers to the head nodes. */
if ((ht->tab = malloc(sizeof( entry_t) * size)) == NULL)
	return NULL;

should be

/* Allocate pointers to the head nodes. */
if ((ht->tab = malloc(sizeof( entry_t) * size)) == NULL)
{ 
    free(ht);
    return NULL;
}

@yanichik
Copy link

yanichik commented Dec 3, 2018

@tonious what is the purpose of typedefing the entry_s and hashtable_s structs to entry_t & hashtable_t ?
thanks.

@datvuthanh
Copy link

datvuthanh commented Sep 17, 2019

Delete entries

void ht_del( hashtable_t *hashtable, char *key){
    
    int bin = 0;
    int flag =0;
    
    entry_t *temp,*pair;
    
    bin = ht_hash (hashtable,key);
    
    pair = hashtable->table[bin]; // Go to index node
   
    temp = pair;
    while(pair != NULL) {
        if(strcmp(key,pair->key) == 0 ){
            flag = 1;
            if(pair == hashtable->table[bin]){
                hashtable->table[bin] = pair->next;
            }
            else{
                temp->next = pair->next;
            }
            free(pair);
            break;
        }
        temp = pair;
        pair = pair->next;
    }
    if (flag){
        printf("Data deleted successfully from Hash Table\n");
    }
    else{
        printf("Given data is not present in hash table!!!!\n");
    }
}

@fgl82
Copy link

fgl82 commented Nov 3, 2019

@tonious Hey, I'm using this on my program, I don't care if it has bugs as for my purposes it works just fine.
Do I need to do anything else other than mention where I took this code from?
Thanks.

@tonious
Copy link
Author

tonious commented Nov 4, 2019

@fgl82, I just took a look at simplemenu, and I agree. This actually isn't a horrible solution for your purposes. It's still a horrible solution for all other purposes 😁.

I'd recommend using a better hashing algorithm. perlboy-emeritus has a great comment on this. But that's a fine stroke, and won't matter in your use case.

FWIW, I have a gameboy zero build that I really like, but emulation station really is too heavy for the way I use it. I can't wait to see how simplemenu turns out.

@fgl82
Copy link

fgl82 commented Dec 10, 2019

@tonious, I'm just seeing this reply! Man, thanks for this, it's working great after tweaking it just a bit, have had no complaints so far!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment