-
-
Save tonious/1377667 to your computer and use it in GitHub Desktop.
/* Read this comment first: https://gist.github.com/tonious/1377667#gistcomment-2277101 | |
* 2017-12-05 | |
* | |
* -- T. | |
*/ | |
#define _XOPEN_SOURCE 500 /* Enable certain library functions (strdup) on linux. See feature_test_macros(7) */ | |
#include <stdlib.h> | |
#include <stdio.h> | |
#include <limits.h> | |
#include <string.h> | |
struct entry_s { | |
char *key; | |
char *value; | |
struct entry_s *next; | |
}; | |
typedef struct entry_s entry_t; | |
struct hashtable_s { | |
int size; | |
struct entry_s **table; | |
}; | |
typedef struct hashtable_s hashtable_t; | |
/* Create a new hashtable. */ | |
hashtable_t *ht_create( int size ) { | |
hashtable_t *hashtable = NULL; | |
int i; | |
if( size < 1 ) return NULL; | |
/* Allocate the table itself. */ | |
if( ( hashtable = malloc( sizeof( hashtable_t ) ) ) == NULL ) { | |
return NULL; | |
} | |
/* Allocate pointers to the head nodes. */ | |
if( ( hashtable->table = malloc( sizeof( entry_t * ) * size ) ) == NULL ) { | |
return NULL; | |
} | |
for( i = 0; i < size; i++ ) { | |
hashtable->table[i] = NULL; | |
} | |
hashtable->size = size; | |
return hashtable; | |
} | |
/* Hash a string for a particular hash table. */ | |
int ht_hash( hashtable_t *hashtable, char *key ) { | |
unsigned long int hashval; | |
int i = 0; | |
/* Convert our string to an integer */ | |
while( hashval < ULONG_MAX && i < strlen( key ) ) { | |
hashval = hashval << 8; | |
hashval += key[ i ]; | |
i++; | |
} | |
return hashval % hashtable->size; | |
} | |
/* Create a key-value pair. */ | |
entry_t *ht_newpair( char *key, char *value ) { | |
entry_t *newpair; | |
if( ( newpair = malloc( sizeof( entry_t ) ) ) == NULL ) { | |
return NULL; | |
} | |
if( ( newpair->key = strdup( key ) ) == NULL ) { | |
return NULL; | |
} | |
if( ( newpair->value = strdup( value ) ) == NULL ) { | |
return NULL; | |
} | |
newpair->next = NULL; | |
return newpair; | |
} | |
/* Insert a key-value pair into a hash table. */ | |
void ht_set( hashtable_t *hashtable, char *key, char *value ) { | |
int bin = 0; | |
entry_t *newpair = NULL; | |
entry_t *next = NULL; | |
entry_t *last = NULL; | |
bin = ht_hash( hashtable, key ); | |
next = hashtable->table[ bin ]; | |
while( next != NULL && next->key != NULL && strcmp( key, next->key ) > 0 ) { | |
last = next; | |
next = next->next; | |
} | |
/* There's already a pair. Let's replace that string. */ | |
if( next != NULL && next->key != NULL && strcmp( key, next->key ) == 0 ) { | |
free( next->value ); | |
next->value = strdup( value ); | |
/* Nope, could't find it. Time to grow a pair. */ | |
} else { | |
newpair = ht_newpair( key, value ); | |
/* We're at the start of the linked list in this bin. */ | |
if( next == hashtable->table[ bin ] ) { | |
newpair->next = next; | |
hashtable->table[ bin ] = newpair; | |
/* We're at the end of the linked list in this bin. */ | |
} else if ( next == NULL ) { | |
last->next = newpair; | |
/* We're in the middle of the list. */ | |
} else { | |
newpair->next = next; | |
last->next = newpair; | |
} | |
} | |
} | |
/* Retrieve a key-value pair from a hash table. */ | |
char *ht_get( hashtable_t *hashtable, char *key ) { | |
int bin = 0; | |
entry_t *pair; | |
bin = ht_hash( hashtable, key ); | |
/* Step through the bin, looking for our value. */ | |
pair = hashtable->table[ bin ]; | |
while( pair != NULL && pair->key != NULL && strcmp( key, pair->key ) > 0 ) { | |
pair = pair->next; | |
} | |
/* Did we actually find anything? */ | |
if( pair == NULL || pair->key == NULL || strcmp( key, pair->key ) != 0 ) { | |
return NULL; | |
} else { | |
return pair->value; | |
} | |
} | |
int main( int argc, char **argv ) { | |
hashtable_t *hashtable = ht_create( 65536 ); | |
ht_set( hashtable, "key1", "inky" ); | |
ht_set( hashtable, "key2", "pinky" ); | |
ht_set( hashtable, "key3", "blinky" ); | |
ht_set( hashtable, "key4", "floyd" ); | |
printf( "%s\n", ht_get( hashtable, "key1" ) ); | |
printf( "%s\n", ht_get( hashtable, "key2" ) ); | |
printf( "%s\n", ht_get( hashtable, "key3" ) ); | |
printf( "%s\n", ht_get( hashtable, "key4" ) ); | |
return 0; | |
} |
This is a hashing algorithm I implemented for a client in 1992. The client needed
to store hundreds of thousands of graphics files in an MRP system on AIX servers but
long before database BLOBs were released into the wild. (Full disclosure: I would
not have used BLOBs even if they had been available.) The idea was to create a
set of directories numbered 0..SOME NUMBER and find the image files by hashing a
normalized string that represented a filename. This one's signature has been
modified for use in hash.c. The actual implementation's return expression was:
return (hash % PRIME) % QUEUES;
where PRIME = 23017 and QUEUES = 503. Both are prime numbers, PRIME to encourage
a uniform pseudo-random distribution and QUEUES was the number of file system
directories deemed needed to hold the collection and its expected growth at the
time, as in 0..QUEUES-1. I tested the distribution of keys exhaustively for
uniformity before implementing. The client was pleased and when last I consulted
to them in 2004, the algorithm was still in use. Weinberger's algorithm was
published in Programming in C++, Dewhurst & Stark, PH, 1989, ISBM 0-13-723156-3;
Copyright © Peter Weinberger, AT&T Bell Laboratories, Inc., 1989. Obviously, you
may need permission to use it in a commercial distribution but not to study or as
I did, for a client who uses it only internally. It is so well-known that
anti-plagiarism software will detect it, but the point is, don't reinvent the wheel. Stand
on the shoulders of giants. If you're a student, your project might be to use this or some
other hashing algorithm in a novel and a unique to you, application, of course, citing
your sources, unless you're studying numerical programming, and then all bets are off.
/* Hash a string for a particular hash table. */
int ht_hash( size_t ht_size, char *key ) {
unsigned int hash = 0, g;
register char *p; /* pointer used to scan and access string */
/* Convert our string to an integer using Peter Weinberger's algorithm */
for (p = key; *p; p++) {
hash = (hash << 4) + *p;
/* assumes a 32 bit int size */
if ((g = hash & 0xf0000000)) {
hash ^= g >> 24;
hash ^= g;
}
}
return hash % ht_size;
}
Nice. I wish I knew about that hash when I wrote the my version :laughing:
I was more focused on memory management at the time. Being a little further on in my career, I think I'd start with a test suite and build backwards.
hashval is not initialized to 0;
int ht_hash(hashtable_t *hashtable, char *key) {
**unsigned long int hashval = 0;**
int i = 0;
/* Convert our string to an integer */
while(hashval < ULONG_MAX && i < strlen(key)) {
hashval = hashval << 8;
hashval += key[i];
i++;
}
return hashval % hashtable->size;
}
Hi,
This is very informative, thank you for sharing :)
I have a project in C which I need to use some kind of DB to store information which is basically a large table with a lot of fields for each entry. The code should work online and I'm always getting more entries and I don't know the hash table size in advance.
I also need to keep that DB sorted since I need to compare between the fields and make decision accordingly.
I want to use the hash table for having the field that I need to decide for as the key and a struct with all the other fields as values.
I was wondering about:
1.Can I dynamically enlarge the table and not set a predefined size to it? Any Ideas how? Would it make is much more compliacted in calculating the keys-to-hashes?
2.Is there a fast/efficient way to keeo the table sorted by the keys?
3.Can I make the key to be int? How the ht_hash would impact by that?
In case yo have other ideas for me regarding DBs I can use in C, I would love to hear them.
Thank you!
@Liron24 maybe something along these lines https://referencesource.microsoft.com/#mscorlib/system/collections/hashtable.cs,10fefb6e0ae510dd
Memory leak if table allocation fails.
/* Allocate pointers to the head nodes. */
if ((ht->tab = malloc(sizeof( entry_t) * size)) == NULL)
return NULL;
should be
/* Allocate pointers to the head nodes. */
if ((ht->tab = malloc(sizeof( entry_t) * size)) == NULL)
{
free(ht);
return NULL;
}
@tonious what is the purpose of typedefing the entry_s and hashtable_s structs to entry_t & hashtable_t ?
thanks.
Delete entries
void ht_del( hashtable_t *hashtable, char *key){
int bin = 0;
int flag =0;
entry_t *temp,*pair;
bin = ht_hash (hashtable,key);
pair = hashtable->table[bin]; // Go to index node
temp = pair;
while(pair != NULL) {
if(strcmp(key,pair->key) == 0 ){
flag = 1;
if(pair == hashtable->table[bin]){
hashtable->table[bin] = pair->next;
}
else{
temp->next = pair->next;
}
free(pair);
break;
}
temp = pair;
pair = pair->next;
}
if (flag){
printf("Data deleted successfully from Hash Table\n");
}
else{
printf("Given data is not present in hash table!!!!\n");
}
}
@tonious Hey, I'm using this on my program, I don't care if it has bugs as for my purposes it works just fine.
Do I need to do anything else other than mention where I took this code from?
Thanks.
@fgl82, I just took a look at simplemenu
, and I agree. This actually isn't a horrible solution for your purposes. It's still a horrible solution for all other purposes 😁.
I'd recommend using a better hashing algorithm. perlboy-emeritus has a great comment on this. But that's a fine stroke, and won't matter in your use case.
FWIW, I have a gameboy zero build that I really like, but emulation station really is too heavy for the way I use it. I can't wait to see how simplemenu
turns out.
@tonious, I'm just seeing this reply! Man, thanks for this, it's working great after tweaking it just a bit, have had no complaints so far!
@tonious I couldn't find the source of the reference of the relation between (
inky
,pinky
,blinky
) andfloyd
.Where did you get the reference from? (Google gives just instances of your code (thank you for it) which is spread all over the place)
@igor375 I like your hash table implementation, but it doesn't ever free memory =(