public
Created

  • Download Gist
gistfile1.c
C
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105
/****************************************************************************
**
*F hash_dos.c Stuff 1.00 2012-06-06 ms
**
** This small program demonstrates that a multiplicative hash function
** CANNOT be secured against denial-of-service attacks using a initial seed
** value.
**
** That is because two strings <str1> and <str2> of the same length that
** hash to the same value (i.e. produce a collision) for ONE seed will
** actually hash to the same value for ALL seeds.
**
** This is not necessarily true for two strings <str1> and <str2> of
** different length; they might hash to the same value for one seed and hash
** to different values for the other seeds.
**
** A multiplicative hash function 'h(<seed>,<str>)' has the following form
**
** h( seed, s[0..n-1] ) = (((seed*A+s[0])*A+s[1])*A + ... + s[n-2])*A+s[n-1]
**
** The hash function DJB2 by Dan Bernstein has this form (with the
** multiplicative constant A=33). It is used in the NoSQL database Redis.
**
** This weakness can be seen theoretically if you realize that:
**
** h( seed, s[0..n-1] ) = seed*A^n + s[0]*A^(n-1) + ... s[n-2]*A + s[n-1]
**
** The example uses the fact that 33*'B'+'A' == 33*'A'+'b' (since upper and
** lowercase letters are exactely 32 positions apart in ASCII). So the
** 2^<n> different strings (of length 2*<n>) matching the regular expression
** '(BA|Ab){n}' hash to the same value.
**
** Compile with 'cc -o hash_dos hash_dos.c', execute with './hash_dos'.
*/
 
#include <stdio.h>
 
 
/****************************************************************************
**
** The following code is copied verbatim (including the comment) from the
** Redis source as of 2012-06-06, file './src/dict.c' (see Redis in GitHub
** "https://github.com/antirez/redis"). It is assumed that this copying is
** a "fair use".
*/
 
static int dict_hash_function_seed = 5381;
 
void dictSetHashFunctionSeed(unsigned int seed) {
dict_hash_function_seed = seed;
}
 
unsigned int dictGetHashFunctionSeed(void) {
return dict_hash_function_seed;
}
 
/* Generic hash function (a popular one from Bernstein).
* I tested a few and this was the best. */
unsigned int dictGenHashFunction(const unsigned char *buf, int len) {
unsigned int hash = dict_hash_function_seed;
 
while (len--)
hash = ((hash << 5) + hash) + (*buf++); /* hash * 33 + c */
return hash;
}
 
 
/****************************************************************************
**
** main function
**
** locks for a seed for which 'BABA', 'BAAb', 'AbBA', 'AbAb' do not all hash
** to the same value (does not really expect to find one ;-).
*/
 
#define SEED_LOW 2718
#define SEED_HIGH 3141
main ()
{
int i;
for ( i = SEED_LOW; i < SEED_HIGH; i++ ) {
dictSetHashFunctionSeed( i );
if ( dictGenHashFunction("BABA",4)!=dictGenHashFunction("BAAb",4)
|| dictGenHashFunction("BABA",4)!=dictGenHashFunction("AbBA",4)
|| dictGenHashFunction("BABA",4)!=dictGenHashFunction("AbAb",4) ) {
break;
}
}
if ( i < SEED_HIGH ) {
printf("not a collision for seed %d (unexpected)\n",i);
}
else {
printf("collisions for all seeds (as expected)\n");
}
}
 
 
/****************************************************************************
**
*E hash_dos.c . . . . . . . . . . . . . . . . . . . . . . . . . . ends here
**
*A ms: "Schoenert - Martin" <martin.Schoenert@web.de>
**
*H 1.00 2012-06-06 ms initial version
*/

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.