-
-
Save creationix/4710780 to your computer and use it in GitHub Desktop.
/* | |
* Simple MD5 implementation | |
* | |
* Compile with: gcc -o md5 -O3 -lm md5.c | |
*/ | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <stdint.h> | |
// leftrotate function definition | |
#define LEFTROTATE(x, c) (((x) << (c)) | ((x) >> (32 - (c)))) | |
// These vars will contain the hash | |
uint32_t h0, h1, h2, h3; | |
void md5(uint8_t *initial_msg, size_t initial_len) { | |
// Message (to prepare) | |
uint8_t *msg = NULL; | |
// Note: All variables are unsigned 32 bit and wrap modulo 2^32 when calculating | |
// r specifies the per-round shift amounts | |
uint32_t r[] = {7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, | |
5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, | |
4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, | |
6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21}; | |
// Use binary integer part of the sines of integers (in radians) as constants// Initialize variables: | |
uint32_t k[] = { | |
0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, | |
0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, | |
0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, | |
0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, | |
0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, | |
0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8, | |
0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, | |
0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a, | |
0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, | |
0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, | |
0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, | |
0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, | |
0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, | |
0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, | |
0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, | |
0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391}; | |
h0 = 0x67452301; | |
h1 = 0xefcdab89; | |
h2 = 0x98badcfe; | |
h3 = 0x10325476; | |
// Pre-processing: adding a single 1 bit | |
//append "1" bit to message | |
/* Notice: the input bytes are considered as bits strings, | |
where the first bit is the most significant bit of the byte.[37] */ | |
// Pre-processing: padding with zeros | |
//append "0" bit until message length in bit ≡ 448 (mod 512) | |
//append length mod (2 pow 64) to message | |
int new_len = ((((initial_len + 8) / 64) + 1) * 64) - 8; | |
msg = calloc(new_len + 64, 1); // also appends "0" bits | |
// (we alloc also 64 extra bytes...) | |
memcpy(msg, initial_msg, initial_len); | |
msg[initial_len] = 128; // write the "1" bit | |
uint32_t bits_len = 8*initial_len; // note, we append the len | |
memcpy(msg + new_len, &bits_len, 4); // in bits at the end of the buffer | |
// Process the message in successive 512-bit chunks: | |
//for each 512-bit chunk of message: | |
int offset; | |
for(offset=0; offset<new_len; offset += (512/8)) { | |
// break chunk into sixteen 32-bit words w[j], 0 ≤ j ≤ 15 | |
uint32_t *w = (uint32_t *) (msg + offset); | |
#ifdef DEBUG | |
printf("offset: %d %x\n", offset, offset); | |
int j; | |
for(j =0; j < 64; j++) printf("%x ", ((uint8_t *) w)[j]); | |
puts(""); | |
#endif | |
// Initialize hash value for this chunk: | |
uint32_t a = h0; | |
uint32_t b = h1; | |
uint32_t c = h2; | |
uint32_t d = h3; | |
// Main loop: | |
uint32_t i; | |
for(i = 0; i<64; i++) { | |
#ifdef ROUNDS | |
uint8_t *p; | |
printf("%i: ", i); | |
p=(uint8_t *)&a; | |
printf("%2.2x%2.2x%2.2x%2.2x ", p[0], p[1], p[2], p[3], a); | |
p=(uint8_t *)&b; | |
printf("%2.2x%2.2x%2.2x%2.2x ", p[0], p[1], p[2], p[3], b); | |
p=(uint8_t *)&c; | |
printf("%2.2x%2.2x%2.2x%2.2x ", p[0], p[1], p[2], p[3], c); | |
p=(uint8_t *)&d; | |
printf("%2.2x%2.2x%2.2x%2.2x", p[0], p[1], p[2], p[3], d); | |
puts(""); | |
#endif | |
uint32_t f, g; | |
if (i < 16) { | |
f = (b & c) | ((~b) & d); | |
g = i; | |
} else if (i < 32) { | |
f = (d & b) | ((~d) & c); | |
g = (5*i + 1) % 16; | |
} else if (i < 48) { | |
f = b ^ c ^ d; | |
g = (3*i + 5) % 16; | |
} else { | |
f = c ^ (b | (~d)); | |
g = (7*i) % 16; | |
} | |
#ifdef ROUNDS | |
printf("f=%x g=%d w[g]=%x\n", f, g, w[g]); | |
#endif | |
uint32_t temp = d; | |
d = c; | |
c = b; | |
printf("rotateLeft(%x + %x + %x + %x, %d)\n", a, f, k[i], w[g], r[i]); | |
b = b + LEFTROTATE((a + f + k[i] + w[g]), r[i]); | |
a = temp; | |
} | |
// Add this chunk's hash to result so far: | |
h0 += a; | |
h1 += b; | |
h2 += c; | |
h3 += d; | |
} | |
// cleanup | |
free(msg); | |
} | |
int main(int argc, char **argv) { | |
if (argc < 2) { | |
printf("usage: %s 'string'\n", argv[0]); | |
return 1; | |
} | |
char *msg = argv[1]; | |
size_t len = strlen(msg); | |
// benchmark | |
// int i; | |
// for (i = 0; i < 1000000; i++) { | |
md5(msg, len); | |
// } | |
//var char digest[16] := h0 append h1 append h2 append h3 //(Output is in little-endian) | |
uint8_t *p; | |
// display result | |
p=(uint8_t *)&h0; | |
printf("%2.2x%2.2x%2.2x%2.2x", p[0], p[1], p[2], p[3], h0); | |
p=(uint8_t *)&h1; | |
printf("%2.2x%2.2x%2.2x%2.2x", p[0], p[1], p[2], p[3], h1); | |
p=(uint8_t *)&h2; | |
printf("%2.2x%2.2x%2.2x%2.2x", p[0], p[1], p[2], p[3], h2); | |
p=(uint8_t *)&h3; | |
printf("%2.2x%2.2x%2.2x%2.2x", p[0], p[1], p[2], p[3], h3); | |
puts(""); | |
return 0; | |
} |
Sorry for this question but how can I run this program in big endian machine?
It tested on little endian and it worked but as I mentioned above: I need to try it in big endian.
Do i need to refactor this:
// break chunk into sixteen 32-bit words w[j], 0 ≤ j ≤ 15
uint32_t *w = (uint32_t *) (msg + offset);
Because as I know: Big endian read byte with different order. Is this right?
Thank you!
@danielvu1994 Signedness and endiness both don't matter if you're treating values as opaque black boxes generally speaking. But if you want to take the 4 bytes out of a 32 bit integer, for example, then the order you pull them out matters.
Thanks for checking my comment.
I did find a way to pull them follow order of big endian
printf("%2.2x%2.2x%2.2x%2.2x", p[0], p[1], p[2], p[3], h0);
these printf functions have only 4 place holders but 5 arguments are supplied, can someone explain that???
printf("%2.2x%2.2x%2.2x%2.2x", p[0], p[1], p[2], p[3], h0);
these printf functions have only 4 place holders but 5 arguments are supplied, can someone explain that???
Optimized too complex 'if' block:
switch ((i >> 4) & 3)
{
case 0: // (i < 16)
f = (b & c) | ((~b) & d);
g = i;
break;
case 1: // (i < 32)
f = (d & b) | ((~d) & c);
g = (5 * i + 1) % 16;
break;
case 2: // (i < 48)
f = b ^ c ^ d;
g = (3 * i + 5) % 16;
break;
case 3: // other
f = c ^ (b | (~d));
g = (7 * i) % 16;
break;
}
printf("%2.2x%2.2x%2.2x%2.2x", p[0], p[1], p[2], p[3], h0);
these printf functions have only 4 place holders but 5 arguments are supplied, can someone explain that???
I think, we can omit h0
here, because h0
is our 32-bit word, which we are converting to p
and printing by index.
printf("%2.2x%2.2x%2.2x%2.2x", p[0], p[1], p[2], p[3], h0);
these printf functions have only 4 place holders but 5 arguments are supplied, can someone explain that???I think, we can omit
h0
here, becauseh0
is our 32-bit word, which we are converting top
and printing by index.
Thanks!!
I tested in VS2019 and it works with slight modification. I had a problem with calloc() and since I needed this for a specific application, I changed *msg declaration to something fixed:
uint8_t msg[128];
memset(msg, 0, 128);
and it works.
Thanks!
This code is licensed MIT and free to use.
This code is licensed MIT and free to use.
hi, dear developer! i execute your code. but when i try to get md5 from "test". it's return to me
h0=0x9182c883
h1 0xdd0c3fe9
h2 0x96324d31
h3 0xb20dade1
but it's not the typical md5 hash for "test". typical hash is 098f6bcd4621d373cade4e832627b4f6 .
can you prompt me anything?
"test" hashes correctly for me showing 098f6bcd4621d373cade4e832627b4f6
It works very nice!
For "The quick brown fox jumps over the lazy dog", it gives checksum 9e107d9d372bb6826bd81d3542a419d6, which is correct.
But I noticed you bench-marked the code (in main), so I guess you wanted it to be fast.
However, the next code fragment looks like a clumsy and time consuming way to calculate new_len, since it uses a loop to iterate over (potential) many bits, just to count them:
To me, it doesn't seem a good practice to use loops just to calculate something very simple like this.
So I quickly translated above code into the code below, which does exactly the same calculation, but about 320 times faster (on an STM32H7):