Skip to content

Instantly share code, notes, and snippets.

@creationix
Last active March 20, 2024 22:07
Show Gist options
  • Star 75 You must be signed in to star a gist
  • Fork 24 You must be signed in to fork a gist
  • Save creationix/4710780 to your computer and use it in GitHub Desktop.
Save creationix/4710780 to your computer and use it in GitHub Desktop.
/*
* Simple MD5 implementation
*
* Compile with: gcc -o md5 -O3 -lm md5.c
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
// leftrotate function definition
#define LEFTROTATE(x, c) (((x) << (c)) | ((x) >> (32 - (c))))
// These vars will contain the hash
uint32_t h0, h1, h2, h3;
void md5(uint8_t *initial_msg, size_t initial_len) {
// Message (to prepare)
uint8_t *msg = NULL;
// Note: All variables are unsigned 32 bit and wrap modulo 2^32 when calculating
// r specifies the per-round shift amounts
uint32_t r[] = {7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22,
5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20,
4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23,
6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21};
// Use binary integer part of the sines of integers (in radians) as constants// Initialize variables:
uint32_t k[] = {
0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391};
h0 = 0x67452301;
h1 = 0xefcdab89;
h2 = 0x98badcfe;
h3 = 0x10325476;
// Pre-processing: adding a single 1 bit
//append "1" bit to message
/* Notice: the input bytes are considered as bits strings,
where the first bit is the most significant bit of the byte.[37] */
// Pre-processing: padding with zeros
//append "0" bit until message length in bit ≡ 448 (mod 512)
//append length mod (2 pow 64) to message
int new_len = ((((initial_len + 8) / 64) + 1) * 64) - 8;
msg = calloc(new_len + 64, 1); // also appends "0" bits
// (we alloc also 64 extra bytes...)
memcpy(msg, initial_msg, initial_len);
msg[initial_len] = 128; // write the "1" bit
uint32_t bits_len = 8*initial_len; // note, we append the len
memcpy(msg + new_len, &bits_len, 4); // in bits at the end of the buffer
// Process the message in successive 512-bit chunks:
//for each 512-bit chunk of message:
int offset;
for(offset=0; offset<new_len; offset += (512/8)) {
// break chunk into sixteen 32-bit words w[j], 0 ≤ j ≤ 15
uint32_t *w = (uint32_t *) (msg + offset);
#ifdef DEBUG
printf("offset: %d %x\n", offset, offset);
int j;
for(j =0; j < 64; j++) printf("%x ", ((uint8_t *) w)[j]);
puts("");
#endif
// Initialize hash value for this chunk:
uint32_t a = h0;
uint32_t b = h1;
uint32_t c = h2;
uint32_t d = h3;
// Main loop:
uint32_t i;
for(i = 0; i<64; i++) {
#ifdef ROUNDS
uint8_t *p;
printf("%i: ", i);
p=(uint8_t *)&a;
printf("%2.2x%2.2x%2.2x%2.2x ", p[0], p[1], p[2], p[3], a);
p=(uint8_t *)&b;
printf("%2.2x%2.2x%2.2x%2.2x ", p[0], p[1], p[2], p[3], b);
p=(uint8_t *)&c;
printf("%2.2x%2.2x%2.2x%2.2x ", p[0], p[1], p[2], p[3], c);
p=(uint8_t *)&d;
printf("%2.2x%2.2x%2.2x%2.2x", p[0], p[1], p[2], p[3], d);
puts("");
#endif
uint32_t f, g;
if (i < 16) {
f = (b & c) | ((~b) & d);
g = i;
} else if (i < 32) {
f = (d & b) | ((~d) & c);
g = (5*i + 1) % 16;
} else if (i < 48) {
f = b ^ c ^ d;
g = (3*i + 5) % 16;
} else {
f = c ^ (b | (~d));
g = (7*i) % 16;
}
#ifdef ROUNDS
printf("f=%x g=%d w[g]=%x\n", f, g, w[g]);
#endif
uint32_t temp = d;
d = c;
c = b;
printf("rotateLeft(%x + %x + %x + %x, %d)\n", a, f, k[i], w[g], r[i]);
b = b + LEFTROTATE((a + f + k[i] + w[g]), r[i]);
a = temp;
}
// Add this chunk's hash to result so far:
h0 += a;
h1 += b;
h2 += c;
h3 += d;
}
// cleanup
free(msg);
}
int main(int argc, char **argv) {
if (argc < 2) {
printf("usage: %s 'string'\n", argv[0]);
return 1;
}
char *msg = argv[1];
size_t len = strlen(msg);
// benchmark
// int i;
// for (i = 0; i < 1000000; i++) {
md5(msg, len);
// }
//var char digest[16] := h0 append h1 append h2 append h3 //(Output is in little-endian)
uint8_t *p;
// display result
p=(uint8_t *)&h0;
printf("%2.2x%2.2x%2.2x%2.2x", p[0], p[1], p[2], p[3], h0);
p=(uint8_t *)&h1;
printf("%2.2x%2.2x%2.2x%2.2x", p[0], p[1], p[2], p[3], h1);
p=(uint8_t *)&h2;
printf("%2.2x%2.2x%2.2x%2.2x", p[0], p[1], p[2], p[3], h2);
p=(uint8_t *)&h3;
printf("%2.2x%2.2x%2.2x%2.2x", p[0], p[1], p[2], p[3], h3);
puts("");
return 0;
}
@foothsu
Copy link

foothsu commented Jun 4, 2019

uint32_t bits_len = 8*initial_len; // note, we append the len
memcpy(msg + new_len, &bits_len, 4); // in bits at the end of the buffer````

These two lines are different from MD5.
The document says

A 64-bit representation of b (the length of the message before the
padding bits were added) is appended to the result of the previous
step. In the unlikely event that b is greater than 2^64, then only
the low-order 64 bits of b are used.

@bkht
Copy link

bkht commented Aug 23, 2019

It works very nice!
For "The quick brown fox jumps over the lazy dog", it gives checksum 9e107d9d372bb6826bd81d3542a419d6, which is correct.
But I noticed you bench-marked the code (in main), so I guess you wanted it to be fast.

However, the next code fragment looks like a clumsy and time consuming way to calculate new_len, since it uses a loop to iterate over (potential) many bits, just to count them:

int new_len;
for(new_len = initial_len*8 + 1; new_len%512!=448; new_len++);
new_len /= 8;

To me, it doesn't seem a good practice to use loops just to calculate something very simple like this.
So I quickly translated above code into the code below, which does exactly the same calculation, but about 320 times faster (on an STM32H7):

int new_len = ((((initial_len + 8) / 64) + 1) * 64) - 8;

@danielvu1994
Copy link

danielvu1994 commented Dec 31, 2019

Sorry for this question but how can I run this program in big endian machine?
It tested on little endian and it worked but as I mentioned above: I need to try it in big endian.

Do i need to refactor this:

       // break chunk into sixteen 32-bit words w[j], 0 ≤ j ≤ 15
        uint32_t *w = (uint32_t *) (msg + offset);

Because as I know: Big endian read byte with different order. Is this right?
Thank you!

@creationix
Copy link
Author

creationix commented Jan 10, 2020

@danielvu1994 Signedness and endiness both don't matter if you're treating values as opaque black boxes generally speaking. But if you want to take the 4 bytes out of a 32 bit integer, for example, then the order you pull them out matters.

@danielvu1994
Copy link

Thanks for checking my comment.
I did find a way to pull them follow order of big endian

@alias-rahil
Copy link

printf("%2.2x%2.2x%2.2x%2.2x", p[0], p[1], p[2], p[3], h0);

these printf functions have only 4 place holders but 5 arguments are supplied, can someone explain that???

@alias-rahil
Copy link

printf("%2.2x%2.2x%2.2x%2.2x", p[0], p[1], p[2], p[3], h0);

these printf functions have only 4 place holders but 5 arguments are supplied, can someone explain that???

@kisssko
Copy link

kisssko commented Oct 7, 2020

Optimized too complex 'if' block:

            switch ((i >> 4) & 3)
            {
                case 0:    // (i < 16)
                    f = (b & c) | ((~b) & d);
                    g = i;
                    break;
                case 1:    // (i < 32)
                    f = (d & b) | ((~d) & c);
                    g = (5 * i + 1) % 16;
                    break;
                case 2:    // (i < 48)
                    f = b ^ c ^ d;
                    g = (3 * i + 5) % 16;
                    break;
                case 3:    // other
                    f = c ^ (b | (~d));
                    g = (7 * i) % 16;
                    break;
            }

@lynnporu
Copy link

lynnporu commented Oct 9, 2020

printf("%2.2x%2.2x%2.2x%2.2x", p[0], p[1], p[2], p[3], h0);

these printf functions have only 4 place holders but 5 arguments are supplied, can someone explain that???

I think, we can omit h0 here, because h0 is our 32-bit word, which we are converting to p and printing by index.

@alias-rahil
Copy link

printf("%2.2x%2.2x%2.2x%2.2x", p[0], p[1], p[2], p[3], h0);
these printf functions have only 4 place holders but 5 arguments are supplied, can someone explain that???

I think, we can omit h0 here, because h0 is our 32-bit word, which we are converting to p and printing by index.

Thanks!!

@elektronika-ba
Copy link

I tested in VS2019 and it works with slight modification. I had a problem with calloc() and since I needed this for a specific application, I changed *msg declaration to something fixed:

uint8_t msg[128];
memset(msg, 0, 128);

and it works.

Thanks!

@creationix
Copy link
Author

This code is licensed MIT and free to use.

@emake1187
Copy link

emake1187 commented Dec 3, 2020

This code is licensed MIT and free to use.

hi, dear developer! i execute your code. but when i try to get md5 from "test". it's return to me
h0=0x9182c883
h1 0xdd0c3fe9
h2 0x96324d31
h3 0xb20dade1

but it's not the typical md5 hash for "test". typical hash is 098f6bcd4621d373cade4e832627b4f6 .
can you prompt me anything?

@PerryRylance
Copy link

"test" hashes correctly for me showing 098f6bcd4621d373cade4e832627b4f6

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment