Last active
November 11, 2020 13:42
-
-
Save lynnporu/11cb33333ab6dcf607bc0f8dc524cc80 to your computer and use it in GitHub Desktop.
MD5 implementation in C using Linux file mapping
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
This is MD5 C implementation using Linux file mapping. You can compile this | |
as main using following directive: | |
gcc -Wall md5.c io.c -o bin/md5 | |
Having this .c file is also mandatory when you're including md5.h somewhere | |
in your program. In that case just delete main() from this file or use | |
-DOMIT_MD5_MAIN flag when compiling. | |
Following flags are available: | |
-DPRINT_READ Prints message "read: .../..." that shows you calculation | |
process. | |
-DOMIT_MD5_MAIN Do not include main of md5.c in your code. | |
*/ | |
#include "md5.h" | |
// Advance the pointer `p` by `n` bytes and view the result as a uint8_t*.
#ifndef RSHIFT_U8 | |
#define RSHIFT_U8(p, n) ( (uint8_t*)(p)+(n) ) | |
#endif | |
#ifndef OMIT_MD5_MAIN | |
// These chars are used to store user -i, -o and -c flags. | |
char compare_mode = 0; | |
char read_from_file = 0; | |
// Renaming `print_to_file` and `fout` variables will | |
// ruin IO_PRINT macro defined in io.h. You should also rename | |
// these in the macro or just don't use IO_PRINT. | |
char print_to_file = 0; | |
FILE* fout; | |
char compare_hash[32]; | |
mapstream_t fin; | |
int main(int argc, char** argv) { | |
int argument = 0; | |
while( (argument = getopt(argc, argv, "hei:o:c:")) != -1 ){ | |
switch(argument){ | |
case 'h': | |
printf( | |
"This program generates MD5 hash from the string you type into stdin. EOF is\n" | |
"exptected at the end of the stdin. You can generate it with Ctrl+D in Linux\n" | |
"or Ctrl+Z in Windows terminal. Output will be typed into stdout.\n" | |
" -h Prints this message;\n" | |
" -e Do not cut the last symbol of the read buffer (usually EOF or \\0);\n" | |
" -i [..] Take input from the given file instead of stdin;\n" | |
" -o [..] Rewrite the given file with the output. Do not print to stdout;\n" | |
" -c [..] Compare stdin (or file, if -o used) with the hash given here. Prints\n" | |
" '0' if hash does match and '1' otherwise.\n" | |
); | |
return 0; | |
break; | |
case 'e': | |
IO_CUT_EOF = 0; | |
break; | |
case 'i': | |
read_from_file = 1; | |
openfmap(&fin, optarg, O_RDONLY, 0, PROT_READ); | |
fin.sizeslot1 = md5_size(fin.fsize); | |
break; | |
case 'o': | |
print_to_file = 1; | |
// No need to map output file. | |
if((fout = fopen(optarg, "w+")) == NULL){ | |
EMERGENCY("Cannot open output file"); | |
exit(EXIT_FAILURE); | |
} | |
break; | |
case 'c': | |
compare_mode = 1; | |
// Unsafe behaviour here, because optarg can be | |
// smaller or bigger than 32 bytes. | |
strcat(compare_hash, optarg); | |
break; | |
} | |
} | |
if(!read_from_file){ | |
size_t fsize; | |
char* msg; | |
readstream(&msg, &fsize, stdin); | |
fin.map = (void*)msg; | |
fin.fsize = (off_t)fsize; | |
} | |
md_buffer_t md; | |
md5_calculate(&fin, &md); | |
const char* md5_result = md5_string(&md); | |
// Be carefull with IO_PRINT macro, as it relies on | |
// `fout` and `print_to_file` variables be named exactly | |
// like that. | |
if(compare_mode) | |
IO_PRINT( | |
"%d\n", strcmp(compare_hash, md5_result) == 0 | |
) | |
else | |
IO_PRINT("%s\n", md5_result) | |
if(read_from_file) closefmap(&fin); | |
else free(fin.map); | |
if(print_to_file) fclose(fout); | |
free((void*)md5_result); | |
return 0; | |
} | |
#endif | |
// Round 1 mixing function: takes bits of c where b is set and bits of d
// where b is clear.
uint32_t md5_f1(md_buffer_t* md){
    const uint32_t b = md->words.b.n;
    const uint32_t c = md->words.c.n;
    const uint32_t d = md->words.d.n;

    return (b & c) | (~b & d);
}
// Round 2 mixing function: takes bits of b where d is set and bits of c
// where d is clear.
uint32_t md5_f2(md_buffer_t* md){
    const uint32_t b = md->words.b.n;
    const uint32_t c = md->words.c.n;
    const uint32_t d = md->words.d.n;

    return (d & b) | (~d & c);
}
// Round 3 mixing function: bitwise XOR of the b, c and d words.
uint32_t md5_f3(md_buffer_t* md){
    uint32_t mixed = md->words.b.n;

    mixed ^= md->words.c.n;
    mixed ^= md->words.d.n;
    return mixed;
}
// Round 4 mixing function: c XOR (b OR NOT d).
uint32_t md5_f4(md_buffer_t* md){
    const uint32_t b = md->words.b.n;
    const uint32_t c = md->words.c.n;
    const uint32_t not_d = ~md->words.d.n;

    return c ^ (b | not_d);
}
/* | |
* This function's output desired to be feeded to md5_calculate. It reads the | |
* file from the mapstream_t and outputs the following structure: | |
* [message = | |
* [A = file data], len(A) = source->fsize | |
* [B = padding zeros], len(B) is such that (len(A)+len(B)) % 448 = 0 | |
* [C = fsize number], len(C) = 64 | |
* ] | |
* When `message` is read up to the end, this function returns MD5_EOF. Further | |
* callings will return the same. In order to get the message again, you should | |
* set source->cursor=0 manually. */ | |
md5_chunk* md5_read_map(char* freeafter, mapstream_t* source){ | |
*freeafter = 0; | |
// Here and further 64 = 512 / 8 - size of chunk in bytes | |
// 16 is the number of uint32_t that the chunk can fit in. | |
// Reached the end of the message | |
if(source->cursor >= source->sizeslot1 + 64){ | |
return MD5_EOF; | |
} | |
#ifdef PRINT_READ | |
printf("read: %.12ld / %.12ld\r", source->cursor, source->sizeslot1); | |
#endif | |
// Chunk is entirely in file bounds | |
if(source->cursor + 64 <= source->fsize) | |
// You can replace this line with RSHIFT_U8. | |
return (md5_chunk*)(source->map + (source->cursor += 64)); | |
// Otherwise allocate a chunk and fill it customly. | |
md5_chunk* chunk = calloc(16, sizeof(uint32_t)); | |
*freeafter = 1; | |
// Chunk partially includes the file, write the beginning to | |
// zero-initialized buffer | |
if( | |
source->cursor + 64 > source->fsize && | |
source->fsize > source->cursor | |
){ | |
memcpy( | |
chunk, | |
source->map + source->cursor, | |
// fsize-cursor evaluates to bytes left till the of the file, | |
source->fsize - source->cursor | |
); | |
// Write "1" bit | |
*RSHIFT_U8(chunk, source->fsize - source->cursor) = 0x80; | |
} | |
// Ultimately, this condition can be omitted, because no | |
// previous passed. | |
// Append filesize in bits | |
if( | |
source->sizeslot1 + 64 > source->cursor && | |
source->cursor >= source->sizeslot1 | |
){ | |
uint64_t bsize = source->fsize * 8L; | |
memcpy(RSHIFT_U8(chunk, 56), &bsize, 8); | |
} | |
source->cursor += 64; | |
return chunk; | |
} | |
/*
 * Returns the size the data grows to once the MD5 padding is applied, not
 * counting the trailing 8-byte length field: the smallest value that is
 * congruent to 56 (mod 64) and strictly greater than `fsize`, i.e.
 * result*8 = 448 (mod 512).
 *
 * It must be strictly greater because the padding always contains at least
 * the single 0x80 byte, so 56 bytes of data pad to 120, not 56.
 * A negative `fsize` is treated as 0.
 */
uint64_t md5_size(off_t fsize){
    const uint64_t len = fsize > 0 ? (uint64_t)fsize : 0;

    return ((len + 8) / 64) * 64 + 56;
}
/* | |
* This function calculates MD5 hash on file mapping given | |
* as mapstream_t structure. If you're looking for MD5-hashing | |
* on generic char* string, have a look at md5_hash_raw(). */ | |
void md5_calculate(mapstream_t* fin, md_buffer_t* md){ | |
md->words.a.n = 0x67452301; | |
md->words.b.n = 0xefcdab89; | |
md->words.c.n = 0x98badcfe; | |
md->words.d.n = 0x10325476; | |
char freeafter; | |
md5_chunk* chunk; | |
while((chunk = md5_read_map(&freeafter, fin)) != MD5_EOF){ | |
// Initialize local md buffer and assign the value of the global one. | |
md_buffer_t md_it = *md; | |
for(uint8_t i = 0; i < 64; i++) { | |
// 1 for 0 <= i < 16, | |
// 2 for 16 <= i < 32, | |
// 3 for 32 <= i < 48, and 4 for rest | |
uint8_t round = (i >> 4) & 3; | |
// f and g are being calculated by function pointers | |
uint32_t f = (*md5_fns[round])(&md_it); | |
uint32_t g = (md5_rho_m[round] * i + md5_rho_add[round]) | |
% (i < 16 ? 64 : 16); | |
md_word temp = md_it.words.d; | |
md_it.words.d = md_it.words.c; | |
md_it.words.c = md_it.words.b; | |
md_it.words.b.n = md_it.words.b.n | |
+ MD5_LEFTROTATE( | |
(md_it.words.a.n + f + md5_k[i] + chunk->w[g]), | |
md5_r[i] | |
); | |
md_it.words.a = temp; | |
} | |
md->words.a.n += md_it.words.a.n; | |
md->words.b.n += md_it.words.b.n; | |
md->words.c.n += md_it.words.c.n; | |
md->words.d.n += md_it.words.d.n; | |
if(freeafter) free(chunk); | |
} | |
} | |
/*
 * Formats the 16 digest bytes of `md` as 32 lowercase hexadecimal characters.
 * Returns a newly allocated, NUL-terminated string that the caller must
 * free(). The process exits with EXIT_FAILURE if the allocation fails.
 */
const char* md5_string(md_buffer_t* md){
    static const char hex_digits[] = "0123456789abcdef";

    // 32 symbols + '\0'
    char* text = malloc(33 * sizeof(char));
    if(text == NULL){
        EMERGENCY("Error with allocating result")
        exit(EXIT_FAILURE);
    }

    char* out = text;
    for(uint8_t idx = 0; idx < 16; idx++){
        const uint8_t byte = md->digits[idx];
        // High nibble first, then low nibble.
        *out++ = hex_digits[byte >> 4];
        *out++ = hex_digits[byte & 0x0f];
    }
    *out = '\0';

    return text;
}
/* | |
* This function does MD5 hashing on char* message. */ | |
md_buffer_t* md5_hash_raw(const char* message, size_t length){ | |
// This is common example of how to mask a sequence of bytes | |
// as if it was a file mapping. | |
mapstream_t* stream = malloc(sizeof(mapstream_t)); | |
stream->map = (void*)message; | |
stream->fsize = (off_t)length; | |
stream->cursor = 0; | |
stream->sizeslot1 = (off_t)md5_size(stream->fsize); | |
md_buffer_t* md = malloc(sizeof(md_buffer_t)); | |
md5_calculate(stream, md); | |
free(stream); | |
return md; | |
} | |
// Left-rotation amount for each of the 64 steps. Every row below covers one
// round of 16 steps and repeats the same four amounts.
uint32_t md5_r[] = {
    /* steps  0-15 */ 7, 12, 17, 22,  7, 12, 17, 22,  7, 12, 17, 22,  7, 12, 17, 22,
    /* steps 16-31 */ 5,  9, 14, 20,  5,  9, 14, 20,  5,  9, 14, 20,  5,  9, 14, 20,
    /* steps 32-47 */ 4, 11, 16, 23,  4, 11, 16, 23,  4, 11, 16, 23,  4, 11, 16, 23,
    /* steps 48-63 */ 6, 10, 15, 21,  6, 10, 15, 21,  6, 10, 15, 21,  6, 10, 15, 21
};
// Additive constant for each of the 64 steps (the sine-derived table),
// grouped by round: 16 constants per round, four per line.
uint32_t md5_k[] = {
    /* steps 0-15 */
    0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
    0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
    0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
    0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
    /* steps 16-31 */
    0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
    0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
    0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
    0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
    /* steps 32-47 */
    0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
    0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
    0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
    0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
    /* steps 48-63 */
    0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
    0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
    0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
    0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
};
// Mixing function of each round, indexed by round number 0..3.
uint32_t (*md5_fns[])(md_buffer_t*) = {
    md5_f1,
    md5_f2,
    md5_f3,
    md5_f4
};
// The message word used by step i of a round is selected by the permutation
//     rho(i) = (md5_rho_m[round] * i + md5_rho_add[round]) % modulus,
// where md5_calculate applies the modulus itself. The two tables below hold
// the multiplier and the offset for rounds 0..3.
uint8_t md5_rho_m[] = {
    1, 5, 3, 7
};
uint8_t md5_rho_add[] = {
    0, 1, 5, 0
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef H_MD5 | |
#define H_MD5 | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <stdint.h> | |
#include <unistd.h> | |
#include <sys/mman.h> | |
#include <sys/stat.h> | |
#include <fcntl.h> | |
// You can find this header file at | |
// https://gist.github.com/lynnporu/0cb091a71a8e1574dcae3e39a3952b9a | |
#include "io.h" | |
// Rotates the 32-bit value `x` left by `c` bits.
// `x` is converted to uint32_t first so that wider operands do not leak high
// bits into the result. Both shift counts are reduced modulo 32, so a count
// of 0 or 32 yields `x` unchanged instead of shifting by the full width,
// which C leaves undefined. `x` and `c` are each evaluated twice.
#define MD5_LEFTROTATE(x, c) \
    ( ((uint32_t)(x) << ((c) & 31)) | ((uint32_t)(x) >> ((32 - (c)) & 31)) )
// Per-step rotation amounts and additive constants; both are defined in md5.c.
extern uint32_t md5_r[];
extern uint32_t md5_k[];
// A 32-bit word that can also be addressed as its four individual bytes.
typedef union {
    uint32_t n;     // the word as a number
    uint8_t  b[4];  // the same storage, byte by byte
} md_word;

// The MD5 state: four words a, b, c, d, overlaid with the 16 bytes that are
// printed as the digest.
typedef union {
    struct {
        md_word a, b, c, d;
    } words;
    uint8_t digits[16];
} md_buffer_t;
// One 512-bit message chunk, viewed as sixteen 32-bit words.
typedef struct {
    uint32_t w[16];
} md5_chunk;
// Sentinel returned by md5_read_map() once the whole message has been read.
// The expansion is fully parenthesized so that the macro behaves as a single
// operand in any expression (for example `sizeof MD5_EOF`).
#define MD5_EOF ((md5_chunk*)(-1))
__attribute__((hot, warn_unused_result)) | |
md5_chunk* md5_read_map(char* readafter, mapstream_t* source); | |
uint64_t md5_size(off_t fsize); | |
__attribute__((hot, nonnull)) | |
void md5_calculate(mapstream_t* fin, md_buffer_t* md); | |
const char* md5_string(md_buffer_t* md); | |
__attribute__((malloc)) | |
md_buffer_t* md5_hash_raw(const char* message, size_t length); | |
uint32_t md5_f1(md_buffer_t* md); | |
uint32_t md5_f2(md_buffer_t* md); | |
uint32_t md5_f3(md_buffer_t* md); | |
uint32_t md5_f4(md_buffer_t* md); | |
// This extern was initialized in md5.c file. | |
extern uint32_t (*md5_fns[])(md_buffer_t*); | |
extern uint8_t md5_rho_m[]; | |
extern uint8_t md5_rho_add[]; | |
#endif |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment