Skip to content

Instantly share code, notes, and snippets.

@lynnporu
Last active November 11, 2020 13:42
Show Gist options
  • Save lynnporu/11cb33333ab6dcf607bc0f8dc524cc80 to your computer and use it in GitHub Desktop.
Save lynnporu/11cb33333ab6dcf607bc0f8dc524cc80 to your computer and use it in GitHub Desktop.
MD5 implementation in C using Linux file mapping
/*
This is an MD5 implementation in C that uses Linux file mapping. You can
compile it as a standalone program with the following command:
gcc -Wall md5.c io.c -o bin/md5
This .c file is also required whenever you include md5.h somewhere in your
program. In that case either delete main() from this file or compile with the
-DOMIT_MD5_MAIN flag.
The following flags are available:
-DPRINT_READ Prints the message "read: .../..." that shows the progress of the
calculation.
-DOMIT_MD5_MAIN Do not include the main() of md5.c in your code.
*/
#include "md5.h"
// Byte-wise pointer offset: treats `p` as a pointer to bytes and yields the
// address `n` bytes past it, typed as uint8_t*. Despite the name, nothing is
// bit-shifted. Can be pre-defined by the includer to override it.
#ifndef RSHIFT_U8
#define RSHIFT_U8(p, n) ( &((uint8_t*)(p))[(n)] )
#endif
#ifndef OMIT_MD5_MAIN
// These chars are used to store user -i, -o and -c flags.
char compare_mode = 0;
char read_from_file = 0;
// Renaming `print_to_file` and `fout` variables will
// ruin IO_PRINT macro defined in io.h. You should also rename
// these in the macro or just don't use IO_PRINT.
char print_to_file = 0;
FILE* fout;
char compare_hash[32];
mapstream_t fin;
int main(int argc, char** argv) {
int argument = 0;
while( (argument = getopt(argc, argv, "hei:o:c:")) != -1 ){
switch(argument){
case 'h':
printf(
"This program generates MD5 hash from the string you type into stdin. EOF is\n"
"exptected at the end of the stdin. You can generate it with Ctrl+D in Linux\n"
"or Ctrl+Z in Windows terminal. Output will be typed into stdout.\n"
" -h Prints this message;\n"
" -e Do not cut the last symbol of the read buffer (usually EOF or \\0);\n"
" -i [..] Take input from the given file instead of stdin;\n"
" -o [..] Rewrite the given file with the output. Do not print to stdout;\n"
" -c [..] Compare stdin (or file, if -o used) with the hash given here. Prints\n"
" '0' if hash does match and '1' otherwise.\n"
);
return 0;
break;
case 'e':
IO_CUT_EOF = 0;
break;
case 'i':
read_from_file = 1;
openfmap(&fin, optarg, O_RDONLY, 0, PROT_READ);
fin.sizeslot1 = md5_size(fin.fsize);
break;
case 'o':
print_to_file = 1;
// No need to map output file.
if((fout = fopen(optarg, "w+")) == NULL){
EMERGENCY("Cannot open output file");
exit(EXIT_FAILURE);
}
break;
case 'c':
compare_mode = 1;
// Unsafe behaviour here, because optarg can be
// smaller or bigger than 32 bytes.
strcat(compare_hash, optarg);
break;
}
}
if(!read_from_file){
size_t fsize;
char* msg;
readstream(&msg, &fsize, stdin);
fin.map = (void*)msg;
fin.fsize = (off_t)fsize;
}
md_buffer_t md;
md5_calculate(&fin, &md);
const char* md5_result = md5_string(&md);
// Be carefull with IO_PRINT macro, as it relies on
// `fout` and `print_to_file` variables be named exactly
// like that.
if(compare_mode)
IO_PRINT(
"%d\n", strcmp(compare_hash, md5_result) == 0
)
else
IO_PRINT("%s\n", md5_result)
if(read_from_file) closefmap(&fin);
else free(fin.map);
if(print_to_file) fclose(fout);
free((void*)md5_result);
return 0;
}
#endif
// Round-1 mixing function: for every bit, takes c where b is set and d where
// b is clear. Reads words b, c and d of `md`; modifies nothing.
uint32_t md5_f1(md_buffer_t* md){
    const uint32_t b = md->words.b.n;
    const uint32_t c = md->words.c.n;
    const uint32_t d = md->words.d.n;
    return (b & c) | (~b & d);
}
// Round-2 mixing function: for every bit, takes b where d is set and c where
// d is clear. Reads words b, c and d of `md`; modifies nothing.
uint32_t md5_f2(md_buffer_t* md){
    const uint32_t b = md->words.b.n;
    const uint32_t c = md->words.c.n;
    const uint32_t d = md->words.d.n;
    return (d & b) | (~d & c);
}
// Round-3 mixing function: bitwise parity (XOR) of words b, c and d of `md`.
uint32_t md5_f3(md_buffer_t* md){
    uint32_t mixed = md->words.b.n;
    mixed ^= md->words.c.n;
    mixed ^= md->words.d.n;
    return mixed;
}
// Round-4 mixing function: c XOR (b OR NOT d), computed on words b, c and d
// of `md`; modifies nothing.
uint32_t md5_f4(md_buffer_t* md){
    const uint32_t b = md->words.b.n;
    const uint32_t c = md->words.c.n;
    const uint32_t d = md->words.d.n;
    const uint32_t b_or_not_d = b | ~d;
    return c ^ b_or_not_d;
}
/*
 * Hands out the next 64-byte (512-bit) chunk of the padded MD5 message built
 * over the bytes of `source->map`, and advances `source->cursor` by 64. Its
 * output is meant to be consumed by md5_calculate.
 *
 * The padded message has the following structure:
 * [message =
 *  [A = data bytes], len(A) = source->fsize
 *  [0x80], the mandatory "1" bit
 *  [B = zero bytes], such that (len(A) + 1 + len(B)) % 64 = 56
 *  [C = fsize * 8 as uint64_t], len(C) = 8 bytes, host byte order
 * ]
 * The layout is derived from source->fsize alone. source->sizeslot1 is not
 * consulted, because not every caller fills it in (main() leaves it at zero
 * when reading from stdin).
 *
 * `*freeafter` is set to 1 when the returned chunk was allocated here and
 * must be released by the caller with free(). It is 0 when the chunk points
 * straight into the mapping and when MD5_EOF is returned.
 *
 * When `message` is read up to the end, this function returns MD5_EOF.
 * Further calls return the same. In order to get the message again, set
 * source->cursor = 0 manually. Terminates the process with EXIT_FAILURE if a
 * padding chunk cannot be allocated. */
md5_chunk* md5_read_map(char* freeafter, mapstream_t* source){
    *freeafter = 0;

    // Here and further 64 = 512 / 8 is the size of a chunk in bytes.
    const uint64_t fsize = (uint64_t)source->fsize;
    const uint64_t cursor = (uint64_t)source->cursor;
    // Smallest multiple of 64 that holds data + 0x80 + the 8-byte length.
    const uint64_t total = ((fsize + 8) / 64 + 1) * 64;

    // Reached the end of the message
    if(cursor >= total)
        return MD5_EOF;

    #ifdef PRINT_READ
    printf("read: %.12ld / %.12ld\r", (long)cursor, (long)total);
    #endif

    // Chunk is entirely in file bounds: return it in place, then step
    // forward (the cursor must be advanced AFTER the address is taken).
    if(cursor + 64 <= fsize){
        md5_chunk* inplace = (md5_chunk*)RSHIFT_U8(source->map, cursor);
        source->cursor += 64;
        return inplace;
    }

    // Otherwise build the chunk in a zero-initialized buffer.
    md5_chunk* chunk = calloc(1, sizeof *chunk);
    if(chunk == NULL){
        fputs("md5_read_map: cannot allocate padding chunk\n", stderr);
        exit(EXIT_FAILURE);
    }
    *freeafter = 1;

    // The data ends inside this chunk (cursor <= fsize < cursor + 64):
    // copy the 0..63 remaining bytes and put the "1" bit right after them.
    // This also covers an empty tail, i.e. fsize being a multiple of 64.
    if(fsize >= cursor){
        const size_t tail = (size_t)(fsize - cursor);
        if(tail > 0)
            memcpy(chunk, RSHIFT_U8(source->map, cursor), tail);
        *RSHIFT_U8(chunk, tail) = 0x80;
    }

    // The last chunk carries the message size in bits in its final 8 bytes.
    // It is stored in host byte order, the same way md5_calculate reads
    // chunk->w[], so the digest is a correct MD5 on little-endian hosts.
    if(cursor + 64 == total){
        const uint64_t bsize = fsize * 8;
        memcpy(RSHIFT_U8(chunk, 56), &bsize, sizeof bsize);
    }

    source->cursor += 64;
    return chunk;
}
/*
 * Returns the size in bytes of the padded message without its trailing
 * 8-byte length field: the smallest value that is >= fsize + 1 and congruent
 * to 56 (mod 64), i.e. in bits 448 (mod 512).
 *
 * The "+ 1" accounts for the mandatory 0x80 padding byte, so an input of
 * exactly 56 (mod 64) bytes spills into the next 64-byte chunk (56 -> 120).
 * A negative fsize is treated as 0. Runs in constant time. */
uint64_t md5_size(off_t fsize){
    const uint64_t len = fsize > 0 ? (uint64_t)fsize : 0;
    return ((len + 8) / 64 + 1) * 64 - 8;
}
/*
 * Computes the MD5 state over the message exposed by `fin` and leaves the
 * result in `md`. Chunks are pulled with md5_read_map until it reports
 * MD5_EOF, so `fin` must be positioned (cursor) where hashing should start;
 * the cursor is not rewound afterwards. For hashing a plain char* buffer see
 * md5_hash_raw(). */
void md5_calculate(mapstream_t* fin, md_buffer_t* md){
    char must_free;
    md5_chunk* block;

    // Initial values of the four state words.
    md->words.a.n = 0x67452301;
    md->words.b.n = 0xefcdab89;
    md->words.c.n = 0x98badcfe;
    md->words.d.n = 0x10325476;

    for(;;){
        block = md5_read_map(&must_free, fin);
        if(block == MD5_EOF)
            break;

        // Work on a copy of the running state; it is folded back in below.
        md_buffer_t work = *md;

        for(uint8_t step = 0; step < 64; step++){
            // Stage 0..3, one per group of 16 steps; selects the mixing
            // function and the word-permutation coefficients.
            const uint8_t stage = (step >> 4) & 3;
            const uint32_t mixed = (*md5_fns[stage])(&work);
            // Index of the message word consumed by this step.
            const uint32_t word_idx =
                (md5_rho_m[stage] * step + md5_rho_add[stage])
                % (step < 16 ? 64 : 16);
            const uint32_t sum =
                work.words.a.n + mixed + md5_k[step] + block->w[word_idx];

            // Rotate the four words: a <- d, d <- c, c <- b, b <- b + rot.
            const md_word old_d = work.words.d;
            const uint32_t next_b =
                work.words.b.n + MD5_LEFTROTATE(sum, md5_r[step]);
            work.words.d = work.words.c;
            work.words.c = work.words.b;
            work.words.b.n = next_b;
            work.words.a = old_d;
        }

        md->words.a.n += work.words.a.n;
        md->words.b.n += work.words.b.n;
        md->words.c.n += work.words.c.n;
        md->words.d.n += work.words.d.n;

        // Padding chunks are heap-allocated by md5_read_map.
        if(must_free)
            free(block);
    }
}
/*
 * Formats the 16 digest bytes of `md` as 32 lowercase hexadecimal characters.
 * Returns a newly malloc'ed, NUL-terminated string that the caller must
 * free(). Terminates the process with EXIT_FAILURE if allocation fails. */
const char* md5_string(md_buffer_t* md){
    static const char hex_digits[] = "0123456789abcdef";
    enum { DIGEST_BYTES = 16 };

    // Two symbols per byte + '\0'
    char* out = malloc(DIGEST_BYTES * 2 + 1);
    if(out == NULL){
        EMERGENCY("Error with allocating result")
        exit(EXIT_FAILURE);
    }

    char* cur = out;
    for(size_t k = 0; k < DIGEST_BYTES; k++){
        const uint8_t byte = md->digits[k];
        *cur++ = hex_digits[byte >> 4];     // high nibble first
        *cur++ = hex_digits[byte & 0x0f];
    }
    *cur = '\0';
    return out;
}
/*
 * Computes the MD5 state of `length` bytes starting at `message`.
 *
 * The buffer is presented to md5_calculate as if it was a file mapping, by
 * filling a temporary mapstream_t by hand; this is the general recipe for
 * hashing an arbitrary byte sequence. `message` is only read.
 *
 * Returns a malloc'ed md_buffer_t that the caller must free(), or NULL if
 * it cannot be allocated. */
md_buffer_t* md5_hash_raw(const char* message, size_t length){
    md_buffer_t* md = malloc(sizeof *md);
    if(md == NULL)
        return NULL;

    // The stream only lives for the duration of this call, so it does not
    // need a heap allocation. Unused fields are zeroed.
    mapstream_t stream;
    memset(&stream, 0, sizeof stream);
    stream.map = (void*)message;
    stream.fsize = (off_t)length;
    stream.cursor = 0;
    stream.sizeslot1 = (off_t)md5_size(stream.fsize);

    md5_calculate(&stream, md);
    return md;
}
// Left-rotation amount for each of the 64 steps, indexed by step number.
// One row per round of 16 steps; within a round the same four amounts repeat.
// Consumed by md5_calculate as the second argument of MD5_LEFTROTATE.
uint32_t md5_r[] = {
7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22,
5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20,
4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23,
6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21
};
// Sine-derived additive constants, one per step (64 entries, indexed by step
// number in md5_calculate). Per RFC 1321, entry i is
// floor(2^32 * abs(sin(i + 1))) with the argument in radians.
uint32_t md5_k[] = {
0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
};
// Dispatch table of the four mixing functions, indexed by round number 0..3
// (md5_calculate derives the index from the step number as (i >> 4) & 3).
uint32_t (*md5_fns[])(md_buffer_t*) = {&md5_f1, &md5_f2, &md5_f3, &md5_f4};
// The per-round permutation of message words is computed as
// rho(i) = (md5_rho_m[round] * i + md5_rho_add[round]) % modulus,
// where md5_calculate uses modulus 64 for i < 16 and 16 otherwise.
// Both tables are indexed by round number 0..3:
uint8_t md5_rho_m[] = {1, 5, 3, 7};
uint8_t md5_rho_add[] = {0, 1, 5, 0};
#ifndef H_MD5
#define H_MD5
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
// You can find this header file at
// https://gist.github.com/lynnporu/0cb091a71a8e1574dcae3e39a3952b9a
#include "io.h"
// Rotates the low 32 bits of `x` left by `c` bit positions and yields a
// uint32_t. Both shift counts are reduced modulo 32, so c == 0 (or 32) is
// well defined instead of shifting by the full width, and an operand wider
// than 32 bits is truncated first rather than leaking high bits into the
// result. Note that `x` and `c` are evaluated more than once.
#define MD5_LEFTROTATE(x, c) \
    ((uint32_t)(((uint32_t)(x) << ((c) & 31)) | \
                ((uint32_t)(x) >> ((32 - (c)) & 31))))
// Per-step tables used by md5_calculate, both indexed by step number 0..63:
// md5_r holds the left-rotation amounts, md5_k the additive constants.
// This header only declares them; the definitions live in the .c file.
extern uint32_t md5_r[];
extern uint32_t md5_k[];
// This union lets you treat a 32-bit number as four 8-bit numbers: `n` and
// `b` overlay the same 4 bytes, so which byte of `n` appears in b[0] depends
// on the host byte order.
typedef union {
uint32_t n;
uint8_t b[4];
} md_word;
// MD5 state: four 32-bit words a, b, c, d. `digits` overlays the same
// 16 bytes and gives byte-wise access to them in memory order; md5_string
// formats digits[0..15] to produce the printable hash.
typedef union {
struct {
md_word a;
md_word b;
md_word c;
md_word d;
} words;
uint8_t digits[16];
} md_buffer_t;
// One 512-bit block of the padded message, viewed as sixteen 32-bit words.
// md5_read_map returns pointers to these, md5_calculate consumes them.
typedef struct {
uint32_t w[16];
} md5_chunk;
// Sentinel returned by md5_read_map once the whole message has been read.
// It is an invalid pointer value and must only be compared, never
// dereferenced or freed. The expansion is fully parenthesised so it behaves
// as a single operand in any expression (e.g. `sizeof MD5_EOF`).
#define MD5_EOF ((md5_chunk*)(-1))
// Returns the next 64-byte chunk of the padded message read from `source`,
// or MD5_EOF when the message is exhausted. `*freeafter` is set to 1 when the
// returned chunk was heap-allocated and the caller must free() it, and to 0
// otherwise. The parameter name matches the one used by the definition.
__attribute__((hot, warn_unused_result))
md5_chunk* md5_read_map(char* freeafter, mapstream_t* source);
// Returns the size in bytes of the padded message for an input of `fsize`
// bytes, not counting the trailing 8-byte length field; the result is
// congruent to 56 modulo 64.
uint64_t md5_size(off_t fsize);
// Hashes the message exposed by `fin` (pulled chunk by chunk through
// md5_read_map) and stores the resulting state in `md`.
__attribute__((hot, nonnull))
void md5_calculate(mapstream_t* fin, md_buffer_t* md);
// Returns the digest in `md` as a malloc'ed, NUL-terminated string of 32 hex
// characters; the caller releases it with free().
const char* md5_string(md_buffer_t* md);
// Hashes `length` bytes at `message` and returns the state in a malloc'ed
// md_buffer_t; the caller releases it with free().
__attribute__((malloc))
md_buffer_t* md5_hash_raw(const char* message, size_t length);
// Mixing functions of rounds 1..4. Each one reads words b, c and d of `md`
// and returns the combined 32-bit value without modifying `md`.
uint32_t md5_f1(md_buffer_t* md);
uint32_t md5_f2(md_buffer_t* md);
uint32_t md5_f3(md_buffer_t* md);
uint32_t md5_f4(md_buffer_t* md);
// These externs are defined in the md5.c file. All three are indexed by
// round number 0..3: md5_fns dispatches to md5_f1..md5_f4, while md5_rho_m
// and md5_rho_add hold the multiplier and the offset of the per-round
// message-word permutation.
extern uint32_t (*md5_fns[])(md_buffer_t*);
extern uint8_t md5_rho_m[];
extern uint8_t md5_rho_add[];
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment