Skip to content

Instantly share code, notes, and snippets.

@Sasquire
Last active February 8, 2022 05:09
Show Gist options
  • Save Sasquire/7b8e52471c333febe6c4896cf516afe2 to your computer and use it in GitHub Desktop.
Save Sasquire/7b8e52471c333febe6c4896cf516afe2 to your computer and use it in GitHub Desktop.

MD5 Sum

I wanted a public domain md5-sum utility so I could include it in other projects and have them be entirely public domain. To do this I extended code by Luigi Galli and updated much of it to be what I consider more readable.

Some documentation on the MD5 algorithm can be found here.

How to use

const md5_sum = require('./md5.js');

function string_to_array_buffer (string) {
	// Each UTF-16 code unit becomes one byte (values above 255 wrap), so
	// this is intended to work ONLY ON SIMPLE ASCII STRINGS
	const code_units = string.split('');
	const bytes = Uint8Array.from(code_units, (unit) => unit.charCodeAt(0));
	return bytes.buffer;
}

// Hash the ASCII string 'hello'; md5_sum returns the digest as a hex string,
// which is compared against the known MD5 value for that input.
const output = md5_sum(string_to_array_buffer('hello'));
console.log(output === '5d41402abc4b2a76b9719d911017c592'); // true

As a quick note, you CAN NOT use this program to calculate the md5 sum of data that is not a whole number of bytes. For example, an input of ONLY 13 BITS will be rounded up to 16 bits (2 bytes). This program assumes all data is passed as full bytes.

Developing

This code was made using CommonJS to work with browserify. All the code can be found in md5.js while tests are in test.js. There should not be much work needed for this repository, but try to keep it simple and contained in one file. If you need to adapt the code for ES6, it should not be hard. Please make a pull request if you do!

Tests can be run, after installing developer packages, by the command npm run test.

License

All code in this repository is licensed under the Unlicense and is in the Public Domain.

/*
Javascript MD5 library - version 0.4
Coded (2011) by Luigi Galli - LG@4e71.org - http://faultylabs.com
https://gist.github.com/ForbesLindesay/5562935
Thanks to: Roberto Viola
The below code is PUBLIC DOMAIN - NO WARRANTY!
The code has been modified from its original state to
better suit this project. With that, it moves from
one public domain license to another.
Unlicense (2019) <https://unlicense.org/>
*/
// Format a number as an unsigned 32-bit value in lowercase hex,
// left-padded with zeros to exactly 8 characters.
function to_zero_filled_hex (n) {
	const hex = (n >>> 0).toString(16);
	const padding = '0'.repeat(8 - hex.length);
	return padding + hex;
}
// Convert a non-negative integer to its 8-byte little-endian representation
// (least significant byte first).
//
// num: non-negative integer. Values up to Number.MAX_SAFE_INTEGER (2^53 - 1)
//      are encoded exactly; a JavaScript number cannot hold a full 64-bit
//      integer, so the top bits are always zero in practice.
// Returns an array of 8 numbers, each in [0, 255].
function int64_to_bytes (num) {
	const bytes = [];
	let remaining = num;
	for (let i = 0; i < 8; i++) {
		// Plain arithmetic is used instead of `& 0xFF` and `>>> 8`: the bitwise
		// operators truncate their operand to 32 bits, which would zero out
		// the upper four bytes of any value of 2^32 or more.
		bytes.push(remaining % 256);
		remaining = Math.floor(remaining / 256);
	}
	return bytes;
}
// Rotate a 32-bit value left by `places` bits.
// The result is a signed 32-bit integer, as produced by the bitwise operators.
function rol (num, places) {
	// Bits that fall off the top re-enter at the bottom.
	const wrapped_around = num >>> (32 - places);
	const shifted_up = (num << places) & 0xFFFFFFFF;
	return shifted_up | wrapped_around;
}
// The 4 MD5 functions
// Bitwise select: where a bit of b is 1 take the bit from c, otherwise from d.
function fF (b, c, d) {
	const taken_from_c = b & c;
	const taken_from_d = ~b & d;
	return taken_from_c | taken_from_d;
}
// Bitwise select: where a bit of d is 1 take the bit from b, otherwise from c.
function fG (b, c, d) {
	const taken_from_b = d & b;
	const taken_from_c = ~d & c;
	return taken_from_b | taken_from_c;
}
// Bitwise parity: XOR of the three inputs.
function fH (b, c, d) {
	const b_xor_c = b ^ c;
	return b_xor_c ^ d;
}
// XOR of c with (b OR NOT d).
function fI (b, c, d) {
	const b_or_not_d = b | ~d;
	return c ^ b_or_not_d;
}
// Read the 4 bytes starting at arr[off] as one little-endian 32-bit word.
// The result is a signed 32-bit integer, as produced by the bitwise operators.
function bytes_to_int32 (arr, off) {
	let word = 0;
	// Start from the most significant byte (highest index) and shift it up
	// as each lower byte is merged in.
	for (let i = 3; i >= 0; i--) {
		word = (word << 8) | arr[off + i];
	}
	return word;
}
// Convert four 32-bit words to a 32-character hex string.
// The words are emitted in the order d, c, b, a, and each word is
// byte-swapped first so that its least significant byte is printed first.
function int128le_to_hex (a, b, c, d) {
	let hex = '';
	for (const word of [d, c, b, a]) {
		const swapped =
			((word & 0xFF) << 24) |
			(((word >>> 8) & 0xFF) << 16) |
			(((word >>> 16) & 0xFF) << 8) |
			(word >>> 24);
		hex = hex + to_zero_filled_hex(swapped);
	}
	return hex;
}
// Add two numbers and keep only the low 32 bits of the sum.
// The result is a signed 32-bit integer.
function _add (n1, n2) {
	const sum = n1 + n2;
	return sum & 0xFFFFFFFF;
}
// Runs the 64 steps (four groups of 16) over one 64-byte block of the message.
// options.a, options.b, options.c, options.d: the four state words on entry
// options.data: indexable collection of bytes holding the message
// options.pointer: byte offset inside data at which this block starts
// Returns [a, b, c, d]: each entry word added to the word produced by the
// 64 steps, truncated to 32 bits by _add.
function do_64_runs (options) {
// Working copies of the state; options itself is left untouched so the
// entry values can be added back in at the end.
let a = options.a;
let b = options.b;
let c = options.c;
let d = options.d;
const pointer = options.pointer;
const data = options.data;
// One step: shifts the registers along (d -> a, c -> d, b -> c) and
// computes the new b from the old a, b and the three supplied values.
// nf: output of the step's mixing function
// sin32: the step's additive constant
// dw32: the 32-bit message word used by the step
// b32: left-rotation amount
function set_values (nf, sin32, dw32, b32) {
const temp = d;
d = c;
c = b;
// b = b + rol(a + (nf + (sin32 + dw32)), b32)
// b = b + rol(inside, b32)
const inside = _add(a, _add(nf, _add(sin32, dw32)));
b = _add(b, rol(inside, b32));
a = temp;
}
// Evaluates f_func on the current (b, c, d), reads the little-endian word
// at byte `offset` within the current block and hands both to set_values.
// binary_sine is the step constant; the name suggests the sine-derived
// table of RFC 1321, but the values are not checked here.
function do_run (f_func, binary_sine, offset, shift_amount) {
set_values(
f_func(b, c, d),
binary_sine,
bytes_to_int32(data, pointer + offset),
shift_amount
);
}
// Short alias to keep the 64-line table below compact.
const $ = do_run;
// Steps 1-16: fF, message words read in order, shifts 7/12/17/22.
$(fF, 0xd76aa478, 0, 7);
$(fF, 0xe8c7b756, 4, 12);
$(fF, 0x242070db, 8, 17);
$(fF, 0xc1bdceee, 12, 22);
$(fF, 0xf57c0faf, 16, 7);
$(fF, 0x4787c62a, 20, 12);
$(fF, 0xa8304613, 24, 17);
$(fF, 0xfd469501, 28, 22);
$(fF, 0x698098d8, 32, 7);
$(fF, 0x8b44f7af, 36, 12);
$(fF, 0xffff5bb1, 40, 17);
$(fF, 0x895cd7be, 44, 22);
$(fF, 0x6b901122, 48, 7);
$(fF, 0xfd987193, 52, 12);
$(fF, 0xa679438e, 56, 17);
$(fF, 0x49b40821, 60, 22);
// Steps 17-32: fG, offsets start at 4 and advance by 20 bytes (mod 64),
// shifts 5/9/14/20.
$(fG, 0xf61e2562, 4, 5);
$(fG, 0xc040b340, 24, 9);
$(fG, 0x265e5a51, 44, 14);
$(fG, 0xe9b6c7aa, 0, 20);
$(fG, 0xd62f105d, 20, 5);
$(fG, 0x2441453, 40, 9);
$(fG, 0xd8a1e681, 60, 14);
$(fG, 0xe7d3fbc8, 16, 20);
$(fG, 0x21e1cde6, 36, 5);
$(fG, 0xc33707d6, 56, 9);
$(fG, 0xf4d50d87, 12, 14);
$(fG, 0x455a14ed, 32, 20);
$(fG, 0xa9e3e905, 52, 5);
$(fG, 0xfcefa3f8, 8, 9);
$(fG, 0x676f02d9, 28, 14);
$(fG, 0x8d2a4c8a, 48, 20);
// Steps 33-48: fH, offsets start at 20 and advance by 12 bytes (mod 64),
// shifts 4/11/16/23.
$(fH, 0xfffa3942, 20, 4);
$(fH, 0x8771f681, 32, 11);
$(fH, 0x6d9d6122, 44, 16);
$(fH, 0xfde5380c, 56, 23);
$(fH, 0xa4beea44, 4, 4);
$(fH, 0x4bdecfa9, 16, 11);
$(fH, 0xf6bb4b60, 28, 16);
$(fH, 0xbebfbc70, 40, 23);
$(fH, 0x289b7ec6, 52, 4);
$(fH, 0xeaa127fa, 0, 11);
$(fH, 0xd4ef3085, 12, 16);
$(fH, 0x4881d05, 24, 23);
$(fH, 0xd9d4d039, 36, 4);
$(fH, 0xe6db99e5, 48, 11);
$(fH, 0x1fa27cf8, 60, 16);
$(fH, 0xc4ac5665, 8, 23);
// Steps 49-64: fI, offsets start at 0 and advance by 28 bytes (mod 64),
// shifts 6/10/15/21.
$(fI, 0xf4292244, 0, 6);
$(fI, 0x432aff97, 28, 10);
$(fI, 0xab9423a7, 56, 15);
$(fI, 0xfc93a039, 20, 21);
$(fI, 0x655b59c3, 48, 6);
$(fI, 0x8f0ccc92, 12, 10);
$(fI, 0xffeff47d, 40, 15);
$(fI, 0x85845dd1, 4, 21);
$(fI, 0x6fa87e4f, 32, 6);
$(fI, 0xfe2ce6e0, 60, 10);
$(fI, 0xa3014314, 24, 15);
$(fI, 0x4e0811a1, 52, 21);
$(fI, 0xf7537e82, 16, 6);
$(fI, 0xbd3af235, 44, 10);
$(fI, 0x2ad7d2bb, 8, 15);
$(fI, 0xeb86d391, 36, 21);
// Fold the block's result into the state that was passed in.
return [
_add(options.a, a),
_add(options.b, b),
_add(options.c, c),
_add(options.d, d)
];
}
// Hash already-padded data and return the digest as a hex string.
// data: indexable bytes with a `length`; it is consumed in 64-byte blocks.
function digest (data) {
	// The four 32-bit state words, in the order a, b, c, d.
	let state = [0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476];
	// Feed each 64-byte block through do_64_runs, carrying the state forward.
	for (let pointer = 0; pointer < data.length; pointer += 64) {
		const [a, b, c, d] = state;
		state = do_64_runs({ a, b, c, d, pointer, data });
	}
	// Done! Convert the state words to a 128 bit hex string (LE)
	const [h0, h1, h2, h3] = state;
	return int128le_to_hex(h3, h2, h1, h0);
}
// Build the padded message: the original bytes, a single 0x80 byte, zero
// bytes up to 8 short of a multiple of 64, then the original length in bits
// as 8 little-endian bytes.
//
// data: an <ArrayBuffer>, or a typed array / DataView over one.
// Returns a new <Uint8Array> whose length is a multiple of 64; the input is
// not modified.
function pad_input (data) {
	// Normalise to a byte view first. Passing a bare ArrayBuffer straight to
	// Uint8Array.prototype.set copies nothing (an ArrayBuffer has no `length`),
	// which would silently replace the message with zeros.
	const bytes = ArrayBuffer.isView(data)
		? new Uint8Array(data.buffer, data.byteOffset, data.byteLength)
		: new Uint8Array(data);
	const original_len = bytes.length;

	// tail is the amount of space used in the last 512 bit block.
	// 1 is added to it because 0b10000000 must be appended to the
	// end of the buffer before it is passed on.
	const tail = (original_len + 1) % 64;
	// If fewer than 8 bytes remain for the length field, spill into a new block.
	const zero_pad = tail > 56 ? (120 - tail) : (56 - tail);

	// 0b10000000 + zeros to the 448 bit mark + original length
	const total_pad = 1 + zero_pad + 8;
	const new_size = original_len + total_pad;

	// A fresh Uint8Array is zero-filled, so the zero padding needs no work.
	const view = new Uint8Array(new_size);
	view.set(bytes); // Copy old data
	view[original_len] = 0x80; // 0b10000000
	view.set(int64_to_bytes(original_len * 8), new_size - 8); // Original size in bits
	return view;
}
// TODO: results cannot be guaranteed when hashing more than 2^64 bits (about 2.3 exabytes) of data.
// TODO: results cannot be guaranteed when the input is not made of full bytes; hashing 15 bits (roughly 2 bytes) is untested.
// Compute the MD5 digest of an <ArrayBuffer>.
// Returns whatever digest() produces for the padded bytes.
// Throws an Error when data is not an ArrayBuffer instance.
function MD5 (data) {
	const is_array_buffer = data instanceof ArrayBuffer;
	if (!is_array_buffer) {
		throw new Error('Data passed must be an array buffer');
	}
	const bytes = new Uint8Array(data);
	const padded = pad_input(bytes);
	return digest(padded);
}
// Internal helpers are attached under MD5._private so code outside this
// module can reach them; the test file calls md5_sum._private.pad_input.
MD5._private = {};
MD5._private.pad_input = pad_input;
// CommonJS export: require('./md5.js') yields the MD5 function itself.
module.exports = MD5;
const assert = require('assert');
const crypto = require('crypto');
const random_generator = require('random-seed');
const md5_sum = require('./md5.js');
// Most of the documentation referenced can be found here
// https://datatracker.ietf.org/doc/html/rfc1321
describe('Append Padding Bits & Length', () => {
	// Reference calculation of the padded size, done in bits and following
	// the two padding steps literally.
	function proper_padding_size_in_bytes (input_in_bytes) {
		// Step 3.1: one mandatory bit, then keep padding until the length
		// is congruent to 448 modulo 512
		let bits = input_in_bytes * 8 + 1;
		while (bits % 512 !== 448) {
			bits++;
		}
		// Step 3.2: the 64 bit length field
		bits += 64;
		return bits / 8;
	}

	// Reads the final 8 bytes of a padded buffer as a little endian number,
	// so bytes earlier in memory carry less weight.
	function read_length_field (padded_input) {
		const start = padded_input.length - 8;
		let num_bits = 0;
		for (let k = 0; k < 8; k++) {
			num_bits += padded_input[start + k] * Math.pow(256, k);
		}
		return num_bits;
	}

	it('pads buffers between [0, 4096) bytes to the right size', () => {
		for (let size = 0; size < 4096; size++) {
			const proper_size = proper_padding_size_in_bytes(size);
			const padded = md5_sum._private.pad_input(new ArrayBuffer(size));
			const actual_size = padded.length;
			assert.equal(proper_size, actual_size);
		}
	});

	// Inputs longer than 2^64 bits are not covered by these checks
	it('pads buffers between [0, 4096) bytes with the right values', () => {
		for (let size = 0; size < 4096; size++) {
			const padded_input = md5_sum._private.pad_input(new ArrayBuffer(size));
			const length_field_start = padded_input.length - 8;

			// Check step 3.1: a 0b10000000 marker followed only by zeros
			assert.equal(padded_input[size], 0x80);
			for (let j = size + 1; j < length_field_start; j++) {
				assert.equal(padded_input[j], 0x00);
			}

			// Check step 3.2, the last 8 bytes (64 bits)
			assert.equal(read_length_field(padded_input), size * 8);
		}
	});
});
describe('md5sum', () => {
// Suite-level hooks. Both bodies are empty, so no shared setup or teardown
// is performed for the tests below.
before(() => {
// Happens before all tests
});
after(() => {
// Happens after all tests
});
describe('Fails on non-ArrayBuffer objects', () => {
	// Each entry is [test title, value that md5_sum must reject].
	const rejected_inputs = [
		['fails on an empty string', ''],
		['fails on a filled string', 'hello'],
		['fails on an object', {data1: 1, data2: 'a', data3: {}}],
		['fails on a number', 1]
	];

	rejected_inputs.forEach(([title, value]) => {
		it(title, () => {
			assert.throws(() => md5_sum(value));
		});
	});
});
describe('Static UInt8Array tests', () => {
	// Stores each UTF-16 code unit of the string as one byte.
	// Intended to work ONLY ON SIMPLE ASCII STRINGS; anything else will
	// probably not come out as expected.
	function string_to_array_buffer (string) {
		const bytes = new Uint8Array(string.length);
		for (let i = 0; i < string.length; i++) {
			bytes[i] = string.charCodeAt(i);
		}
		return bytes.buffer;
	}

	it('works on empty buffer', () => {
		const actual = md5_sum(string_to_array_buffer(''));
		assert.equal(actual, 'd41d8cd98f00b204e9800998ecf8427e');
	});

	it('works on a filled buffer', () => {
		const actual = md5_sum(string_to_array_buffer('hello'));
		assert.equal(actual, '5d41402abc4b2a76b9719d911017c592');
	});
});
describe('100,000 seeded random tests of size [0, 4096) match the crypto library\'s answers', () => {
const rand = random_generator('Every day I miss 6268');
// Build an <ArrayBuffer> of `size` bytes, each filled with rand(256) from
// the seeded generator.
function make_random_data (size) {
	const array_buffer = new ArrayBuffer(size);
	const view = new Uint8Array(array_buffer);
	for (let i = 0; i < size; i++) {
		// Assign by index. view.set(number, i) treats the number as an
		// array-like source with no length and therefore writes nothing,
		// which left every generated buffer filled with zeros.
		view[i] = rand(256);
	}
	return array_buffer;
}
// Hash 10000 generated buffers, each of size rand(4096), and check every
// result against the digest computed by the crypto module.
function run_10000_tests () {
	let remaining = 10000;
	while (remaining-- > 0) {
		const data = make_random_data(rand(4096));
		const expected = crypto.createHash('md5').update(Buffer.from(data)).digest('hex');
		const actual = md5_sum(data);
		assert.equal(expected, actual);
	}
}
// Register the same 10000-case batch ten times. The seeded generator keeps
// advancing between batches, so each one hashes different input sizes.
[
	'00,000-10,000',
	'10,000-20,000',
	'20,000-30,000',
	'30,000-40,000',
	'40,000-50,000',
	'50,000-60,000',
	'60,000-70,000',
	'70,000-80,000',
	'80,000-90,000',
	'90,000-100,000'
].forEach((title) => {
	it(title, run_10000_tests);
});
});
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment