Skip to content

Instantly share code, notes, and snippets.

@jandk
Created April 10, 2013 08:27
Show Gist options
  • Save jandk/5352820 to your computer and use it in GitHub Desktop.
Save jandk/5352820 to your computer and use it in GitHub Desktop.
Calculate 4 MD5 hashes at the same time. ~2.5x speedup.
#include <emmintrin.h>
// Short aliases for the unsigned integer types used in this file.
// Raw byte; md5() uses it to address individual bytes of its input block.
typedef unsigned char uint8;
// Type of the message lengths passed to md5().
// NOTE(review): the code treats this as a 32-bit quantity, which `unsigned int`
// does not guarantee on every platform - confirm for the intended targets.
typedef unsigned int uint32;
// Not referenced by the code visible in this file.
typedef unsigned long long uint64;
// In/out block for md5(): four 128-bit vectors, 64 bytes in total.
// On input md5() treats h0..h3 as four 16-byte message slots (h<i> carries
// message i); on output h<i> carries the 16-byte digest of message i.
typedef struct MD5Data {
    __m128i h0;
    __m128i h1;
    __m128i h2;
    __m128i h3;
} MD5Data;
// Transposes, in place, the 4x4 matrix of 32-bit elements whose rows are the
// four __m128i lvalues r0..r3: afterwards element j of r<i> holds what element
// i of r<j> held before (element 0 being the lowest 32 bits).
//
// Each argument is expanded more than once and is assigned to, so pass plain
// variables without side effects.
//
// The body is wrapped in do { ... } while (0) so the macro behaves as exactly
// one statement (safe inside an unbraced if/else); invoke it with a trailing
// semicolon. The temporaries carry a trailing underscore so they are unlikely
// to capture a caller variable that is passed as an argument.
#define _MM_TRANSPOSE4_PI(r0, r1, r2, r3) \
    do { \
        __m128i tr_lo01_ = _mm_unpacklo_epi32((r0), (r1)); \
        __m128i tr_lo23_ = _mm_unpacklo_epi32((r2), (r3)); \
        __m128i tr_hi01_ = _mm_unpackhi_epi32((r0), (r1)); \
        __m128i tr_hi23_ = _mm_unpackhi_epi32((r2), (r3)); \
        \
        (r0) = _mm_unpacklo_epi64(tr_lo01_, tr_lo23_); \
        (r1) = _mm_unpackhi_epi64(tr_lo01_, tr_lo23_); \
        (r2) = _mm_unpacklo_epi64(tr_hi01_, tr_hi23_); \
        (r3) = _mm_unpackhi_epi64(tr_hi01_, tr_hi23_); \
    } while (0)
// Broadcasts the 32-bit value x into all four 32-bit lanes of an __m128i.
#define EXPAND128(x) (_mm_set1_epi32((int) (x)))
// Vector with every lane set to 0xffffffff.
#define ALL1 (EXPAND128(0xFFFFFFFFu))
// Vector with every lane set to zero.
#define ALL0 (_mm_setzero_si128())
// Rotates every 32-bit lane of x left by r bits and yields the result as an
// expression.
//
// x and r are each expanded twice, so pass side-effect-free expressions.
// The expansion carries no trailing semicolon and every parameter is
// parenthesised, so the macro can be nested inside other expressions and r may
// be a compound expression such as `3 + 4`.
#define MD5_ROL(x, r) \
    (_mm_or_si128( \
        _mm_slli_epi32((x), (r)), \
        _mm_srli_epi32((x), 32 - (r)) \
    ))
// "Rotate and add": rotates every 32-bit lane of a left by s bits, then adds b
// lane-wise (wrapping); the result is stored back into a.
//
// a must be a modifiable __m128i lvalue and is expanded several times, so it
// must not have side effects.
// Wrapped in do { ... } while (0) so both assignments stay together when the
// macro is used as the body of an unbraced if/else/loop; invoke it with a
// trailing semicolon.
#define MD5_RnA(a, b, s) \
    do { \
        (a) = MD5_ROL((a), (s)); \
        (a) = _mm_add_epi32((a), (b)); \
    } while (0)
// One step of MD5 round 1 on four independent 32-bit lanes:
//     a = b + rol(a + F(b, c, d) + Xk + Ti, s)
// with F(b, c, d) = (b & c) | (~b & d).
//
// a..d and Xk are __m128i values, Ti is a 32-bit constant that is broadcast to
// all lanes, and s is the rotate count. a must be a modifiable lvalue; the
// arguments are expanded more than once, so they must be free of side effects.
// Wrapped in do { ... } while (0) so the macro is a single statement; invoke
// it with a trailing semicolon.
#define MD5_ROUND1(a, b, c, d, Xk, s, Ti) \
    do { \
        (a) = _mm_add_epi32((a), \
            _mm_add_epi32( \
                _mm_or_si128(_mm_and_si128((c), (b)), _mm_andnot_si128((b), (d))), \
                _mm_add_epi32((Xk), EXPAND128(Ti)) \
            ) \
        ); \
        MD5_RnA((a), (b), (s)); \
    } while (0)
// One step of MD5 round 2 on four independent 32-bit lanes:
//     a = b + rol(a + G(b, c, d) + Xk + Ti, s)
// with G(b, c, d) = (b & d) | (c & ~d).
//
// a..d and Xk are __m128i values, Ti is a 32-bit constant that is broadcast to
// all lanes, and s is the rotate count. a must be a modifiable lvalue; the
// arguments are expanded more than once, so they must be free of side effects.
// Wrapped in do { ... } while (0) so the macro is a single statement; invoke
// it with a trailing semicolon.
#define MD5_ROUND2(a, b, c, d, Xk, s, Ti) \
    do { \
        (a) = _mm_add_epi32((a), \
            _mm_add_epi32( \
                _mm_or_si128(_mm_and_si128((d), (b)), _mm_andnot_si128((d), (c))), \
                _mm_add_epi32((Xk), EXPAND128(Ti)) \
            ) \
        ); \
        MD5_RnA((a), (b), (s)); \
    } while (0)
// One step of MD5 round 3 on four independent 32-bit lanes:
//     a = b + rol(a + H(b, c, d) + Xk + Ti, s)
// with H(b, c, d) = b ^ c ^ d.
//
// a..d and Xk are __m128i values, Ti is a 32-bit constant that is broadcast to
// all lanes, and s is the rotate count. a must be a modifiable lvalue; the
// arguments are expanded more than once, so they must be free of side effects.
// Wrapped in do { ... } while (0) so the macro is a single statement; invoke
// it with a trailing semicolon.
#define MD5_ROUND3(a, b, c, d, Xk, s, Ti) \
    do { \
        (a) = _mm_add_epi32((a), \
            _mm_add_epi32( \
                _mm_xor_si128((b), _mm_xor_si128((c), (d))), \
                _mm_add_epi32((Xk), EXPAND128(Ti)) \
            ) \
        ); \
        MD5_RnA((a), (b), (s)); \
    } while (0)
// One step of MD5 round 4 on four independent 32-bit lanes:
//     a = b + rol(a + I(b, c, d) + Xk + Ti, s)
// with I(b, c, d) = c ^ (b | ~d); ~d is formed as d ^ ALL1.
//
// a..d and Xk are __m128i values, Ti is a 32-bit constant that is broadcast to
// all lanes, and s is the rotate count. a must be a modifiable lvalue; the
// arguments are expanded more than once, so they must be free of side effects.
// Wrapped in do { ... } while (0) so the macro is a single statement; invoke
// it with a trailing semicolon.
#define MD5_ROUND4(a, b, c, d, Xk, s, Ti) \
    do { \
        (a) = _mm_add_epi32((a), \
            _mm_add_epi32( \
                _mm_xor_si128((c), _mm_or_si128((b), _mm_xor_si128((d), ALL1))), \
                _mm_add_epi32((Xk), EXPAND128(Ti)) \
            ) \
        ); \
        MD5_RnA((a), (b), (s)); \
    } while (0)
// The four 32-bit words of the initial MD5 state, each broadcast to every
// lane so that all four hashes start from the same state.
#define MD5_AA (EXPAND128(0x67452301u))
#define MD5_BB (EXPAND128(0xefcdab89u))
#define MD5_CC (EXPAND128(0x98badcfeu))
#define MD5_DD (EXPAND128(0x10325476u))
// Computes four independent MD5 digests at once, one per 32-bit SSE lane.
//
// data: in/out. On entry, bytes [16*i, 16*i + 16) of *data hold message i
//       (i = 0..3). Every byte of a slot after the message's len[i] bytes must
//       already be zero: only the 0x80 padding byte is written here. On return
//       h0..h3 hold the 16-byte digests of messages 0..3.
// len:  lengths in bytes of the four messages. Each must be at most 15 so that
//       the message plus its 0x80 padding byte fits in the 16-byte slot.
//
// If any len[i] is larger than 15 the function returns without touching
// *data; writing the padding byte would otherwise land in a neighbouring
// message slot or past the end of *data.
void md5(MD5Data *data, const uint32 *len)
{
    __m128i a, b, c, d;
    __m128i aa, bb, cc, dd;
    __m128i ml8;
    // Byte view of the block, used to drop the padding byte into each slot.
    uint8 *chardata = (uint8 *)data;
    // Longest message that still leaves room for the padding byte in a slot.
    const uint32 max_len = 15;

    if (len[0] > max_len || len[1] > max_len ||
        len[2] > max_len || len[3] > max_len) {
        return;
    }

    // Append the mandatory 0x80 padding byte right after each message.
    chardata[ 0 + len[0]] = 0x80;
    chardata[16 + len[1]] = 0x80;
    chardata[32 + len[2]] = 0x80;
    chardata[48 + len[3]] = 0x80;

    // Message word 14: the message length in bits (bytes * 8), one per lane.
    ml8 = _mm_set_epi32((int)len[3], (int)len[2], (int)len[1], (int)len[0]);
    ml8 = _mm_slli_epi32(ml8, 3);

    aa = data->h0;
    bb = data->h1;
    cc = data->h2;
    dd = data->h3;

    // Transpose so that aa/bb/cc/dd hold message words 0/1/2/3 of all four
    // messages (lane i belongs to message i).
    _MM_TRANSPOSE4_PI(aa, bb, cc, dd);

    // Steps 1-4 of round 1 with the fixed initial state folded into the
    // constants (all sums mod 2^32):
    //   0xd76aa477 = AA + F(BB, CC, DD) + 0xd76aa478
    //   0xf8fa0bcc = DD + 0xe8c7b756
    //   0xbcdb4dd9 = CC + 0x242070db
    //   0xb18b7a77 = BB + 0xc1bdceee
    a = _mm_add_epi32(EXPAND128(0xd76aa477), aa);
    MD5_RnA(a, MD5_BB, 7);

    d = _mm_add_epi32(EXPAND128(0xf8fa0bcc), bb);
    d = _mm_add_epi32(d, _mm_or_si128(_mm_and_si128(a, MD5_BB), _mm_andnot_si128(a, MD5_CC)));
    MD5_RnA(d, a, 12);

    c = _mm_add_epi32(EXPAND128(0xbcdb4dd9), cc);
    c = _mm_add_epi32(c, _mm_or_si128(_mm_and_si128(d, a), _mm_andnot_si128(d, MD5_BB)));
    MD5_RnA(c, d, 17);

    b = _mm_add_epi32(EXPAND128(0xb18b7a77), dd);
    b = _mm_add_epi32(b, _mm_or_si128(_mm_and_si128(d, c), _mm_andnot_si128(c, a)));
    MD5_RnA(b, c, 22);

    // Remaining steps. With messages of at most 15 bytes, message words
    // 4..13 and 15 are zero (ALL0); only words 0..3 (aa..dd) and word 14
    // (ml8) carry data.

    // Round 1, steps 5-16.
    MD5_ROUND1(a, b, c, d, ALL0, 7, 0xf57c0faf);
    MD5_ROUND1(d, a, b, c, ALL0, 12, 0x4787c62a);
    MD5_ROUND1(c, d, a, b, ALL0, 17, 0xa8304613);
    MD5_ROUND1(b, c, d, a, ALL0, 22, 0xfd469501);
    MD5_ROUND1(a, b, c, d, ALL0, 7, 0x698098d8);
    MD5_ROUND1(d, a, b, c, ALL0, 12, 0x8b44f7af);
    MD5_ROUND1(c, d, a, b, ALL0, 17, 0xffff5bb1);
    MD5_ROUND1(b, c, d, a, ALL0, 22, 0x895cd7be);
    MD5_ROUND1(a, b, c, d, ALL0, 7, 0x6b901122);
    MD5_ROUND1(d, a, b, c, ALL0, 12, 0xfd987193);
    MD5_ROUND1(c, d, a, b, ml8, 17, 0xa679438e);
    MD5_ROUND1(b, c, d, a, ALL0, 22, 0x49b40821);

    // Round 2.
    MD5_ROUND2(a, b, c, d, bb, 5, 0xf61e2562);
    MD5_ROUND2(d, a, b, c, ALL0, 9, 0xc040b340);
    MD5_ROUND2(c, d, a, b, ALL0, 14, 0x265e5a51);
    MD5_ROUND2(b, c, d, a, aa, 20, 0xe9b6c7aa);
    MD5_ROUND2(a, b, c, d, ALL0, 5, 0xd62f105d);
    MD5_ROUND2(d, a, b, c, ALL0, 9, 0x02441453);
    MD5_ROUND2(c, d, a, b, ALL0, 14, 0xd8a1e681);
    MD5_ROUND2(b, c, d, a, ALL0, 20, 0xe7d3fbc8);
    MD5_ROUND2(a, b, c, d, ALL0, 5, 0x21e1cde6);
    MD5_ROUND2(d, a, b, c, ml8, 9, 0xc33707d6);
    MD5_ROUND2(c, d, a, b, dd, 14, 0xf4d50d87);
    MD5_ROUND2(b, c, d, a, ALL0, 20, 0x455a14ed);
    MD5_ROUND2(a, b, c, d, ALL0, 5, 0xa9e3e905);
    MD5_ROUND2(d, a, b, c, cc, 9, 0xfcefa3f8);
    MD5_ROUND2(c, d, a, b, ALL0, 14, 0x676f02d9);
    MD5_ROUND2(b, c, d, a, ALL0, 20, 0x8d2a4c8a);

    // Round 3.
    MD5_ROUND3(a, b, c, d, ALL0, 4, 0xfffa3942);
    MD5_ROUND3(d, a, b, c, ALL0, 11, 0x8771f681);
    MD5_ROUND3(c, d, a, b, ALL0, 16, 0x6d9d6122);
    MD5_ROUND3(b, c, d, a, ml8, 23, 0xfde5380c);
    MD5_ROUND3(a, b, c, d, bb, 4, 0xa4beea44);
    MD5_ROUND3(d, a, b, c, ALL0, 11, 0x4bdecfa9);
    MD5_ROUND3(c, d, a, b, ALL0, 16, 0xf6bb4b60);
    MD5_ROUND3(b, c, d, a, ALL0, 23, 0xbebfbc70);
    MD5_ROUND3(a, b, c, d, ALL0, 4, 0x289b7ec6);
    MD5_ROUND3(d, a, b, c, aa, 11, 0xeaa127fa);
    MD5_ROUND3(c, d, a, b, dd, 16, 0xd4ef3085);
    MD5_ROUND3(b, c, d, a, ALL0, 23, 0x04881d05);
    MD5_ROUND3(a, b, c, d, ALL0, 4, 0xd9d4d039);
    MD5_ROUND3(d, a, b, c, ALL0, 11, 0xe6db99e5);
    MD5_ROUND3(c, d, a, b, ALL0, 16, 0x1fa27cf8);
    MD5_ROUND3(b, c, d, a, cc, 23, 0xc4ac5665);

    // Round 4.
    MD5_ROUND4(a, b, c, d, aa, 6, 0xf4292244);
    MD5_ROUND4(d, a, b, c, ALL0, 10, 0x432aff97);
    MD5_ROUND4(c, d, a, b, ml8, 15, 0xab9423a7);
    MD5_ROUND4(b, c, d, a, ALL0, 21, 0xfc93a039);
    MD5_ROUND4(a, b, c, d, ALL0, 6, 0x655b59c3);
    MD5_ROUND4(d, a, b, c, dd, 10, 0x8f0ccc92);
    MD5_ROUND4(c, d, a, b, ALL0, 15, 0xffeff47d);
    MD5_ROUND4(b, c, d, a, bb, 21, 0x85845dd1);
    MD5_ROUND4(a, b, c, d, ALL0, 6, 0x6fa87e4f);
    MD5_ROUND4(d, a, b, c, ALL0, 10, 0xfe2ce6e0);
    MD5_ROUND4(c, d, a, b, ALL0, 15, 0xa3014314);
    MD5_ROUND4(b, c, d, a, ALL0, 21, 0x4e0811a1);
    MD5_ROUND4(a, b, c, d, ALL0, 6, 0xf7537e82);
    MD5_ROUND4(d, a, b, c, ALL0, 10, 0xbd3af235);
    MD5_ROUND4(c, d, a, b, cc, 15, 0x2ad7d2bb);
    MD5_ROUND4(b, c, d, a, ALL0, 21, 0xeb86d391);

    // Add the initial state back in to obtain the final state words.
    a = _mm_add_epi32(a, MD5_AA);
    b = _mm_add_epi32(b, MD5_BB);
    c = _mm_add_epi32(c, MD5_CC);
    d = _mm_add_epi32(d, MD5_DD);

    // Transpose back so each output vector holds one complete digest
    // (state words A, B, C, D of a single message).
    _MM_TRANSPOSE4_PI(a, b, c, d);

    data->h0 = a;
    data->h1 = b;
    data->h2 = c;
    data->h3 = d;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment