Last active
August 29, 2015 14:01
-
-
Save mniip/220ba4aa1907c7887a34 to your computer and use it in GitHub Desktop.
8-at-a-time MD5 hasher using AVX2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stddef.h> | |
register A asm("xmm0"); | |
register B asm("xmm1"); | |
register C asm("xmm2"); | |
register D asm("xmm3"); | |
const unsigned int A0 = 0x67452301; | |
const unsigned int B0 = 0xEFCDAB89; | |
const unsigned int C0 = 0x98BADCFE; | |
const unsigned int D0 = 0x10325476; | |
void encrypt_short_8(char (*digest)[16], const char **data, const size_t *length) | |
{ | |
unsigned int *block = calloc(64, 8); | |
unsigned int (*block_digest)[4] = digest; | |
unsigned int trans_digest[4][8]; | |
int b, i; | |
for(b = 0; b < 8; b++) | |
{ | |
for(i = 0; i < length[b]; i++) | |
((char *)(block + b + (i >> 2 << 3)))[i & 3] = data[b][i]; | |
((char *)(block + b + (length[b] >> 2 << 3)))[length[b] & 3] = 0x80; | |
block[b + 8 * 14] = length[b] << 3; | |
block[b + 8 * 15] = length[b] >> 29; | |
} | |
unsigned int dummy; | |
asm volatile | |
( | |
"vpbroadcastd A0, %%ymm0\n" | |
"vpbroadcastd B0, %%ymm1\n" | |
"vpbroadcastd C0, %%ymm2\n" | |
"vpbroadcastd D0, %%ymm3\n" | |
"movq %1, %%rdi\n" | |
"callq block_round_8\n" | |
"vmovdqu %%ymm0, 0x0(%0)\n" | |
"vmovdqu %%ymm1, 0x20(%0)\n" | |
"vmovdqu %%ymm2, 0x40(%0)\n" | |
"vmovdqu %%ymm3, 0x60(%0)\n" | |
: : "r"(&trans_digest), "r"(block) | |
); | |
for(b = 0; b < 8; b++) | |
for(i = 0; i < 4; i++) | |
block_digest[b][i] = trans_digest[i][b]; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -----------------------------------------------------------------------
# block_round_8 - MD5 compression of one 64-byte block, 8 lanes at a time
#
# Syntax: GNU as, AT&T operand order.  Target: x86-64 ELF with AVX2.
#
# extern void block_round_8(const unsigned int (*data)[8]);
# __m256i A register("ymm0");
# __m256i B register("ymm1");
# __m256i C register("ymm2");
# __m256i D register("ymm3");
#
# Register convention (private; the state travels in vector registers):
#   In:     %rdi          = data: 16 rows of 8 dwords, row w holding
#                           message word w for each of the 8 lanes
#           %ymm0..%ymm3  = state words A, B, C, D, one lane per dword
#   Out:    %ymm0..%ymm3  = input state + result of the 64 steps
#   Clobb:  %ymm4..%ymm9, flags
#   Stack:  saved %rbp plus 16 bytes of scratch
#   Align:  none required for data (only VEX memory operands are used)
# -----------------------------------------------------------------------

        .equ    ROW_BYTES, 32           # 8 lanes * 4 bytes per message word

# One MD5 step on all lanes:
#   M = N + ((M + K + F(N, O, P) + block[B]) <<< S)
# F is the name of a macro that leaves F(N, O, P) in %ymm8.
# Clobbers %ymm8, %ymm9 and the scratch dword at (%rsp).
.macro iteration F, M, N, O, P, K, B, S
        \F      \N, \O, \P
        movl    $\K, (%rsp)             # spill K so it can be broadcast
        vpbroadcastd (%rsp), %ymm9
        vpaddd  %ymm9, %ymm8, %ymm8
        vpaddd  \M, %ymm8, %ymm8
        vpaddd  \B*ROW_BYTES(%rdi), %ymm8, %ymm8
        vpslld  $\S, %ymm8, %ymm9       # rotate left by S =
        vpsrld  $32-\S, %ymm8, %ymm8    #   (x << S) | (x >> (32 - S))
        vpor    %ymm9, %ymm8, %ymm8
        vpaddd  \N, %ymm8, \M
.endm

# %ymm8 = (N & O) | (~N & P)            clobbers %ymm9
.macro func1 N, O, P
        vpandn  \P, \N, %ymm9
        vpand   \N, \O, %ymm8
        vpor    %ymm9, %ymm8, %ymm8
.endm

# %ymm8 = (P & N) | (~P & O)            clobbers %ymm9
.macro func2 N, O, P
        func1   \P, \N, \O
.endm

# %ymm8 = N ^ O ^ P
.macro func3 N, O, P
        vpxor   \N, \O, %ymm8
        vpxor   \P, %ymm8, %ymm8
.endm

# %ymm8 = O ^ (N | ~P)
.macro func4 N, O, P
        vpcmpeqd %ymm8, %ymm8, %ymm8    # all ones
        vpxor   \P, %ymm8, %ymm8        # ~P
        vpor    \N, %ymm8, %ymm8
        vpxor   \O, %ymm8, %ymm8
.endm

        .text
        .global block_round_8
        .type   block_round_8, @function
block_round_8:
        pushq   %rbp
        movq    %rsp, %rbp
        subq    $16, %rsp               # scratch; only the dword at (%rsp) is used

        # Work on copies so the incoming state can be added back at the end.
        vmovdqa %ymm0, %ymm4
        vmovdqa %ymm1, %ymm5
        vmovdqa %ymm2, %ymm6
        vmovdqa %ymm3, %ymm7

        # Round 1
        iteration func1, %ymm4, %ymm5, %ymm6, %ymm7, 0xD76AA478, 0, 7
        iteration func1, %ymm7, %ymm4, %ymm5, %ymm6, 0xE8C7B756, 1, 12
        iteration func1, %ymm6, %ymm7, %ymm4, %ymm5, 0x242070DB, 2, 17
        iteration func1, %ymm5, %ymm6, %ymm7, %ymm4, 0xC1BDCEEE, 3, 22
        iteration func1, %ymm4, %ymm5, %ymm6, %ymm7, 0xF57C0FAF, 4, 7
        iteration func1, %ymm7, %ymm4, %ymm5, %ymm6, 0x4787C62A, 5, 12
        iteration func1, %ymm6, %ymm7, %ymm4, %ymm5, 0xA8304613, 6, 17
        iteration func1, %ymm5, %ymm6, %ymm7, %ymm4, 0xFD469501, 7, 22
        iteration func1, %ymm4, %ymm5, %ymm6, %ymm7, 0x698098D8, 8, 7
        iteration func1, %ymm7, %ymm4, %ymm5, %ymm6, 0x8B44F7AF, 9, 12
        iteration func1, %ymm6, %ymm7, %ymm4, %ymm5, 0xFFFF5BB1, 10, 17
        iteration func1, %ymm5, %ymm6, %ymm7, %ymm4, 0x895CD7BE, 11, 22
        iteration func1, %ymm4, %ymm5, %ymm6, %ymm7, 0x6B901122, 12, 7
        iteration func1, %ymm7, %ymm4, %ymm5, %ymm6, 0xFD987193, 13, 12
        iteration func1, %ymm6, %ymm7, %ymm4, %ymm5, 0xA679438E, 14, 17
        iteration func1, %ymm5, %ymm6, %ymm7, %ymm4, 0x49B40821, 15, 22

        # Round 2
        iteration func2, %ymm4, %ymm5, %ymm6, %ymm7, 0xF61E2562, 1, 5
        iteration func2, %ymm7, %ymm4, %ymm5, %ymm6, 0xC040B340, 6, 9
        iteration func2, %ymm6, %ymm7, %ymm4, %ymm5, 0x265E5A51, 11, 14
        iteration func2, %ymm5, %ymm6, %ymm7, %ymm4, 0xE9B6C7AA, 0, 20
        iteration func2, %ymm4, %ymm5, %ymm6, %ymm7, 0xD62F105D, 5, 5
        iteration func2, %ymm7, %ymm4, %ymm5, %ymm6, 0x02441453, 10, 9
        iteration func2, %ymm6, %ymm7, %ymm4, %ymm5, 0xD8A1E681, 15, 14
        iteration func2, %ymm5, %ymm6, %ymm7, %ymm4, 0xE7D3FBC8, 4, 20
        iteration func2, %ymm4, %ymm5, %ymm6, %ymm7, 0x21E1CDE6, 9, 5
        iteration func2, %ymm7, %ymm4, %ymm5, %ymm6, 0xC33707D6, 14, 9
        iteration func2, %ymm6, %ymm7, %ymm4, %ymm5, 0xF4D50D87, 3, 14
        iteration func2, %ymm5, %ymm6, %ymm7, %ymm4, 0x455A14ED, 8, 20
        iteration func2, %ymm4, %ymm5, %ymm6, %ymm7, 0xA9E3E905, 13, 5
        iteration func2, %ymm7, %ymm4, %ymm5, %ymm6, 0xFCEFA3F8, 2, 9
        iteration func2, %ymm6, %ymm7, %ymm4, %ymm5, 0x676F02D9, 7, 14
        iteration func2, %ymm5, %ymm6, %ymm7, %ymm4, 0x8D2A4C8A, 12, 20

        # Round 3
        iteration func3, %ymm4, %ymm5, %ymm6, %ymm7, 0xFFFA3942, 5, 4
        iteration func3, %ymm7, %ymm4, %ymm5, %ymm6, 0x8771F681, 8, 11
        iteration func3, %ymm6, %ymm7, %ymm4, %ymm5, 0x6D9D6122, 11, 16
        iteration func3, %ymm5, %ymm6, %ymm7, %ymm4, 0xFDE5380C, 14, 23
        iteration func3, %ymm4, %ymm5, %ymm6, %ymm7, 0xA4BEEA44, 1, 4
        iteration func3, %ymm7, %ymm4, %ymm5, %ymm6, 0x4BDECFA9, 4, 11
        iteration func3, %ymm6, %ymm7, %ymm4, %ymm5, 0xF6BB4B60, 7, 16
        iteration func3, %ymm5, %ymm6, %ymm7, %ymm4, 0xBEBFBC70, 10, 23
        iteration func3, %ymm4, %ymm5, %ymm6, %ymm7, 0x289B7EC6, 13, 4
        iteration func3, %ymm7, %ymm4, %ymm5, %ymm6, 0xEAA127FA, 0, 11
        iteration func3, %ymm6, %ymm7, %ymm4, %ymm5, 0xD4EF3085, 3, 16
        iteration func3, %ymm5, %ymm6, %ymm7, %ymm4, 0x04881D05, 6, 23
        iteration func3, %ymm4, %ymm5, %ymm6, %ymm7, 0xD9D4D039, 9, 4
        iteration func3, %ymm7, %ymm4, %ymm5, %ymm6, 0xE6DB99E5, 12, 11
        iteration func3, %ymm6, %ymm7, %ymm4, %ymm5, 0x1FA27CF8, 15, 16
        iteration func3, %ymm5, %ymm6, %ymm7, %ymm4, 0xC4AC5665, 2, 23

        # Round 4
        iteration func4, %ymm4, %ymm5, %ymm6, %ymm7, 0xF4292244, 0, 6
        iteration func4, %ymm7, %ymm4, %ymm5, %ymm6, 0x432AFF97, 7, 10
        iteration func4, %ymm6, %ymm7, %ymm4, %ymm5, 0xAB9423A7, 14, 15
        iteration func4, %ymm5, %ymm6, %ymm7, %ymm4, 0xFC93A039, 5, 21
        iteration func4, %ymm4, %ymm5, %ymm6, %ymm7, 0x655B59C3, 12, 6
        iteration func4, %ymm7, %ymm4, %ymm5, %ymm6, 0x8F0CCC92, 3, 10
        iteration func4, %ymm6, %ymm7, %ymm4, %ymm5, 0xFFEFF47D, 10, 15
        iteration func4, %ymm5, %ymm6, %ymm7, %ymm4, 0x85845DD1, 1, 21
        iteration func4, %ymm4, %ymm5, %ymm6, %ymm7, 0x6FA87E4F, 8, 6
        iteration func4, %ymm7, %ymm4, %ymm5, %ymm6, 0xFE2CE6E0, 15, 10
        iteration func4, %ymm6, %ymm7, %ymm4, %ymm5, 0xA3014314, 6, 15
        iteration func4, %ymm5, %ymm6, %ymm7, %ymm4, 0x4E0811A1, 13, 21
        iteration func4, %ymm4, %ymm5, %ymm6, %ymm7, 0xF7537E82, 4, 6
        iteration func4, %ymm7, %ymm4, %ymm5, %ymm6, 0xBD3AF235, 11, 10
        iteration func4, %ymm6, %ymm7, %ymm4, %ymm5, 0x2AD7D2BB, 2, 15
        iteration func4, %ymm5, %ymm6, %ymm7, %ymm4, 0xEB86D391, 9, 21

        # Fold the block's result into the incoming state.
        vpaddd  %ymm4, %ymm0, %ymm0
        vpaddd  %ymm5, %ymm1, %ymm1
        vpaddd  %ymm6, %ymm2, %ymm2
        vpaddd  %ymm7, %ymm3, %ymm3

        leaveq
        retq
        .size   block_round_8, .-block_round_8

        # Mark the object as not needing an executable stack.
        .section .note.GNU-stack,"",@progbits
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment