Skip to content

Instantly share code, notes, and snippets.

@croepha
Last active August 13, 2019 05:21
Show Gist options
  • Save croepha/dc27f46b969cc3745b81d4c4d467fbb0 to your computer and use it in GitHub Desktop.
Save croepha/dc27f46b969cc3745b81d4c4d467fbb0 to your computer and use it in GitHub Desktop.
// License and copyright at the bottom
/* These variations of SHA-2 are implemented:
 *
 * name        checksum bytes   nocopy alignment (bytes)   nocopy chunk size (bytes)
 * SHA256      32               4                          64
 * SHA512      64               8                          128
 * SHA512_256  32               8                          128
 */
// There are also three APIs for each, simple, buffered, and nocopy:
#if 0
// Simple (high level):
char*data = "string to hash";
unsigned char hash_simple[32];
SHA256_simple(hash_simple, data, strlen(data));
// hash_simple is the output
// Buffered (mid level):
char*data1 = "first data";
char*data2 = "second data";
char*data3 = "third data";
HashBufferedContext buffered_ctx;
SHA256_buffered_initialize(&buffered_ctx);
SHA256_buffered_update(&buffered_ctx, data1, strlen(data1));
SHA256_buffered_update(&buffered_ctx, data2, strlen(data2));
SHA256_buffered_update(&buffered_ctx, data3, strlen(data3));
SHA256_buffered_finalize(&buffered_ctx);
// buffered_ctx.hash is output...
// nocopy (low level):
alignas(4) unsigned char chunk1[64] = "this is chunk one";
alignas(4) unsigned char chunk2[64] = "this is chunk two";
alignas(4) unsigned char chunk3[64] = "this is chunk three";
unsigned char leftover[22] = "this is leftover data"; // Not a full chunk
size_t total_size =
/*count of full chunks:*/ 3 *
/*chunk size:*/ 64 +
/*leftover:*/ sizeof(leftover);
alignas(4) unsigned char hash_nocopy[32]; // the nocopy API accesses the hash buffer as 32-bit words, so align it like the chunks
SHA256_nocopy_initialize(hash_nocopy);
SHA256_nocopy_update(hash_nocopy, chunk1);
SHA256_nocopy_update(hash_nocopy, chunk2);
SHA256_nocopy_update(hash_nocopy, chunk3);
SHA256_nocopy_finalize(hash_nocopy, total_size, leftover, sizeof leftover);
// hash_nocopy is the output
#endif
// TODO: We should look into Intel optimizations:
// paper: "Fast SHA-256 Implementations on Intel Architecture Processors"
// paper: "Fast SHA-512 Implementations on Intel Architecture Processors"
// We should also benchmark ourselves against others, like the kernel implementations.
// Probably should port to big-endian...
#ifdef CRO_SHA2_REINCLUDE
// Name-mangling helpers for the re-included (per-profile) section.
// M0(name) expands to <SHA_PROFILE>_<name>, e.g. SHA256_nocopy_update.
// M1 does the token paste; M2 exists only so that SHA_PROFILE is macro-expanded
// before it reaches the ## operator in M1.
#define M1(m_v, m_p) m_p ## _ ## m_v
#define M2(m_v, m_p) M1(m_v, m_p)
#define M0(m_v) M2(m_v, SHA_PROFILE)
// Seed the running hash state for the current profile.
//   hash_out: receives eight state words; it is written through a word pointer,
//             so it must be suitably aligned for the profile's word type.
// Each word is the profile's initial hash value XORed with its IV_MODIFICATION.
void M0(nocopy_initialize)(unsigned char*hash_out) {
    typedef M10(WORD_TYPE) W;
    W *state = (W*)(void*)hash_out;
    int word = 0;
    while (word < 8) {
        state[word] = M0(INITIAL_HASH_VALUES[word]) ^ M0(IV_MODIFICATION);
        word++;
    }
}
// Fold one full chunk into the running hash state (the SHA-2 compression step).
//   hash_out:   eight state words, accessed through a word pointer (must be
//               aligned for the profile's word type); updated in place.
//   chunk_data: exactly CHUNK_BITS/8 bytes of message data.
// The word additions are meant to wrap around, hence the sanitizer opt-out.
__attribute__((no_sanitize("unsigned-integer-overflow")))
void M0(nocopy_update)(unsigned char*hash_out, unsigned char*chunk_data) {
    typedef M10(WORD_TYPE) W;
    W *hash = (W*)(void*)hash_out;
    W w[M0(ROUND_COUNT)];

    // Message schedule, first 16 words: byte-swapped words of the chunk.
    // memcpy is used instead of dereferencing a casted pointer so the load is
    // valid for any chunk alignment and does not violate strict aliasing.
    for (int i=0; i<16; i++) {
        W raw;
        memcpy(&raw, chunk_data + (M0(WORD_BITS)/8)*i, sizeof raw);
        w[i] = endian_swap(raw);
    }
    // Remaining schedule words are derived from earlier ones.
    for (int i=16; i<M0(ROUND_COUNT); i++) {
        W s0 = rotateright(w[i-15], M0(s0_0)) ^
               rotateright(w[i-15], M0(s0_1)) ^ (w[i-15] >> M0(s0_2));
        W s1 = rotateright(w[i- 2], M0(s1_0)) ^
               rotateright(w[i- 2], M0(s1_1)) ^ (w[i- 2] >> M0(s1_2));
        w[i] = w[i-16] + s0 + w[i-7] + s1;
    }

    // Working variables start from the current state.
    W a = hash[0];
    W b = hash[1];
    W c = hash[2];
    W d = hash[3];
    W e = hash[4];
    W f = hash[5];
    W g = hash[6];
    W h = hash[7];

    for (int i=0; i<M0(ROUND_COUNT); i++) {
        W S1 = rotateright(e, M0(S1_0)) ^ rotateright(e, M0(S1_1)) ^ rotateright(e, M0(S1_2));
        W ch = (e & f) ^ (~e & g); // TODO: Apparently this can be optimized, see wiki
        W temp1 = h + S1 + ch + M0(ROUND_CONSTANT_K[i]) + w[i];
        W S0 = rotateright(a, M0(S0_0)) ^ rotateright(a, M0(S0_1)) ^ rotateright(a, M0(S0_2));
        W maj = (a&b)^(a&c)^(b&c); // TODO: Apparently this can be optimized, see wiki
        W temp2 = S0 + maj;
        h = g;
        g = f;
        f = e;
        e = d + temp1;
        d = c;
        c = b;
        b = a;
        a = temp1 + temp2;
    }

    // Add the compressed chunk back into the running state.
    hash[0] += a;
    hash[1] += b;
    hash[2] += c;
    hash[3] += d;
    hash[4] += e;
    hash[5] += f;
    hash[6] += g;
    hash[7] += h;
}
// Finish a nocopy hash: pad the trailing partial chunk, append the message
// length in bits, run the final compression(s) and byte-swap the digest words.
//   hash_out:     running state on entry; on return its first HASH_WORDS_OUT
//                 words hold the byte-swapped digest. Accessed through a word
//                 pointer, so it must be aligned for the profile's word type.
//   total_len:    total message length in bytes (all chunks plus leftover).
//   leftover:     the bytes that did not fill a whole chunk (may be NULL when
//                 leftover_len is 0).
//   leftover_len: number of leftover bytes; must be less than one chunk.
// Only a 64-bit bit count is written; for profiles whose length field is wider
// the upper bytes stay zero.
void M0(nocopy_finalize) (unsigned char*hash_out, size_t total_len, void*leftover, size_t leftover_len) {
    typedef M10(WORD_TYPE) W;
    const size_t chunk_bytes = M0(CHUNK_BITS)/8;
    assert(leftover_len < chunk_bytes);

    // A second chunk is needed when the 0x80 marker plus the length field no
    // longer fit behind the leftover bytes.
    const size_t padded_len =
        (leftover_len + 1 + M0(LENGTH_BITS)/8 > chunk_bytes) ? 2*chunk_bytes : chunk_bytes;

    // Word-typed storage guarantees the alignment nocopy_update expects of a chunk.
    W work_words[2*M0(CHUNK_BITS)/M0(WORD_BITS)];
    unsigned char *work_buf = (unsigned char*)work_words;

    memset(work_buf, 0, padded_len);
    if (leftover_len) {
        memcpy(work_buf, leftover, leftover_len);
    }
    work_buf[leftover_len] = 1<<7;

    // Byte-swapped bit count goes into the last 8 bytes of the padded data.
    unsigned long long bit_len = endian_swap((unsigned long long)total_len*8);
    memcpy(work_buf + padded_len - sizeof bit_len, &bit_len, sizeof bit_len);

    for (size_t offset = 0; offset < padded_len; offset += chunk_bytes) {
        M0(nocopy_update) (hash_out, work_buf + offset);
    }

    // Convert the state words that make up the digest to output byte order.
    W *hash = (W*)(void*)hash_out;
    for (int i=0; i<M0(HASH_WORDS_OUT); i++) { hash[i] = endian_swap(hash[i]); }
}
// Reset a buffered context: zero every field, then seed the hash state
// for the current profile.
void M0(buffered_initialize) (HashBufferedContext*scratch) {
    HashBufferedContext cleared = {};
    *scratch = cleared;
    M0(nocopy_initialize)(scratch->hash);
}
// Feed data_len bytes into a buffered hash.
//   scratch:  context previously set up by buffered_initialize.
//   data_:    input bytes.
//   data_len: number of input bytes.
// Bytes are staged in scratch->scratch; every time a full chunk is available it
// is compressed into scratch->hash. Whatever does not fill a chunk stays staged
// for the next call (or for buffered_finalize).
void M0(buffered_update) (HashBufferedContext*scratch, void* data_, unsigned long long data_len) {
    const unsigned long long block = M0(CHUNK_BITS)/8;
    const unsigned char *cursor = (const unsigned char*)data_;
    unsigned long long remaining = data_len;
    // Bytes already staged from earlier calls.
    unsigned long long staged = scratch->data_len % block;

    if (staged + remaining >= block) {
        // Top up the partially filled staging buffer and compress it.
        const unsigned long long fill = block - staged;
        memcpy(scratch->scratch + staged, cursor, fill);
        M0(nocopy_update) (scratch->hash, scratch->scratch);
        cursor += fill;
        remaining -= fill;

        // Whole chunks go through the staging buffer one at a time.
        for (; remaining >= block; cursor += block, remaining -= block) {
            memcpy(scratch->scratch, cursor, block);
            M0(nocopy_update) (scratch->hash, scratch->scratch);
        }
        staged = 0;
    }

    // Keep the tail (possibly empty) for later.
    memcpy(scratch->scratch + staged, cursor, remaining);
    scratch->data_len += data_len;
}
// Complete a buffered hash: the bytes still staged in the context are handed
// to nocopy_finalize as the leftover. The digest ends up in scratch->hash.
void M0(buffered_finalize) (HashBufferedContext*scratch) {
    const unsigned long long block = M0(CHUNK_BITS)/8;
    const unsigned long long pending = scratch->data_len % block;
    M0(nocopy_finalize) (scratch->hash, scratch->data_len, scratch->scratch, pending);
}
// One-shot hash of a single buffer.
//   hash_out: receives HASH_WORDS_OUT * sizeof(word) digest bytes.
//   data:     input bytes.
//   data_len: number of input bytes.
void M0(simple) (unsigned char*hash_out, void* data, size_t data_len) {
    HashBufferedContext state;
    M0(buffered_initialize)(&state);
    M0(buffered_update)(&state, data, data_len);
    M0(buffered_finalize)(&state);

    const size_t digest_bytes = M0(HASH_WORDS_OUT)*sizeof(M0(WORD_TYPE));
    memcpy(hash_out, state.hash, digest_bytes);
}
#undef SHA_PROFILE
#else
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
// Round x up to the nearest multiple of y.
// Returns x unchanged when it already is a multiple of y, and also when y is 0
// (there is no multiple to round to, and this avoids a division by zero).
// The result wraps around if the next multiple does not fit in the type.
// Declared inline static, like rotateright, so that including this header from
// several translation units does not produce duplicate definitions.
inline static unsigned long long round_up(unsigned long long x, unsigned long long y) {
    if (y == 0) {
        return x;
    }
    unsigned long long remainder = x % y;
    if (remainder == 0) {
        return x;
    }
    return x - remainder + y;
}
// Reverse the order of the four low bytes of x (0x11223344 -> 0x44332211).
// Declared inline static, like rotateright, so that including this header from
// several translation units does not produce duplicate definitions.
inline static unsigned int endian_swap(unsigned int x) {
    return ((x & 0x000000ffu) << 24) |
           ((x & 0x0000ff00u) <<  8) |
           ((x & 0x00ff0000u) >>  8) |
           ((x & 0xff000000u) >> 24);
}
// Reverse the order of the eight bytes of x
// (0x0102030405060708 -> 0x0807060504030201).
// Declared inline static, like rotateright, so that including this header from
// several translation units does not produce duplicate definitions.
inline static unsigned long long endian_swap(unsigned long long x) {
    unsigned long long swapped = 0;
    for (int i = 0; i < 8; i++) {
        // Move the lowest remaining byte of x to the low end of the result.
        swapped = (swapped << 8) | (x & 0xffu);
        x >>= 8;
    }
    return swapped;
}
// Rotate the 32-bit value x right by n bits (n is taken modulo 32).
// The left-shift count is masked as well: without it a rotation by 0 (or any
// multiple of 32) would shift by the full width, which is undefined behaviour.
inline static unsigned int rotateright(unsigned int x, unsigned int n) {
    n &= 31;
    return (x >> n) | (x << ((32 - n) & 31));
}
// Rotate the 64-bit value x right by n bits (n is taken modulo 64).
// The left-shift count is masked as well: without it a rotation by 0 (or any
// multiple of 64) would shift by the full width, which is undefined behaviour.
inline static unsigned long long rotateright(unsigned long long x, unsigned long long n) {
    n &= 63;
    return (x >> n) | (x << ((64 - n) & 63));
}
// State shared by the buffered API of every profile.
struct HashBufferedContext {
// Running hash state; after buffered_finalize it holds the digest.
// The nocopy functions access it through word pointers, hence the alignment.
alignas(8) unsigned char hash[64];
// Staging buffer for a partially filled chunk; 128 bytes is the size of the
// largest chunk among the profiles defined in this file (1024 bits).
alignas(8) unsigned char scratch[128];
// Total number of bytes passed to buffered_update so far.
unsigned long long data_len;
};
// Name-mangling helpers used outside the re-included section:
// M10(name) expands to <SHA_PROFILE>_<name>. M12 forces SHA_PROFILE to be
// macro-expanded before M11 pastes the tokens together.
#define M11(m_v, m_p) m_p ## _ ## m_v
#define M12(m_v, m_p) M11(m_v, m_p)
#define M10(m_v) M12(m_v, SHA_PROFILE)
// Declares the per-profile word type and constants (<SHA_PROFILE>_WORD_TYPE,
// <SHA_PROFILE>_WORD_BITS, ...): word/chunk/length-field sizes in bits, the
// round count, the rotate/shift amounts used by the S0/S1 (round) and s0/s1
// (message schedule) functions, a value XORed into the initial hash values,
// and the number of state words that form the digest.
#define PROFILE_PARAMETERS(m_word_type, m_word_bits, m_chunk_bits, m_round_count, m_length_bits, m_S0_0, m_S0_1, m_S0_2, m_S1_0, m_S1_1, m_S1_2, m_s0_0, m_s0_1, m_s0_2, m_s1_0, m_s1_1, m_s1_2, m_IV_MODIFICATION, m_HASH_WORDS_OUT) \
typedef m_word_type M10(WORD_TYPE); \
static const int M10(WORD_BITS) = m_word_bits; \
static const int M10(CHUNK_BITS) = m_chunk_bits; \
static const int M10(ROUND_COUNT) = m_round_count; \
static const int M10(LENGTH_BITS) = m_length_bits; \
static const int M10(S0_0) = m_S0_0; \
static const int M10(S0_1) = m_S0_1; \
static const int M10(S0_2) = m_S0_2; \
static const int M10(S1_0) = m_S1_0; \
static const int M10(S1_1) = m_S1_1; \
static const int M10(S1_2) = m_S1_2; \
static const int M10(s0_0) = m_s0_0; \
static const int M10(s0_1) = m_s0_1; \
static const int M10(s0_2) = m_s0_2; \
static const int M10(s1_0) = m_s1_0; \
static const int M10(s1_1) = m_s1_1; \
static const int M10(s1_2) = m_s1_2; \
static const M10(WORD_TYPE) M10(IV_MODIFICATION) = m_IV_MODIFICATION; \
static const int M10(HASH_WORDS_OUT) = m_HASH_WORDS_OUT;
// Defines the table <SHA_PROFILE>_INITIAL_HASH_VALUES from the given words.
#define INITIAL_HASH_VALUES(...) \
static const M10(WORD_TYPE) M10(INITIAL_HASH_VALUES)[] = { __VA_ARGS__ };
// Defines the table <SHA_PROFILE>_ROUND_CONSTANT_K from the given words.
#define ROUND_CONSTANT_K(...) \
static const M10(WORD_TYPE) M10(ROUND_CONSTANT_K)[] = { __VA_ARGS__ };
// Print the first 32 bytes (256 bits) of hash_out to stdout as lowercase hex,
// two digits per byte, with no separator or trailing newline.
void print256(unsigned char*hash_out) {
    const unsigned char *end = hash_out + 256/8;
    for (const unsigned char *byte = hash_out; byte != end; ++byte) {
        printf("%02x", *byte);
    }
}
// From here on, including this file again selects the per-profile function
// section at the top instead of this setup section.
#define CRO_SHA2_REINCLUDE
// SHA256 profile: 32-bit words, 512-bit chunks, 64 rounds, 64-bit length field,
// all 8 state words are output.
#define SHA_PROFILE SHA256
PROFILE_PARAMETERS(unsigned int, 32,512,64,64,2,13,22,6,11,25,7,18,3,17,19,10,0,8);
INITIAL_HASH_VALUES(
0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19);
ROUND_CONSTANT_K(
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2);
// Instantiate the SHA256_* functions (the included section undefines SHA_PROFILE at its end).
#include "cro_sha2.hpp"
// Initial hash values for the SHA512 profile (eight 64-bit words).
#define SHA512_IV 0x6a09e667f3bcc908, 0xbb67ae8584caa73b, 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1, \
0x510e527fade682d1, 0x9b05688c2b3e6c1f, 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179
// Round constants shared by the SHA512 and SHA512_256 profiles (80 64-bit words).
#define SHA512_K \
0x428a2f98d728ae22, 0x7137449123ef65cd, 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc, 0x3956c25bf348b538, \
0x59f111f1b605d019, 0x923f82a4af194f9b, 0xab1c5ed5da6d8118, 0xd807aa98a3030242, 0x12835b0145706fbe, \
0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2, 0x72be5d74f27b896f, 0x80deb1fe3b1696b1, 0x9bdc06a725c71235, \
0xc19bf174cf692694, 0xe49b69c19ef14ad2, 0xefbe4786384f25e3, 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65, \
0x2de92c6f592b0275, 0x4a7484aa6ea6e483, 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5, 0x983e5152ee66dfab, \
0xa831c66d2db43210, 0xb00327c898fb213f, 0xbf597fc7beef0ee4, 0xc6e00bf33da88fc2, 0xd5a79147930aa725, \
0x06ca6351e003826f, 0x142929670a0e6e70, 0x27b70a8546d22ffc, 0x2e1b21385c26c926, 0x4d2c6dfc5ac42aed, \
0x53380d139d95b3df, 0x650a73548baf63de, 0x766a0abb3c77b2a8, 0x81c2c92e47edaee6, 0x92722c851482353b, \
0xa2bfe8a14cf10364, 0xa81a664bbc423001, 0xc24b8b70d0f89791, 0xc76c51a30654be30, 0xd192e819d6ef5218, \
0xd69906245565a910, 0xf40e35855771202a, 0x106aa07032bbd1b8, 0x19a4c116b8d2d0c8, 0x1e376c085141ab53, \
0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8, 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb, 0x5b9cca4f7763e373, \
0x682e6ff3d6b2b8a3, 0x748f82ee5defb2fc, 0x78a5636f43172f60, 0x84c87814a1f0ab72, 0x8cc702081a6439ec, \
0x90befffa23631e28, 0xa4506cebde82bde9, 0xbef9a3f7b2c67915, 0xc67178f2e372532b, 0xca273eceea26619c, \
0xd186b8c721c0c207, 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178, 0x06f067aa72176fba, 0x0a637dc5a2c898a6, \
0x113f9804bef90dae, 0x1b710b35131c471b, 0x28db77f523047d84, 0x32caab7b40c72493, 0x3c9ebe0a15c9bebc, \
0x431d67c49c100d4c, 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a, 0x5fcb6fab3ad6faec, 0x6c44198c4a475817
// SHA512 profile: 64-bit words, 1024-bit chunks, 80 rounds, 128-bit length
// field, all 8 state words are output.
#define SHA_PROFILE SHA512
PROFILE_PARAMETERS(unsigned long long,64,1024,80,128,28,34,39,14,18,41,1,8,7,19,61,6,0,8);
INITIAL_HASH_VALUES(SHA512_IV); ROUND_CONSTANT_K(SHA512_K);
// Instantiate the SHA512_* functions.
#include "cro_sha2.hpp"
// SHA512_256 profile: same parameters and round constants as SHA512, but with
// its own initial hash values and only the first 4 state words output.
#define SHA_PROFILE SHA512_256
PROFILE_PARAMETERS(unsigned long long,64,1024,80,128,28,34,39,14,18,41,1,8,7,19,61,6,0,4);
INITIAL_HASH_VALUES(0x22312194fc2bf72c, 0x9f555fa3c84c64c2, 0x2393b86b6f53b151, 0x963877195940eabd,
0x96283ee2a88effe3, 0xbe5e1e2553863992, 0x2b0199fc2c85b8aa, 0x0eb72ddc81c52ca2);
ROUND_CONSTANT_K(SHA512_K);
// Instantiate the SHA512_256_* functions.
#include "cro_sha2.hpp"
// Remove the helper macros so they do not leak into code that includes this file.
#undef M1
#undef M2
#undef M0
#undef M11
#undef M12
#undef M10
#undef PROFILE_PARAMETERS
#undef INITIAL_HASH_VALUES
#undef ROUND_CONSTANT_K
// The re-include switch defined above is CRO_SHA2_REINCLUDE; the previous
// "#undef TEST_SHA2_REINCLUDE" named a macro that is never defined, so the
// switch was left defined after the header finished.
#undef CRO_SHA2_REINCLUDE
#undef SHA_PROFILE
#endif
/* Copyright 2019 David Butler <croepha@gmail.com>
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment