Last active
November 12, 2016 08:46
-
-
Save laverdet/b1d562cb882a9252c10060852e6a5d99 to your computer and use it in GitHub Desktop.
niahash.c w/ __uint128_t removed
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdint.h> | |
#include <string.h> | |
#define HI(n) ((uint64_t)(n)>>32) | |
#define LO(n) ((uint64_t)(n)&0xffffffff) | |
#define U128(hi,lo) ((my_uint128_t){ .high = hi, .low = lo}) | |
typedef struct { | |
uint64_t high; | |
uint64_t low; | |
} my_uint128_t; | |
#define __uint128_t my_uint128_t | |
my_uint128_t add128(my_uint128_t left, my_uint128_t right) { | |
my_uint128_t sum = U128(left.high + right.high, left.low + right.low); | |
if (sum.low < right.low) { | |
++sum.high; | |
} | |
return sum; | |
} | |
int cmp128(my_uint128_t left, my_uint128_t right) { | |
if (left.high == right.high) { | |
if (left.low == right.low) { | |
return 0; | |
} | |
return left.low < right.low ? -1 : 1; | |
} | |
return left.high < right.high ? -1 : 1; | |
} | |
my_uint128_t and128(my_uint128_t left, my_uint128_t right) { | |
return U128( | |
left.high & right.high, | |
left.low & left.low | |
); | |
} | |
my_uint128_t mul64(uint64_t left, uint64_t right) { | |
uint64_t u1 = LO(left); | |
uint64_t v1 = LO(right); | |
uint64_t t = u1 * v1; | |
uint64_t w3 = LO(t); | |
uint64_t k = HI(t); | |
left = HI(left); | |
t = (left * v1) + k; | |
k = LO(t); | |
uint64_t w1 = HI(t); | |
right = HI(right); | |
t = (u1 * right) + k; | |
k = HI(t); | |
return U128((left * right) + w1 + k, (t << 32) + w3); | |
} | |
#if 1 | |
/* IOS 1.13.3 */ | |
static uint64_t magic_table[16] = { | |
0x95C05F4D1512959E, 0xE4F3C46EEF0DCF07, | |
0x6238DC228F980AD2, 0x53F3E3BC49607092, | |
0x4E7BE7069078D625, 0x1016D709D1AD25FC, | |
0x044E89B8AC76E045, 0xE0B684DDA364BFA1, | |
0x90C533B835E89E5F, 0x3DAF462A74FA874F, | |
0xFEA54965DD3EF5A0, 0x287A5D7CCB31B970, | |
0xAE681046800752F8, 0x121C2D6EAF66EC6E, | |
0xEE8F8CA7E090FB20, 0xCE1AE25F48FE0A52, | |
}; | |
#define ROUND_MAGIC U128(0x78F32468CD48D6DE,0x14C983660183C0AE) | |
#define FINAL_MAGIC0 0xBDB31B10864F3F87 | |
#define FINAL_MAGIC1 0x5B7E9E828A9B8ABD | |
#endif | |
#if 0 | |
/* Android 0.43.4 */ | |
static uint64_t magic_table[16] = { | |
0x48CD0D725609F95F, 0x25D4A39B5ACB4330, | |
0x1C0C27978A3649A3, 0x5C7068B9C51C5E4B, | |
0x69A054CBE1369106, 0x4C318ED6A12B9645, | |
0xC751EECD2715C836, 0xAAEDCC7A92014B7A, | |
0xE91DB51D36F47460, 0x78EA4A974D9157B8, | |
0xE4E65C1A929E8AB1, 0x6BC61BB9C5988769, | |
0x78C7794B899D8819, 0xB338727B9C7600F7, | |
0x26BA60FCB9EDC151, 0xE7D74B3CD6293E6B, | |
}; | |
#define ROUND_MAGIC U128(0x1A32C90D816A2F1F,0x76327D13FD037D57) | |
#define FINAL_MAGIC0 0x106245053E723AD8 | |
#define FINAL_MAGIC1 0x1CDA65FFA125C8F6 | |
#endif | |
#if 0 | |
/* Android 0.41.4 */ | |
static uint64_t magic_table[16] = { | |
0x475BD60F17CE7238, 0xC11B0E0066794E31, | |
0x75F5BD04F566D70C, 0x09F4F46E7CEC785C, | |
0x6A52B40820F5EBFF, 0x27F300DB7195A066, | |
0xBDFDE4DAC75939BF, 0xF7F239CFD77A36AB, | |
0x7013DEFA151CD579, 0x8864183CFD4C24F9, | |
0x21C426C79EA1445A, 0xB188FEAE415747BA, | |
0x127421C8D0BD9352, 0x8C7E6FC0526AD558, | |
0x7E33F449C404A71A, 0xE955B7D15DE757DC, | |
}; | |
#define ROUND_MAGIC U128(0x232B242A99C10878,0x667EFDF872801CD8) | |
#define FINAL_MAGIC0 0xF6AC14F2D12AB0C1 | |
#define FINAL_MAGIC1 0x101FF0340EC93F87 | |
#endif | |
uint64_t compute_hash(const uint8_t *in, uint32_t len); | |
static __uint128_t hash_muladd( | |
__uint128_t hash, __uint128_t mul, __uint128_t add); | |
static __uint128_t hash_chunk(const uint8_t *chunk, int64_t size); | |
static uint64_t read_int64(const uint8_t *p); | |
uint64_t read_int64(const uint8_t *p) | |
{ | |
// endian-safe read 64-bit integer | |
uint64_t n = 0; | |
for (int i = 7; i >= 0; i--) { | |
n = (n << 8) | p[i]; | |
} | |
return n; | |
} | |
uint64_t compute_hash(const uint8_t *in, uint32_t len) | |
{ | |
uint32_t num_chunks = len / 128; | |
// copy tail, pad with zeroes | |
uint8_t tail[128] = {0}; | |
int tail_size = len % 128; | |
memcpy(tail, in + len - tail_size, tail_size); | |
__uint128_t hash; | |
if (num_chunks) { | |
// Hash the first 128 bytes | |
hash = hash_chunk(in, 128); | |
} else { | |
// Hash the tail | |
hash = hash_chunk(tail, tail_size); | |
} | |
hash = add128(hash, ROUND_MAGIC); | |
if (num_chunks) { | |
while (--num_chunks) { | |
in += 128; | |
hash = hash_muladd(hash, ROUND_MAGIC, hash_chunk(in, 128)); | |
} | |
if (tail_size) { | |
hash = hash_muladd(hash, ROUND_MAGIC, hash_chunk(tail, tail_size)); | |
} | |
} | |
// Finalize the hash | |
hash = add128(hash, U128(tail_size * 8, 0)); | |
if (cmp128(hash, U128(0x7fffffffffffffff,0xffffffffffffffff)) >= 0) { | |
hash = add128(hash, U128(0, 1)); | |
} | |
hash = and128(hash, U128(0x7fffffffffffffff,0xffffffffffffffff)); | |
uint64_t hash_high = hash.high; | |
uint64_t hash_low = hash.low; | |
uint64_t X = hash_high + HI(hash_low); | |
X = HI(X + HI(X) + 1) + hash_high; | |
uint64_t Y = (X << 32) + hash_low; | |
uint64_t A = X + FINAL_MAGIC0; | |
if (A < X) { | |
A += 0x101; | |
} | |
uint64_t B = Y + FINAL_MAGIC1; | |
if (B < Y) { | |
B += 0x101; | |
} | |
__uint128_t H = mul64(A, B); | |
H = add128(mul64(0x101, H.high), U128(0, H.low)); | |
H = add128(mul64(0x101, H.high), U128(0, H.low)); | |
if (H.high) { | |
H = add128(H, U128(0, 0x101)); | |
} | |
if (H.low > 0xFFFFFFFFFFFFFEFE) { | |
H = add128(H, U128(0, 0x101)); | |
} | |
return H.low; | |
} | |
__uint128_t hash_chunk(const uint8_t *chunk, int64_t size) | |
{ | |
__uint128_t hash = U128(0, 0); | |
for (int i = 0; i < 8; i++) { | |
int offset = i * 16; | |
if (offset >= size) { | |
break; | |
} | |
uint64_t a = read_int64(chunk + offset); | |
uint64_t b = read_int64(chunk + offset + 8); | |
hash = add128(hash, mul64(a + magic_table[i * 2], b + magic_table[i * 2 + 1])); | |
} | |
return and128(hash, U128(0x3fffffffffffffff, 0xffffffffffffffff)); | |
} | |
__uint128_t hash_muladd(__uint128_t hash, __uint128_t mul, __uint128_t add) | |
{ | |
uint64_t a0 = LO(add.low), a1 = HI(add.low), a23 = add.high; | |
uint64_t m0 = LO(mul.low), m1 = HI(mul.low); | |
uint64_t m2 = LO(mul.high), m3 = HI(mul.high); | |
uint64_t h0 = LO(hash.low), h1 = HI(hash.low); | |
uint64_t h2 = LO(hash.high), h3 = HI(hash.high); | |
/* Column sums, before carry */ | |
uint64_t c0 = (h0 * m0); | |
uint64_t c1 = (h0 * m1) + (h1 * m0); | |
uint64_t c2 = (h0 * m2) + (h1 * m1) + (h2 * m0); | |
uint64_t c3 = (h0 * m3) + (h1 * m2) + (h2 * m1) + (h3 * m0); | |
uint64_t c4 = (h1 * m3) + (h2 * m2) + (h3 * m1); | |
uint64_t c5 = (h2 * m3) + (h3 * m2); | |
uint64_t c6 = (h3 * m3); | |
/* Combine, add, and carry (bugs included) */ | |
uint64_t r2 = c2 + (c6 << 1) + a23; | |
uint64_t r3 = c3 + HI(r2); | |
uint64_t r0 = c0 + (c4 << 1) + a0 + (r3 >> 31); | |
uint64_t r1 = c1 + (c5 << 1) + a1 + HI(r0); | |
/* Return as uint128_t */ | |
return U128(((r3 << 33 >> 1) | LO(r2)) + HI(r1), (r1 << 32) | LO(r0)); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment