Created
April 30, 2023 21:19
-
-
Save Lucus16/e00d9adaea51b129b144406973844bcf to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdint.h>
#include <string.h>

// Mask selecting the low 26 bits of a word, i.e. one limb of the
// 5 x 26-bit number representation used throughout this file.
#define MASK26 0x03ffffff
// Unpack a 128-bit value held as 4 32-bit words (least significant word
// first) into 5 limbs of 26 bits each (least significant limb first).
//   dst: receives the limbs; dst[0..3] hold 26 bits each, dst[4] holds the
//        remaining top 24 bits.
//   src: the four input words; not modified.
void load26(uint32_t dst[5], const uint32_t src[4]) {
    // Selects the low 26 bits of a word.
    const uint32_t limb_mask = 0x03ffffff;

    dst[0] = src[0] & limb_mask;
    // Each following limb straddles two adjacent input words.
    dst[1] = ((src[0] >> 26) | (src[1] << 6)) & limb_mask;
    dst[2] = ((src[1] >> 20) | (src[2] << 12)) & limb_mask;
    dst[3] = ((src[2] >> 14) | (src[3] << 18)) & limb_mask;
    dst[4] = src[3] >> 8;
}
// Pack a number held as 5 26-bit limbs (least significant limb first) into
// 4 32-bit words (least significant word first).
// Only the low 128 bits survive: the top 2 bits of the 130-bit input are
// shifted out of dst[3] and dropped.
// Each limb is expected to be below 2^26; any higher bits would be OR-ed
// into the bits contributed by the next limb.
// Assumes the number has already been fully reduced modulo 2^130-5.
void load32(uint32_t dst[4], const uint32_t src[5]) {
    dst[0] = src[0] | (src[1] << 26);
    dst[1] = (src[1] >> 6) | (src[2] << 20);
    dst[2] = (src[2] >> 12) | (src[3] << 14);
    dst[3] = (src[3] >> 18) | (src[4] << 8);
}
// Add c into h limb by limb (h[j] += c[j] for j = 0..4).
// No carries are propagated between limbs; each limb simply grows, wrapping
// modulo 2^32 if a sum does not fit in 32 bits.
void add26(uint32_t h[5], const uint32_t c[5]) {
    for (int limb = 0; limb < 5; limb++) {
        h[limb] += c[limb];
    }
}
// Take a number stored as 5 limbs of weight 2^(26*j) and partially reduce it
// modulo 2^130-5.
// Carries are propagated upward so that every limb is cut back to 26 bits;
// whatever overflows limb 4 has weight 2^130, which is congruent to 5, so it
// is added back into limb 0 multiplied by 5. That last addition is not
// propagated again, so h[0] may end up slightly above 26 bits and the value
// may still be >= 2^130-5.
void squeeze26(uint32_t h[5]) {
    const uint64_t limb_mask = 0x03ffffff;
    // 64-bit accumulator: a 32-bit limb plus a pending carry cannot wrap.
    uint64_t carry = 0;

    for (int limb = 0; limb < 5; limb++) {
        carry += h[limb];
        h[limb] = (uint32_t)(carry & limb_mask);
        carry >>= 26;
    }
    // 2^130 == 5 (mod 2^130-5): fold the final carry into the lowest limb.
    h[0] += (uint32_t)(5 * carry);
}
// Take a number stored as 5 26-bit limbs and fully reduce it modulo 2^130-5.
// Input must have bits 26-31 of each limb already cleared, i.e. the value is
// below 2^130.
// Only inputs in the range [2^130-5, 2^130) change: they are replaced by
// h - (2^130-5). The choice is made with a bit mask rather than a branch.
void freeze26(uint32_t h[5]) {
    const uint32_t limb_mask = 0x03ffffff;
    uint32_t h_plus_5[5];
    uint32_t carry = 5;

    // Compute h + 5 limb by limb. The carry out of limb 4 is 1 exactly when
    // h + 5 >= 2^130, i.e. when h >= 2^130-5; in that case the low 130 bits
    // of h + 5 equal h - (2^130-5).
    for (int limb = 0; limb < 5; limb++) {
        carry += h[limb];
        h_plus_5[limb] = carry & limb_mask;
        carry >>= 26;
    }

    // All-ones when carry == 1, zero when carry == 0.
    const uint32_t use_h_plus_5 = 0u - carry;
    for (int limb = 0; limb < 5; limb++) {
        // Masked xor-swap: keeps h[limb] or replaces it with h_plus_5[limb].
        h[limb] ^= use_h_plus_5 & (h_plus_5[limb] ^ h[limb]);
    }
}
// umull is u32 * u32 -> u64 multiplication: the full 64-bit product of two
// 32-bit operands, with no truncation.
uint64_t umull(uint32_t x, uint32_t y) {
    // Widen one operand first so the multiplication is carried out in 64 bits.
    const uint64_t wide_x = x;
    return wide_x * y;
}
// Multiply h by r modulo 2^130-5, storing the product back into h.
// Both numbers are 5 limbs of weight 2^(26*j), least significant first.
//   h: in/out accumulator. On return h[0], h[2], h[3] and h[4] are below
//      2^26; h[1] may exceed 26 bits by a small carry. The value is only
//      partially reduced (it can still be >= 2^130-5).
//   r: multiplier; not modified.
// The 64-bit accumulator must not overflow: with r limbs below 2^26 and
// h limbs below 2^28, every column sum stays below 2^59.
void mulmod26(uint32_t h[5], const uint32_t r[5]) {
    const uint64_t limb_mask = 0x03ffffff;
    uint32_t result[5];
    uint64_t carry = 0;

    // Schoolbook multiplication, one output limb (column) at a time.
    // h[j] * r[k] has weight 2^(26*(j+k)). When j + k >= 5 the weight is
    // 2^130 * 2^(26*(j+k-5)), and 2^130 == 5 (mod 2^130-5), so the product
    // lands in column j+k-5 multiplied by 5.
    for (int col = 0; col < 5; col++) {
        for (int j = 0; j < 5; j++) {
            if (j <= col) {
                carry += (uint64_t)h[j] * r[col - j];
            } else {
                carry += (uint64_t)h[j] * r[col + 5 - j] * 5;
            }
        }
        result[col] = (uint32_t)(carry & limb_mask);
        carry >>= 26;
    }

    // What is left in carry has weight 2^130: fold it into limb 0 times 5,
    // then push the single resulting carry into limb 1 without masking it.
    carry = carry * 5 + result[0];
    h[0] = (uint32_t)(carry & limb_mask);
    carry >>= 26;
    h[1] = result[1] + (uint32_t)carry;
    h[2] = result[2];
    h[3] = result[3];
    h[4] = result[4];
}
// Read a 32-bit little-endian word from 4 bytes. Works for any alignment and
// any host byte order, and does not alias the buffer through another type.
static uint32_t poly1305_load_le32(const unsigned char *p) {
    return (uint32_t)p[0]
         | ((uint32_t)p[1] << 8)
         | ((uint32_t)p[2] << 16)
         | ((uint32_t)p[3] << 24);
}

// Read 16 bytes as 4 little-endian 32-bit words, least significant first.
static void poly1305_load_words(uint32_t dst[4], const unsigned char *p) {
    for (int j = 0; j < 4; j++) {
        dst[j] = poly1305_load_le32(p + 4 * j);
    }
}

// Write a 32-bit word as 4 little-endian bytes.
static void poly1305_store_le32(unsigned char *p, uint32_t v) {
    p[0] = (unsigned char)(v & 0xff);
    p[1] = (unsigned char)((v >> 8) & 0xff);
    p[2] = (unsigned char)((v >> 16) & 0xff);
    p[3] = (unsigned char)((v >> 24) & 0xff);
}

// Compute the Poly1305 one-time authenticator of a message.
//   out:   receives the 16-byte tag.
//   in:    message bytes; read only.
//   inlen: number of message bytes.
//   k:     32-byte one-time key. Bytes 0..15 are r (clamped here), bytes
//          16..31 are s.
// Always returns 0.
int poly1305(unsigned char *out, const unsigned char *in,
             unsigned long long inlen, const unsigned char *k) {
    uint32_t words[4], r[5], c[5];
    uint32_t h[5] = {0, 0, 0, 0, 0};

    // r = first key half with the required bits cleared (clamping).
    poly1305_load_words(words, k);
    words[0] &= 0x0fffffff;
    words[1] &= 0x0ffffffc;
    words[2] &= 0x0ffffffc;
    words[3] &= 0x0ffffffc;
    load26(r, words);

    // Full 16-byte blocks: h = (h + block + 2^128) * r.
    while (inlen >= 16) {
        poly1305_load_words(words, in);
        load26(c, words);
        c[4] |= 0x01000000;  // bit 128 of the block lives at bit 24 of limb 4
        add26(h, c);
        mulmod26(h, r);
        inlen -= 16;
        in += 16;
    }

    // Trailing partial block: append a single 1 byte, then zero padding.
    if (inlen != 0) {
        unsigned char block[16] = {0};
        memcpy(block, in, (size_t)inlen);
        block[inlen] = 1;
        poly1305_load_words(words, block);
        load26(c, words);
        add26(h, c);
        mulmod26(h, r);
    }

    // Fully reduce h modulo 2^130-5 BEFORE adding s. Adding s first and
    // reducing afterwards gives a different (wrong) tag whenever
    // (h mod 2^130-5) + s >= 2^130-5.
    squeeze26(h);
    squeeze26(h);
    freeze26(h);
    load32(words, h);

    // tag = (h + s) mod 2^128, as a 4-word addition with carry.
    uint64_t carry = 0;
    for (int j = 0; j < 4; j++) {
        carry += (uint64_t)words[j] + poly1305_load_le32(k + 16 + 4 * j);
        words[j] = (uint32_t)carry;
        carry >>= 32;
    }
    // Store only after every key byte has been read.
    for (int j = 0; j < 4; j++) {
        poly1305_store_le32(out + 4 * j, words[j]);
    }
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment