Created
January 6, 2021 00:14
-
-
Save thomcc/b1ba9b77010bf42982fc7eddab78f283 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#![cfg_attr(all(feature = "std", not(feature = "getrandom")), no_std)] | |
mod get_random; | |
#[repr(C, align(16))] | |
pub struct ChaCha8 { | |
state: [u32; 16], | |
seed: [u32; 8], | |
ctr: u64, | |
stream_id: u64, | |
} | |
impl ChaCha8 { | |
/// Initialize this rng with a random seed. | |
/// | |
/// If `feature = "getrandom"` is enabled, this uses `getrandom` to generate | |
/// the seed, and panics if `getrandom` returns an error. | |
/// | |
/// If `feature = "std"` is enabled (and `feature = "getrandom"` is not | |
/// enabled), this abuses `std::collections::hash_map::RandomState` to | |
/// generate a random seed. | |
/// | |
/// If the `"getrandom"` and `"std"` features are *both* disabled, then we | |
/// attempt to cobble together some randomness using ASLR / hardware | |
/// randomness (rdseed/rdrand) if available. | |
/// | |
/// For clarity, the `getrandom` feature should be enabled if you're | |
/// considering actually using this in a context where security is important | |
/// (prior to this being audited in some manner, this may not be the most | |
/// advisable thing). | |
#[inline] | |
pub fn new_rand() -> Self { | |
Self::new_buf(crate::get_random::get_rand_state()) | |
} | |
/// Initialize this rng with a seed read from the given bytes. | |
#[inline] | |
pub const fn from_seed_bytes(seed: [u8; 32]) -> Self { | |
Self::new_buf([ | |
u32::from_le_bytes([seed[0], seed[1], seed[2], seed[3]]), | |
u32::from_le_bytes([seed[4], seed[5], seed[6], seed[7]]), | |
u32::from_le_bytes([seed[8], seed[9], seed[10], seed[11]]), | |
u32::from_le_bytes([seed[12], seed[13], seed[14], seed[15]]), | |
u32::from_le_bytes([seed[16], seed[17], seed[18], seed[19]]), | |
u32::from_le_bytes([seed[20], seed[21], seed[22], seed[23]]), | |
u32::from_le_bytes([seed[24], seed[25], seed[26], seed[27]]), | |
u32::from_le_bytes([seed[28], seed[29], seed[30], seed[31]]), | |
]) | |
} | |
/// Initialize this rng with the provided seed data, provided as an array of | |
/// 8 u32s. | |
#[inline] | |
pub const fn from_seed_buffer(seed: [u32; 8]) -> Self { | |
Self::new_full(seed, 0, 0) | |
} | |
#[inline] | |
const fn new_full(seed: [u32; 8], stream_id: u64, ctr: u64) -> Self { | |
Self { | |
state: [0; 16], | |
seed, | |
stream_id, | |
ctr: ctr << 4, | |
} | |
} | |
/// Generate a random `u32`, and return it. | |
#[inline] | |
pub fn next(&mut self) -> u32 { | |
let idx = (self.ctr % 16) as usize; | |
if idx == 0 { | |
self.block(); | |
} | |
self.ctr += 1; | |
self.state[idx] | |
} | |
/// Advance the RNG by `n` steps. | |
#[inline] | |
pub fn skip(&mut self, n: u64) { | |
let idx = self.ctr % 16; | |
self.ctr = self.ctr.wrapping_add(n); | |
if (idx + n) >= 16 && (self.ctr % 16) != 0 { | |
self.block(); | |
} | |
} | |
fn block(&mut self) { | |
let input: [u32; 16] = [ | |
// magic numbers from the original chacha impl. | |
0x61707865, | |
0x3320646e, | |
0x79622d32, | |
0x6b206574, | |
// the seed (aka key in the original impl) | |
self.seed[0], | |
self.seed[1], | |
self.seed[2], | |
self.seed[3], | |
self.seed[4], | |
self.seed[5], | |
self.seed[6], | |
self.seed[7], | |
// 64 bit stream id. | |
self.stream_id as u32, | |
(self.stream_id >> 32) as u32, | |
// 60 bit counter (4 bits are index) | |
(self.ctr / 16) as u32, | |
((self.ctr / 16) >> 32) as u32, | |
]; | |
self.state = input; | |
self.chacha(); | |
for i in 0..16 { | |
self.state[i] = self.state[i].wrapping_add(input[i]); | |
} | |
} | |
#[inline] | |
#[cfg(not(all(target_feature = "sse2", not(no_simd))))] | |
fn chacha(&mut self) { | |
macro_rules! chacha_step { | |
($x:expr, $a:expr, $b:expr, $c:expr, $d:expr) => { | |
$x[$a] = $x[$a].wrapping_add($x[$b]); | |
$x[$d] = ($x[$d] ^ $x[$a]).rotate_left(16); | |
$x[$c] = $x[$c].wrapping_add($x[$d]); | |
$x[$b] = ($x[$b] ^ $x[$c]).rotate_left(12); | |
$x[$a] = $x[$a].wrapping_add($x[$b]); | |
$x[$d] = ($x[$d] ^ $x[$a]).rotate_left(8); | |
$x[$c] = $x[$c].wrapping_add($x[$d]); | |
$x[$b] = ($x[$b] ^ $x[$c]).rotate_left(7); | |
}; | |
} | |
for _ in 0..4 { | |
chacha_step!(self.state, 0, 4, 8, 12); | |
chacha_step!(self.state, 1, 5, 9, 13); | |
chacha_step!(self.state, 2, 6, 10, 14); | |
chacha_step!(self.state, 3, 7, 11, 15); | |
chacha_step!(self.state, 0, 5, 10, 15); | |
chacha_step!(self.state, 1, 6, 11, 12); | |
chacha_step!(self.state, 2, 7, 8, 13); | |
chacha_step!(self.state, 3, 4, 9, 14); | |
} | |
} | |
#[inline] | |
#[cfg(all(target_feature = "sse2", not(no_simd)))] | |
fn chacha(&mut self) { | |
#[cfg(target_arch = "x86")] | |
use core::arch::x86::*; | |
#[cfg(target_arch = "x86_64")] | |
use core::arch::x86_64::*; | |
// TODO(someday): XOP has a real vector rotate, if someone ever cares to | |
// fix https://github.com/rust-lang/stdarch/issues/916 | |
unsafe { | |
#[cfg(not(target_feature = "ssse3"))] | |
macro_rules! mm_rotl { | |
($x:expr, 16) => { | |
_mm_shufflehi_epi16(_mm_shufflelo_epi16($x, 0xb1), 0xb1) | |
}; | |
($x:expr, $c:literal) => {{ | |
let x = $x; | |
_mm_or_si128(_mm_slli_epi32(x, $c), _mm_srli_epi32(x, 32 - $c)) | |
}}; | |
} | |
#[cfg(target_feature = "ssse3")] | |
macro_rules! mm_rotl { | |
($x:expr, 16) => { | |
_mm_shuffle_epi8( | |
$x, | |
_mm_setr_epi8(2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13), | |
) | |
}; | |
($x:expr, 8) => { | |
_mm_shuffle_epi8( | |
$x, | |
_mm_setr_epi8(3, 0, 1, 2, 7, 4, 5, 6, 11, 8, 9, 10, 15, 12, 13, 14), | |
) | |
}; | |
($x:expr, $c:tt) => {{ | |
let x = $x; | |
_mm_or_si128(_mm_slli_epi32(x, $c), _mm_srli_epi32(x, 32 - $c)) | |
}}; | |
} | |
const YZWX: i32 = 0x39; | |
const ZWXY: i32 = 0x4e; | |
const WXYZ: i32 = 0x93; | |
let mut a = *(&self.state as *const _ as *const __m128i); | |
let mut b = *(&self.state as *const _ as *const __m128i).add(1); | |
let mut c = *(&self.state as *const _ as *const __m128i).add(2); | |
let mut d = *(&self.state as *const _ as *const __m128i).add(3); | |
for _ in 0..4 { | |
a = _mm_add_epi32(a, b); | |
d = mm_rotl!(_mm_xor_si128(d, a), 16); | |
c = _mm_add_epi32(c, d); | |
b = mm_rotl!(_mm_xor_si128(b, c), 12); | |
a = _mm_add_epi32(a, b); | |
d = mm_rotl!(_mm_xor_si128(d, a), 8); | |
c = _mm_add_epi32(c, d); | |
b = mm_rotl!(_mm_xor_si128(b, c), 7); | |
b = _mm_shuffle_epi32(b, YZWX); | |
c = _mm_shuffle_epi32(c, ZWXY); | |
d = _mm_shuffle_epi32(d, WXYZ); | |
a = _mm_add_epi32(a, b); | |
d = mm_rotl!(_mm_xor_si128(d, a), 16); | |
c = _mm_add_epi32(c, d); | |
b = mm_rotl!(_mm_xor_si128(b, c), 12); | |
a = _mm_add_epi32(a, b); | |
d = mm_rotl!(_mm_xor_si128(d, a), 8); | |
c = _mm_add_epi32(c, d); | |
b = mm_rotl!(_mm_xor_si128(b, c), 7); | |
b = _mm_shuffle_epi32(b, YZWX); | |
c = _mm_shuffle_epi32(c, ZWXY); | |
d = _mm_shuffle_epi32(d, WXYZ); | |
} | |
*(&mut self.state as *mut _ as *mut __m128i) = a; | |
*(&mut self.state as *mut _ as *mut __m128i).add(1) = b; | |
*(&mut self.state as *mut _ as *mut __m128i).add(2) = c; | |
*(&mut self.state as *mut _ as *mut __m128i).add(3) = d; | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment