Skip to content

Instantly share code, notes, and snippets.

@thomcc
Created January 6, 2021 00:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save thomcc/b1ba9b77010bf42982fc7eddab78f283 to your computer and use it in GitHub Desktop.
Save thomcc/b1ba9b77010bf42982fc7eddab78f283 to your computer and use it in GitHub Desktop.
#![cfg_attr(all(feature = "std", not(feature = "getrandom")), no_std)]
mod get_random;
#[repr(C, align(16))]
pub struct ChaCha8 {
state: [u32; 16],
seed: [u32; 8],
ctr: u64,
stream_id: u64,
}
impl ChaCha8 {
/// Initialize this rng with a random seed.
///
/// If `feature = "getrandom"` is enabled, this uses `getrandom` to generate
/// the seed, and panics if `getrandom` returns an error.
///
/// If `feature = "std"` is enabled (and `feature = "getrandom"` is not
/// enabled), this abuses `std::collections::hash_map::RandomState` to
/// generate a random seed.
///
/// If the `"getrandom"` and `"std"` features are *both* disabled, then we
/// attempt to cobble together some randomness using ASLR / hardware
/// randomness (rdseed/rdrand) if available.
///
/// For clarity, the `getrandom` feature should be enabled if you're
/// considering actually using this in a context where security is important
/// (prior to this being audited in some manner, this may not be the most
/// advisable thing).
#[inline]
pub fn new_rand() -> Self {
Self::new_buf(crate::get_random::get_rand_state())
}
/// Initialize this rng with a seed read from the given bytes.
#[inline]
pub const fn from_seed_bytes(seed: [u8; 32]) -> Self {
Self::new_buf([
u32::from_le_bytes([seed[0], seed[1], seed[2], seed[3]]),
u32::from_le_bytes([seed[4], seed[5], seed[6], seed[7]]),
u32::from_le_bytes([seed[8], seed[9], seed[10], seed[11]]),
u32::from_le_bytes([seed[12], seed[13], seed[14], seed[15]]),
u32::from_le_bytes([seed[16], seed[17], seed[18], seed[19]]),
u32::from_le_bytes([seed[20], seed[21], seed[22], seed[23]]),
u32::from_le_bytes([seed[24], seed[25], seed[26], seed[27]]),
u32::from_le_bytes([seed[28], seed[29], seed[30], seed[31]]),
])
}
/// Initialize this rng with the provided seed data, provided as an array of
/// 8 u32s.
#[inline]
pub const fn from_seed_buffer(seed: [u32; 8]) -> Self {
Self::new_full(seed, 0, 0)
}
#[inline]
const fn new_full(seed: [u32; 8], stream_id: u64, ctr: u64) -> Self {
Self {
state: [0; 16],
seed,
stream_id,
ctr: ctr << 4,
}
}
/// Generate a random `u32`, and return it.
#[inline]
pub fn next(&mut self) -> u32 {
let idx = (self.ctr % 16) as usize;
if idx == 0 {
self.block();
}
self.ctr += 1;
self.state[idx]
}
/// Advance the RNG by `n` steps.
#[inline]
pub fn skip(&mut self, n: u64) {
let idx = self.ctr % 16;
self.ctr = self.ctr.wrapping_add(n);
if (idx + n) >= 16 && (self.ctr % 16) != 0 {
self.block();
}
}
fn block(&mut self) {
let input: [u32; 16] = [
// magic numbers from the original chacha impl.
0x61707865,
0x3320646e,
0x79622d32,
0x6b206574,
// the seed (aka key in the original impl)
self.seed[0],
self.seed[1],
self.seed[2],
self.seed[3],
self.seed[4],
self.seed[5],
self.seed[6],
self.seed[7],
// 64 bit stream id.
self.stream_id as u32,
(self.stream_id >> 32) as u32,
// 60 bit counter (4 bits are index)
(self.ctr / 16) as u32,
((self.ctr / 16) >> 32) as u32,
];
self.state = input;
self.chacha();
for i in 0..16 {
self.state[i] = self.state[i].wrapping_add(input[i]);
}
}
#[inline]
#[cfg(not(all(target_feature = "sse2", not(no_simd))))]
fn chacha(&mut self) {
macro_rules! chacha_step {
($x:expr, $a:expr, $b:expr, $c:expr, $d:expr) => {
$x[$a] = $x[$a].wrapping_add($x[$b]);
$x[$d] = ($x[$d] ^ $x[$a]).rotate_left(16);
$x[$c] = $x[$c].wrapping_add($x[$d]);
$x[$b] = ($x[$b] ^ $x[$c]).rotate_left(12);
$x[$a] = $x[$a].wrapping_add($x[$b]);
$x[$d] = ($x[$d] ^ $x[$a]).rotate_left(8);
$x[$c] = $x[$c].wrapping_add($x[$d]);
$x[$b] = ($x[$b] ^ $x[$c]).rotate_left(7);
};
}
for _ in 0..4 {
chacha_step!(self.state, 0, 4, 8, 12);
chacha_step!(self.state, 1, 5, 9, 13);
chacha_step!(self.state, 2, 6, 10, 14);
chacha_step!(self.state, 3, 7, 11, 15);
chacha_step!(self.state, 0, 5, 10, 15);
chacha_step!(self.state, 1, 6, 11, 12);
chacha_step!(self.state, 2, 7, 8, 13);
chacha_step!(self.state, 3, 4, 9, 14);
}
}
#[inline]
#[cfg(all(target_feature = "sse2", not(no_simd)))]
fn chacha(&mut self) {
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
// TODO(someday): XOP has a real vector rotate, if someone ever cares to
// fix https://github.com/rust-lang/stdarch/issues/916
unsafe {
#[cfg(not(target_feature = "ssse3"))]
macro_rules! mm_rotl {
($x:expr, 16) => {
_mm_shufflehi_epi16(_mm_shufflelo_epi16($x, 0xb1), 0xb1)
};
($x:expr, $c:literal) => {{
let x = $x;
_mm_or_si128(_mm_slli_epi32(x, $c), _mm_srli_epi32(x, 32 - $c))
}};
}
#[cfg(target_feature = "ssse3")]
macro_rules! mm_rotl {
($x:expr, 16) => {
_mm_shuffle_epi8(
$x,
_mm_setr_epi8(2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13),
)
};
($x:expr, 8) => {
_mm_shuffle_epi8(
$x,
_mm_setr_epi8(3, 0, 1, 2, 7, 4, 5, 6, 11, 8, 9, 10, 15, 12, 13, 14),
)
};
($x:expr, $c:tt) => {{
let x = $x;
_mm_or_si128(_mm_slli_epi32(x, $c), _mm_srli_epi32(x, 32 - $c))
}};
}
const YZWX: i32 = 0x39;
const ZWXY: i32 = 0x4e;
const WXYZ: i32 = 0x93;
let mut a = *(&self.state as *const _ as *const __m128i);
let mut b = *(&self.state as *const _ as *const __m128i).add(1);
let mut c = *(&self.state as *const _ as *const __m128i).add(2);
let mut d = *(&self.state as *const _ as *const __m128i).add(3);
for _ in 0..4 {
a = _mm_add_epi32(a, b);
d = mm_rotl!(_mm_xor_si128(d, a), 16);
c = _mm_add_epi32(c, d);
b = mm_rotl!(_mm_xor_si128(b, c), 12);
a = _mm_add_epi32(a, b);
d = mm_rotl!(_mm_xor_si128(d, a), 8);
c = _mm_add_epi32(c, d);
b = mm_rotl!(_mm_xor_si128(b, c), 7);
b = _mm_shuffle_epi32(b, YZWX);
c = _mm_shuffle_epi32(c, ZWXY);
d = _mm_shuffle_epi32(d, WXYZ);
a = _mm_add_epi32(a, b);
d = mm_rotl!(_mm_xor_si128(d, a), 16);
c = _mm_add_epi32(c, d);
b = mm_rotl!(_mm_xor_si128(b, c), 12);
a = _mm_add_epi32(a, b);
d = mm_rotl!(_mm_xor_si128(d, a), 8);
c = _mm_add_epi32(c, d);
b = mm_rotl!(_mm_xor_si128(b, c), 7);
b = _mm_shuffle_epi32(b, YZWX);
c = _mm_shuffle_epi32(c, ZWXY);
d = _mm_shuffle_epi32(d, WXYZ);
}
*(&mut self.state as *mut _ as *mut __m128i) = a;
*(&mut self.state as *mut _ as *mut __m128i).add(1) = b;
*(&mut self.state as *mut _ as *mut __m128i).add(2) = c;
*(&mut self.state as *mut _ as *mut __m128i).add(3) = d;
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment