Skip to content

Instantly share code, notes, and snippets.

@recmo
Last active October 17, 2023 23:17
Show Gist options
  • Save recmo/389339a2e71825eadad1e56106b2e24b to your computer and use it in GitHub Desktop.
Save recmo/389339a2e71825eadad1e56106b2e24b to your computer and use it in GitHub Desktop.
Rust aarch64 FEAT_SHA3 implementation of keccak_f1600.
#![cfg(all(target_arch = "aarch64", target_feature = "sha3"))]
use core::arch::asm;
/// Round constants for the 24 rounds of Keccak-f[1600], in round order.
///
/// `keccak_f1600` reads one entry per round and XORs it into lane 0 of the
/// state (the iota step).
const RC: [u64; 24] = [
    0x0000_0000_0000_0001,
    0x0000_0000_0000_8082,
    0x8000_0000_0000_808a,
    0x8000_0000_8000_8000,
    0x0000_0000_0000_808b,
    0x0000_0000_8000_0001,
    0x8000_0000_8000_8081,
    0x8000_0000_0000_8009,
    0x0000_0000_0000_008a,
    0x0000_0000_0000_0088,
    0x0000_0000_8000_8009,
    0x0000_0000_8000_000a,
    0x0000_0000_8000_808b,
    0x8000_0000_0000_008b,
    0x8000_0000_0000_8089,
    0x8000_0000_0000_8003,
    0x8000_0000_0000_8002,
    0x8000_0000_0000_0080,
    0x0000_0000_0000_800a,
    0x8000_0000_8000_000a,
    0x8000_0000_8000_8081,
    0x8000_0000_0000_8080,
    0x0000_0000_8000_0001,
    0x8000_0000_8000_8008,
];
/// Keccak-f1600 on ARMv8-A with FEAT_SHA3.
///
/// Applies the full 24-round permutation to `state` in place. The 25 lanes
/// are loaded into the low 64 bits of `v0`-`v24`, transformed with the
/// `eor3` / `rax1` / `xar` / `bcax` instructions, and written back to the
/// same memory. One entry of `RC` is consumed per round.
///
/// This file is only compiled when the target is `aarch64` with the `sha3`
/// target feature enabled (see the crate-level `cfg`), so no run-time
/// feature detection is performed here.
///
/// See p. K12.2.2 p. 11,749 of the ARM Reference manual.
/// Adapted from the Keccak-f1600 implementation in the XKCP/K12.
/// see <https://github.com/XKCP/K12/blob/df6a21e6d1f34c1aa36e8d702540899c97dba5a0/lib/ARMv8Asha3/KeccakP-1600-ARMv8Asha3.S#L69>
pub fn keccak_f1600(state: &mut [u64; 25]) {
    // SAFETY:
    // * `x0` points at `state`, an exclusive borrow of 25 u64 lanes (200
    //   bytes). The assembly reads and writes exactly 6 * 32 + 8 = 200 bytes
    //   through it and rewinds the pointer by 192 between the load and the
    //   store phase.
    // * `x1` points at `RC` (24 u64 values). The loop runs 24 times and reads
    //   8 bytes per iteration, so it never reads past the end of the table.
    // * `x0` and `x1` are modified by post-indexed addressing, hence they are
    //   declared `inout(..) => _`. The scratch registers `x8` and `v0`-`v31`
    //   are relied upon to be covered by `clobber_abi("C")`.
    // * The assembly does not touch the stack, matching `options(nostack)`.
    unsafe {
        asm!("
            // Read state
            ld1 {{ v0.1d- v3.1d}}, [x0], #32
            ld1 {{ v4.1d- v7.1d}}, [x0], #32
            ld1 {{ v8.1d-v11.1d}}, [x0], #32
            ld1 {{v12.1d-v15.1d}}, [x0], #32
            ld1 {{v16.1d-v19.1d}}, [x0], #32
            ld1 {{v20.1d-v23.1d}}, [x0], #32
            ld1 {{v24.1d}}, [x0]
            // Rewind x0 to the start of the state for the final store
            sub x0, x0, #192
            // Loop 24 rounds
            // NOTE: This loop actually computes two f1600 functions in
            // parallel, in both the lower and the upper 64-bit of the
            // 128-bit registers v0-v24. Only the lower halves are loaded
            // from and stored to memory here.
            mov x8, #24
            // Local label 2 is used because labels made only of the digits
            // 0 and 1 can be mistaken for binary literals.
        2:  sub x8, x8, #1
            // Theta Calculations
            eor3.16b v25, v20, v15, v10
            eor3.16b v26, v21, v16, v11
            eor3.16b v27, v22, v17, v12
            eor3.16b v28, v23, v18, v13
            eor3.16b v29, v24, v19, v14
            eor3.16b v25, v25, v5, v0
            eor3.16b v26, v26, v6, v1
            eor3.16b v27, v27, v7, v2
            eor3.16b v28, v28, v8, v3
            eor3.16b v29, v29, v9, v4
            rax1.2d v30, v25, v27
            rax1.2d v31, v26, v28
            rax1.2d v27, v27, v29
            rax1.2d v28, v28, v25
            rax1.2d v29, v29, v26
            // Rho and Phi
            eor.16b v0, v0, v29
            xar.2d v25, v1, v30, #64 - 1
            xar.2d v1, v6, v30, #64 - 44
            xar.2d v6, v9, v28, #64 - 20
            xar.2d v9, v22, v31, #64 - 61
            xar.2d v22, v14, v28, #64 - 39
            xar.2d v14, v20, v29, #64 - 18
            xar.2d v26, v2, v31, #64 - 62
            xar.2d v2, v12, v31, #64 - 43
            xar.2d v12, v13, v27, #64 - 25
            xar.2d v13, v19, v28, #64 - 8
            xar.2d v19, v23, v27, #64 - 56
            xar.2d v23, v15, v29, #64 - 41
            xar.2d v15, v4, v28, #64 - 27
            xar.2d v28, v24, v28, #64 - 14
            xar.2d v24, v21, v30, #64 - 2
            xar.2d v8, v8, v27, #64 - 55
            xar.2d v4, v16, v30, #64 - 45
            xar.2d v16, v5, v29, #64 - 36
            xar.2d v5, v3, v27, #64 - 28
            xar.2d v27, v18, v27, #64 - 21
            xar.2d v3, v17, v31, #64 - 15
            xar.2d v30, v11, v30, #64 - 10
            xar.2d v31, v7, v31, #64 - 6
            xar.2d v29, v10, v29, #64 - 3
            // Chi and Iota
            bcax.16b v20, v26, v22, v8
            bcax.16b v21, v8, v23, v22
            bcax.16b v22, v22, v24, v23
            bcax.16b v23, v23, v26, v24
            bcax.16b v24, v24, v8, v26
            // Fetch this round's constant into both halves of v26 and
            // advance the table pointer
            ld1r.2d {{v26}}, [x1], #8
            bcax.16b v17, v30, v19, v3
            bcax.16b v18, v3, v15, v19
            bcax.16b v19, v19, v16, v15
            bcax.16b v15, v15, v30, v16
            bcax.16b v16, v16, v3, v30
            bcax.16b v10, v25, v12, v31
            bcax.16b v11, v31, v13, v12
            bcax.16b v12, v12, v14, v13
            bcax.16b v13, v13, v25, v14
            bcax.16b v14, v14, v31, v25
            bcax.16b v7, v29, v9, v4
            bcax.16b v8, v4, v5, v9
            bcax.16b v9, v9, v6, v5
            bcax.16b v5, v5, v29, v6
            bcax.16b v6, v6, v4, v29
            bcax.16b v3, v27, v0, v28
            bcax.16b v4, v28, v1, v0
            bcax.16b v0, v0, v2, v1
            bcax.16b v1, v1, v27, v2
            bcax.16b v2, v2, v28, v27
            eor.16b v0,v0,v26
            // Rounds loop
            cbnz x8, 2b
            // Write state
            st1 {{ v0.1d- v3.1d}}, [x0], #32
            st1 {{ v4.1d- v7.1d}}, [x0], #32
            st1 {{ v8.1d-v11.1d}}, [x0], #32
            st1 {{v12.1d-v15.1d}}, [x0], #32
            st1 {{v16.1d-v19.1d}}, [x0], #32
            st1 {{v20.1d-v23.1d}}, [x0], #32
            st1 {{v24.1d}}, [x0]
            ",
            inout("x0") state.as_mut_ptr() => _,
            inout("x1") &RC => _,
            clobber_abi("C"),
            options(nostack)
        );
    }
}
/// Computes the 32-byte Keccak-256 digest of `bytes`.
///
/// The input is absorbed in blocks of 1088 bits (136 bytes), each followed by
/// a call to `keccak_f1600`. The final, possibly empty, partial block is
/// padded with a `0x01` byte right after the message and the top bit of the
/// last rate lane is set. The digest is the first four lanes of the state,
/// serialized little-endian.
pub fn keccak256(mut bytes: &[u8]) -> [u8; 32] {
    const RATE: usize = 1088 / 8;
    assert_eq!(RATE % 8, 0);

    /// XORs every complete 8-byte little-endian group of `data` into
    /// successive lanes, starting at lane 0.
    fn absorb_lanes(lanes: &mut [u64; 25], data: &[u8]) {
        for (lane, chunk) in lanes.iter_mut().zip(data.chunks_exact(8)) {
            let mut le = [0u8; 8];
            le.copy_from_slice(chunk);
            *lane ^= u64::from_le_bytes(le);
        }
    }

    let mut state = [0u64; 25];

    // Intermediate whole blocks
    while bytes.len() >= RATE {
        let (block, rest) = bytes.split_at(RATE);
        absorb_lanes(&mut state, block);
        keccak_f1600(&mut state);
        bytes = rest;
    }
    debug_assert!(bytes.len() < RATE);

    // Final block with padding: first the complete lanes...
    let tail_len = bytes.len() % 8;
    let (whole, tail) = bytes.split_at(bytes.len() - tail_len);
    absorb_lanes(&mut state, whole);

    // ...then the trailing 0..=7 bytes together with the first padding byte.
    let mut last_lane = [0u8; 8];
    last_lane[..tail_len].copy_from_slice(tail);
    last_lane[tail_len] = 0x01; // Or 0x06 for SHA3-256
    state[whole.len() / 8] ^= u64::from_le_bytes(last_lane);

    // Closing padding bit: most significant bit of the last lane of the rate.
    state[RATE / 8 - 1] ^= 1u64 << 63;
    keccak_f1600(&mut state);

    // Output
    let mut output = [0_u8; 32];
    for (dst, lane) in output.chunks_exact_mut(8).zip(&state) {
        dst.copy_from_slice(&lane.to_le_bytes());
    }
    output
}
/// Checks `keccak_f1600` against the XKCP intermediate-value vectors: the
/// all-zero state permuted once, then permuted a second time.
#[test]
fn test_keccak_f1600() {
    // Test vectors are copied from XKCP (eXtended Keccak Code Package)
    // https://github.com/XKCP/XKCP/blob/master/tests/TestVectors/KeccakF-1600-IntermediateValues.txt

    // State after one permutation of the all-zero state.
    const AFTER_FIRST: [u64; 25] = [
        0xF1258F7940E1DDE7,
        0x84D5CCF933C0478A,
        0xD598261EA65AA9EE,
        0xBD1547306F80494D,
        0x8B284E056253D057,
        0xFF97A42D7F8E6FD4,
        0x90FEE5A0A44647C4,
        0x8C5BDA0CD6192E76,
        0xAD30A6F71B19059C,
        0x30935AB7D08FFC64,
        0xEB5AA93F2317D635,
        0xA9A6E6260D712103,
        0x81A57C16DBCF555F,
        0x43B831CD0347C826,
        0x01F22F1A11A5569F,
        0x05E5635A21D9AE61,
        0x64BEFEF28CC970F2,
        0x613670957BC46611,
        0xB87C5A554FD00ECB,
        0x8C3EE88A1CCF32C8,
        0x940C7922AE3A2614,
        0x1841F924A2C509E4,
        0x16F53526E70465C2,
        0x75F644E97F30A13B,
        0xEAF1FF7B5CECA249,
    ];
    // State after permuting the result above once more.
    const AFTER_SECOND: [u64; 25] = [
        0x2D5C954DF96ECB3C,
        0x6A332CD07057B56D,
        0x093D8D1270D76B6C,
        0x8A20D9B25569D094,
        0x4F9C4F99E5E7F156,
        0xF957B9A2DA65FB38,
        0x85773DAE1275AF0D,
        0xFAF4F247C3D810F7,
        0x1F1B9EE6F79A8759,
        0xE4FECC0FEE98B425,
        0x68CE61B6B9CE68A1,
        0xDEEA66C4BA8F974F,
        0x33C43D836EAFB1F5,
        0xE00654042719DBD9,
        0x7CF8A9F009831265,
        0xFD5449A6BF174743,
        0x97DDAD33D8994B40,
        0x48EAD5FC5D0BE774,
        0xE3B8C8EE55B7B03C,
        0x91A0226E649E42E9,
        0x900E3129E7BADD7B,
        0x202A9EC5FAA3CCE8,
        0x5B3402464E1C3DB6,
        0x609F4E62A44C1059,
        0x20D06CD26A8FBF5C,
    ];

    let mut lanes = [0_u64; 25];

    keccak_f1600(&mut lanes);
    assert_eq!(lanes, AFTER_FIRST);

    keccak_f1600(&mut lanes);
    assert_eq!(lanes, AFTER_SECOND);
}
/// Checks `keccak256` on the 7-byte message `"testing"`, which exercises the
/// partial-lane padding path.
#[test]
fn test_keccak256() {
    // Expected digest, byte for byte:
    // 5f16f4c7f149ac4f9510d9cf8cf384038ad348b3bcdc01915f95de12df9d1b02
    const DIGEST: [u8; 32] = [
        0x5f, 0x16, 0xf4, 0xc7, 0xf1, 0x49, 0xac, 0x4f, //
        0x95, 0x10, 0xd9, 0xcf, 0x8c, 0xf3, 0x84, 0x03, //
        0x8a, 0xd3, 0x48, 0xb3, 0xbc, 0xdc, 0x01, 0x91, //
        0x5f, 0x95, 0xde, 0x12, 0xdf, 0x9d, 0x1b, 0x02,
    ];

    let message: &[u8] = b"testing";
    let digest = keccak256(message);
    assert_eq!(digest, DIGEST);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment