Skip to content

Instantly share code, notes, and snippets.

@thynson
Last active January 29, 2023 07:09
Show Gist options
  • Save thynson/47ff24fbdbe8eb512f41891c5210d2ba to your computer and use it in GitHub Desktop.
Save thynson/47ff24fbdbe8eb512f41891c5210d2ba to your computer and use it in GitHub Desktop.
A rust implementation of wyhash_final4
///
/// This is free and unencumbered software released into the public domain.
///
/// Anyone is free to copy, modify, publish, use, compile, sell, or
/// distribute this software, either in source code form or as a compiled
/// binary, for any purpose, commercial or non-commercial, and by any
/// means.
///
/// In jurisdictions that recognize copyright laws, the author or authors
/// of this software dedicate any and all copyright interest in the
/// software to the public domain. We make this dedication for the benefit
/// of the public at large and to the detriment of our heirs and
/// successors. We intend this dedication to be an overt act of
/// relinquishment in perpetuity of all present and future rights to this
/// software under copyright law.
///
/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
/// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
/// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
/// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
/// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
/// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
/// OTHER DEALINGS IN THE SOFTWARE.
///
/// For more information, please refer to <http://unlicense.org/>
///
///
/// A rust implementation of the wyhash_final4 algorithm,
/// which resolved some bad seed issue but is not officially published yet,
/// as of 2023-01-29.
///
/// See https://github.com/wangyi-fudan/wyhash/blob/ea3b25e1aef55d90f707c3a292eeb9162e2615d8/wyhash.h
/// for the exact version of the algorithm used here.
///
#[inline(always)]
fn wy_mum64_normal(a: u64, b: u64) -> (u64, u64) {
let m = (a as u128) * (b as u128);
(m as u64, (m >> 64) as u64)
}
#[inline(always)]
fn wy_mum64_condom(mut a: u64, mut b: u64) -> (u64, u64) {
let m = (a as u128) * (b as u128);
a ^= m as u64;
b ^= (m >> 64) as u64;
(a, b)
}
#[inline(always)]
fn wy_rotate(x: u64) -> u64 {
(x >> 32) | (x << 32)
}
#[inline(always)]
fn wy_mum32_normal(a: u64, b: u64) -> (u64, u64) {
let hh = ((a >> 32) as u64) * ((b >> 32) as u64);
let ll = (a as u32 as u64) * (b as u32 as u64);
let hl = ((a >> 32) as u64) * (b as u32 as u64);
let lh = (a as u32 as u64) * ((b >> 32) as u64);
(wy_rotate(hl) ^ hh, wy_rotate(lh) ^ ll)
}
#[inline(always)]
fn wy_mum32_condom(a: u64, b: u64) -> (u64, u64) {
let hh = ((a >> 32) as u64) * ((b >> 32) as u64);
let ll = (a as u32 as u64) * (b as u32 as u64);
let hl = ((a >> 32) as u64) * (b as u32) as u64;
let lh = (a as u32) as u64 * ((b >> 32) as u64);
(a ^ wy_rotate(hl) ^ hh, b ^ wy_rotate(lh) ^ ll)
}
#[inline(always)]
fn wy_mix64_normal(a: u64, b: u64) -> u64 {
let (a, b) = wy_mum64_normal(a, b);
a ^ b
}
#[inline(always)]
fn wy_mix32_normal(a: u64, b: u64) -> u64 {
let (a, b) = wy_mum32_normal(a, b);
a ^ b
}
#[inline(always)]
fn wy_mix64_condom(a: u64, b: u64) -> u64 {
let (a, b) = wy_mum64_condom(a, b);
a ^ b
}
#[inline(always)]
fn wy_mix32_condom(a: u64, b: u64) -> u64 {
let (a, b) = wy_mum32_condom(a, b);
a ^ b
}
pub const DEFAULT_SECRET: [u64; 4] = [
0xa0761d6478bd642fu64,
0xe7037ed1a0b428dbu64,
0x8ebc6af09c88c6e3u64,
0x589965cc75374cc3u64,
];
fn wy_read_8(input: &[u8]) -> u64 {
let mut bytes = [0u8; 8];
bytes.copy_from_slice(&input[0..8]);
u64::from_le_bytes(bytes)
}
fn wy_read_4(input: &[u8]) -> u64 {
let mut bytes = [0u8; 4];
bytes.copy_from_slice(&input[0..4]);
u32::from_le_bytes(bytes) as u64
}
fn wy_read_3(input: &[u8]) -> u64 {
let len = input.len();
((input[0] as u64) << 16) | ((input[len >> 1] as u64) << 8) | (input[len - 1] as u64)
}
///
/// The wyhash hash function
///
/// This is identical to the C implementation with `WYHASH_CONDOM` defined to 2
/// and `WYHASH_32BIT_MUM` defined to 0
///
pub fn wy_hash64_condom(input: &[u8], mut seed: u64, secret: [u64; 4]) -> u64 {
let len = input.len();
seed ^= wy_mix64_condom(seed ^ secret[0], secret[1]);
let mut a = 0u64;
let mut b = 0u64;
if len <= 16 {
if len >= 4 {
a = wy_read_4(input) << 32;
a |= wy_read_4(&input[(len >> 3) << 2..len]);
b = wy_read_4(&input[(len - 4)..]) << 32;
b |= wy_read_4(&input[(len - 4 - ((len >> 3) << 2))..len]);
} else if len > 0 {
a = wy_read_3(input);
}
} else {
let mut i = 0;
if len - i > 48 {
let mut s1 = seed;
let mut s2 = seed;
while input.len() - i > 48 {
seed = wy_mix64_condom(
wy_read_8(&input[i..]) ^ secret[1],
wy_read_8(&input[(i + 8)..]) ^ seed,
);
s1 = wy_mix64_condom(
wy_read_8(&input[(i + 16)..]) ^ secret[2],
wy_read_8(&input[(i + 24)..]) ^ s1,
);
s2 = wy_mix64_condom(
wy_read_8(&input[(i + 32)..]) ^ secret[3],
wy_read_8(&input[(i + 40)..]) ^ s2,
);
i += 48;
}
seed ^= s1 ^ s2;
}
if len - i > 32 {
seed = wy_mix64_condom(
wy_read_8(&input[i..]) ^ secret[1],
wy_read_8(&input[(i + 8)..]) ^ seed,
);
i += 16;
}
if len - i > 16 {
seed = wy_mix64_condom(
wy_read_8(&input[i..]) ^ secret[1],
wy_read_8(&input[(i + 8)..]) ^ seed,
);
}
a = wy_read_8(&input[(len - 16)..]);
b = wy_read_8(&input[(len - 8)..]);
}
a ^= secret[1];
b ^= seed;
(a, b) = wy_mum64_condom(a, b);
wy_mix64_condom(a ^ secret[0] ^ (len as u64), b ^ secret[1])
}
///
/// The wyhash hash function
///
/// This is identical to the C implementation with `WYHASH_CONDOM` defined to 2
/// and `WYHASH_32BIT_MUM` defined to 1
///
pub fn wy_hash32_condom(input: &[u8], mut seed: u64, secret: [u64; 4]) -> u64 {
let len = input.len();
seed ^= wy_mix32_condom(seed ^ secret[0], secret[1]);
let mut a = 0u64;
let mut b = 0u64;
if len <= 16 {
if len >= 4 {
a = wy_read_4(input) << 32;
a |= wy_read_4(&input[(len >> 3) << 2..len]);
b = wy_read_4(&input[(len - 4)..]) << 32;
b |= wy_read_4(&input[(len - 4 - ((len >> 3) << 2))..len]);
} else if len > 0 {
a = wy_read_3(input);
}
} else {
let mut i = 0;
if len - i > 48 {
let mut s1 = seed;
let mut s2 = seed;
while input.len() - i > 48 {
seed = wy_mix32_condom(
wy_read_8(&input[i..]) ^ secret[1],
wy_read_8(&input[(i + 8)..]) ^ seed,
);
s1 = wy_mix32_condom(
wy_read_8(&input[(i + 16)..]) ^ secret[2],
wy_read_8(&input[(i + 24)..]) ^ s1,
);
s2 = wy_mix32_condom(
wy_read_8(&input[(i + 32)..]) ^ secret[3],
wy_read_8(&input[(i + 40)..]) ^ s2,
);
i += 48;
}
seed ^= s1 ^ s2;
}
if len - i > 32 {
seed = wy_mix32_condom(
wy_read_8(&input[i..]) ^ secret[1],
wy_read_8(&input[(i + 8)..]) ^ seed,
);
i += 16;
}
if len - i > 16 {
seed = wy_mix32_condom(
wy_read_8(&input[i..]) ^ secret[1],
wy_read_8(&input[(i + 8)..]) ^ seed,
);
}
a = wy_read_8(&input[(len - 16)..]);
b = wy_read_8(&input[(len - 8)..]);
}
a ^= secret[1];
b ^= seed;
(a, b) = wy_mum32_condom(a, b);
wy_mix32_condom(a ^ secret[0] ^ (len as u64), b ^ secret[1])
}
///
/// The wyhash hash function
///
/// This is identical to the C implementation with `WYHASH_CONDOM` defined to 1
/// and `WYHASH_32BIT_MUM` defined to 0
///
pub fn wy_hash64_normal(input: &[u8], mut seed: u64, secret: [u64; 4]) -> u64 {
let len = input.len();
seed ^= wy_mix64_normal(seed ^ secret[0], secret[1]);
let mut a = 0u64;
let mut b = 0u64;
if len <= 16 {
if len >= 4 {
a = wy_read_4(input) << 32;
a |= wy_read_4(&input[(len >> 3) << 2..len]);
b = wy_read_4(&input[(len - 4)..]) << 32;
b |= wy_read_4(&input[(len - 4 - ((len >> 3) << 2))..len]);
} else if len > 0 {
a = wy_read_3(input);
}
} else {
let mut i = 0;
if len - i > 48 {
let mut s1 = seed;
let mut s2 = seed;
while input.len() - i > 48 {
seed = wy_mix64_normal(
wy_read_8(&input[i..]) ^ secret[1],
wy_read_8(&input[(i + 8)..]) ^ seed,
);
s1 = wy_mix64_normal(
wy_read_8(&input[(i + 16)..]) ^ secret[2],
wy_read_8(&input[(i + 24)..]) ^ s1,
);
s2 = wy_mix64_normal(
wy_read_8(&input[(i + 32)..]) ^ secret[3],
wy_read_8(&input[(i + 40)..]) ^ s2,
);
i += 48;
}
seed ^= s1 ^ s2;
}
if len - i > 32 {
seed = wy_mix64_normal(
wy_read_8(&input[i..]) ^ secret[1],
wy_read_8(&input[(i + 8)..]) ^ seed,
);
i += 16;
}
if len - i > 16 {
seed = wy_mix64_normal(
wy_read_8(&input[i..]) ^ secret[1],
wy_read_8(&input[(i + 8)..]) ^ seed,
);
}
a = wy_read_8(&input[(len - 16)..]);
b = wy_read_8(&input[(len - 8)..]);
}
a ^= secret[1];
b ^= seed;
(a, b) = wy_mum64_normal(a, b);
wy_mix64_normal(a ^ secret[0] ^ (len as u64), b ^ secret[1])
}
///
/// The wyhash hash function
///
/// This is identical to the C implementation with `WYHASH_CONDOM` defined to 1
/// and `WYHASH_32BIT_MUM` defined to 1
///
pub fn wy_hash32_normal(input: &[u8], mut seed: u64, secret: [u64; 4]) -> u64 {
let len = input.len();
seed ^= wy_mix32_normal(seed ^ secret[0], secret[1]);
let mut a = 0u64;
let mut b = 0u64;
if len <= 16 {
if len >= 4 {
a = wy_read_4(input) << 32;
a |= wy_read_4(&input[(len >> 3) << 2..len]);
b = wy_read_4(&input[(len - 4)..]) << 32;
b |= wy_read_4(&input[(len - 4 - ((len >> 3) << 2))..len]);
} else if len > 0 {
a = wy_read_3(input);
}
} else {
let mut i = 0;
if len - i > 48 {
let mut s1 = seed;
let mut s2 = seed;
while input.len() - i > 48 {
seed = wy_mix32_normal(
wy_read_8(&input[i..]) ^ secret[1],
wy_read_8(&input[(i + 8)..]) ^ seed,
);
s1 = wy_mix32_normal(
wy_read_8(&input[(i + 16)..]) ^ secret[2],
wy_read_8(&input[(i + 24)..]) ^ s1,
);
s2 = wy_mix32_normal(
wy_read_8(&input[(i + 32)..]) ^ secret[3],
wy_read_8(&input[(i + 40)..]) ^ s2,
);
i += 48;
}
seed ^= s1 ^ s2;
}
if len - i > 32 {
seed = wy_mix32_normal(
wy_read_8(&input[i..]) ^ secret[1],
wy_read_8(&input[(i + 8)..]) ^ seed,
);
i += 16;
}
if len - i > 16 {
seed = wy_mix32_normal(
wy_read_8(&input[i..]) ^ secret[1],
wy_read_8(&input[(i + 8)..]) ^ seed,
);
}
a = wy_read_8(&input[(len - 16)..]);
b = wy_read_8(&input[(len - 8)..]);
}
a ^= secret[1];
b ^= seed;
(a, b) = wy_mum32_normal(a, b);
wy_mix32_normal(a ^ secret[0] ^ (len as u64), b ^ secret[1])
}
#[cfg(test)]
mod test {
use crate::{
wy_hash32_condom, wy_hash32_normal, wy_hash64_condom, wy_hash64_normal, DEFAULT_SECRET,
};
#[test]
fn wyhash64_normal_official_test_vector() {
assert_eq!(
wy_hash64_normal("".as_bytes(), 0, DEFAULT_SECRET),
0x409638ee2bde459
);
assert_eq!(
wy_hash64_normal("a".as_bytes(), 1, DEFAULT_SECRET),
0xa8412d091b5fe0a9
);
assert_eq!(
wy_hash64_normal("abc".as_bytes(), 2, DEFAULT_SECRET),
0x32dd92e4b2915153
);
assert_eq!(
wy_hash64_normal("message digest".as_bytes(), 3, DEFAULT_SECRET),
0x8619124089a3a16b
);
assert_eq!(
wy_hash64_normal("abcdefghijklmnopqrstuvwxyz".as_bytes(), 4, DEFAULT_SECRET),
0x7a43afb61d7f5f40
);
assert_eq!(
wy_hash64_normal(
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789".as_bytes(),
5,
DEFAULT_SECRET
),
0xff42329b90e50d58
);
assert_eq!(
wy_hash64_normal(
"12345678901234567890123456789012345678901234567890123456789012345678901234567890"
.as_bytes(),
6,
DEFAULT_SECRET
),
0xc39cab13b115aad3
);
assert_eq!(
wy_hash64_normal(
"wyhashwyhashwyhashwyhashwyhashwyhashwyhashwyhashwyhashwyhashwyhashwyhashwyhashwyhashwyhash"
.as_bytes(),
7,
DEFAULT_SECRET
),
0x354dba9ae15087b1
);
}
#[test]
fn wyhash64_condom_official_test_vector() {
assert_eq!(
wy_hash64_condom("".as_bytes(), 0, DEFAULT_SECRET),
0x90d3db895794f51
);
assert_eq!(
wy_hash64_condom("a".as_bytes(), 1, DEFAULT_SECRET),
0xc9654dd2d8b02dce
);
assert_eq!(
wy_hash64_condom("abc".as_bytes(), 2, DEFAULT_SECRET),
0xc04b780dfa37c941
);
assert_eq!(
wy_hash64_condom("message digest".as_bytes(), 3, DEFAULT_SECRET),
0xa8bcfb355c9d5ddf
);
assert_eq!(
wy_hash64_condom("abcdefghijklmnopqrstuvwxyz".as_bytes(), 4, DEFAULT_SECRET),
0x3db6c14702cafd0a
);
assert_eq!(
wy_hash64_condom(
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789".as_bytes(),
5,
DEFAULT_SECRET
),
0x662c013426adcd75
);
assert_eq!(
wy_hash64_condom(
"12345678901234567890123456789012345678901234567890123456789012345678901234567890"
.as_bytes(),
6,
DEFAULT_SECRET
),
0x79b349a43dca3fca
);
assert_eq!(
wy_hash64_condom(
"wyhashwyhashwyhashwyhashwyhashwyhashwyhashwyhashwyhashwyhashwyhashwyhashwyhashwyhashwyhash"
.as_bytes(),
7,
DEFAULT_SECRET
)
,
0x835c502d215ef90c
);
}
#[test]
fn wyhash32_normal_official_test_vector() {
assert_eq!(
wy_hash32_normal("".as_bytes(), 0, DEFAULT_SECRET),
0x4b80acaa567a5c84
);
assert_eq!(
wy_hash32_normal("a".as_bytes(), 1, DEFAULT_SECRET),
0xb78e7daf065068fa
);
assert_eq!(
wy_hash32_normal("abc".as_bytes(), 2, DEFAULT_SECRET),
0xd176a04d1bbbff00
);
assert_eq!(
wy_hash32_normal("message digest".as_bytes(), 3, DEFAULT_SECRET),
0x8e97d7095d8ad10f
);
assert_eq!(
wy_hash32_normal("abcdefghijklmnopqrstuvwxyz".as_bytes(), 4, DEFAULT_SECRET),
0x857e6e009e6f847c
);
assert_eq!(
wy_hash32_normal(
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789".as_bytes(),
5,
DEFAULT_SECRET
),
0x2d10fde4d0d281cf
);
assert_eq!(
wy_hash32_normal(
"12345678901234567890123456789012345678901234567890123456789012345678901234567890"
.as_bytes(),
6,
DEFAULT_SECRET
),
0x1aeafe0bc395545
);
assert_eq!(
wy_hash32_normal(
"wyhashwyhashwyhashwyhashwyhashwyhashwyhashwyhashwyhashwyhashwyhashwyhashwyhashwyhashwyhash"
.as_bytes(),
7,
DEFAULT_SECRET
),
0xd4bbfddb598f9a73
);
}
#[test]
fn wyhash32_condom_official_test_vector() {
assert_eq!(
wy_hash32_condom("".as_bytes(), 0, DEFAULT_SECRET),
0xeea54221671289db
);
assert_eq!(
wy_hash32_condom("a".as_bytes(), 1, DEFAULT_SECRET),
0xac4d02accfbeae5f
);
assert_eq!(
wy_hash32_condom("abc".as_bytes(), 2, DEFAULT_SECRET),
0xe6a807320c2ecb45
);
assert_eq!(
wy_hash32_condom("message digest".as_bytes(), 3, DEFAULT_SECRET),
0xce453c352850b812
);
assert_eq!(
wy_hash32_condom("abcdefghijklmnopqrstuvwxyz".as_bytes(), 4, DEFAULT_SECRET),
0xbbe236c15d78c93f
);
assert_eq!(
wy_hash32_condom(
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789".as_bytes(),
5,
DEFAULT_SECRET
),
0x5ac6239be02bd28
);
assert_eq!(
wy_hash32_condom(
"12345678901234567890123456789012345678901234567890123456789012345678901234567890"
.as_bytes(),
6,
DEFAULT_SECRET
),
0x8d6d04f2b998b85a
);
assert_eq!(
wy_hash32_condom(
"wyhashwyhashwyhashwyhashwyhashwyhashwyhashwyhashwyhashwyhashwyhashwyhashwyhashwyhashwyhash"
.as_bytes(),
7,
DEFAULT_SECRET
),
0x57048a9fa7ae0bbd
);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment