|
// License BSD-2 Clause - Copyright (c) 2019-present, Alexandre Mutel |
|
// Derived work from: https://github.com/Cyan4973/xxHash/blob/dev/xxh3.h |
|
// xxHash - Extremely Fast Hash algorithm |
|
// Development source file for `xxh3` |
|
// Copyright (C) 2019-present, Yann Collet. |
|
// BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) |
|
|
|
using System; |
|
using System.Runtime.CompilerServices; |
|
using System.Runtime.InteropServices; |
|
using System.Runtime.Intrinsics; |
|
using System.Runtime.Intrinsics.X86; |
|
|
|
namespace StarkPlatform.NativeCompiler.Utils |
|
{ |
|
/// <summary> |
|
/// An incremental 128-bits hash derived from XXH3. SSE version of <see cref="XXH3Fast"/> but SSE is only used in the final compute hash. |
|
/// </summary> |
|
/// <remarks> |
|
/// This is a derived implementation from XXH3. |
|
/// The main difference with the original algorithm is that we expect the working set |
|
/// of data to be small (< 1024 bytes), so we are basically rewriting the function |
|
/// XXH3_accumulate_512 |
|
/// https://github.com/Cyan4973/xxHash/blob/77fd98f6b57c6c17ec43606c74fa8cb96eb17dc0/xxh3.h#L663-L684 |
|
/// to work per integer instead and we are only using 32 bytes for the accumulator (instead of 64 bytes) |
|
/// as in the original XXH3 |
|
/// </remarks> |
|
[StructLayout(LayoutKind.Explicit)] |
|
public unsafe struct XXH3FastSSE |
|
{ |
|
[FieldOffset(0)] |
|
private Vector128<uint> _a; |
|
[FieldOffset(16)] |
|
private Vector128<uint> _b; |
|
[FieldOffset(0)] |
|
private fixed uint _accumulator[ACCUMULATOR_COUNT]; |
|
[FieldOffset(0)] |
|
private fixed byte _accumulatorBytes[ACCUMULATOR_SIZE]; |
|
[FieldOffset(32)] |
|
private int _secretOffset; |
|
[FieldOffset(36)] |
|
private int _accOffset; |
|
|
|
/// <summary> |
|
/// Write an integer value to the hash |
|
/// </summary> |
|
/// <param name="data">The integer data to add to the hash</param> |
|
[MethodImpl(MethodImplOptions.AggressiveInlining)] |
|
public void Write(uint data) |
|
{ |
|
ref var localSecret = ref Unsafe.AddByteOffset(ref SecretUInt, new IntPtr(_secretOffset)); |
|
_secretOffset = (_secretOffset + sizeof(uint)) & (XXH_SECRET_DEFAULT_SIZE - 1); |
|
var key1 = localSecret ^ data; |
|
Unsafe.As<byte, uint>(ref _accumulatorBytes[_accOffset]) += (ushort)key1 * (key1 >> 16) + data; |
|
_accOffset = (_accOffset + sizeof(uint)) & (ACCUMULATOR_SIZE - 1); |
|
} |
|
|
|
/// <summary> |
|
/// Computes the final hash from previous written data. |
|
/// </summary> |
|
/// <param name="count">The number of <see cref="Write"/> issued</param> |
|
/// <returns>A hash as a 128-bits</returns> |
|
[MethodImpl(MethodImplOptions.AggressiveInlining)] |
|
public Vector128<ulong> Compute(uint count) |
|
{ |
|
return Vector128.Create(XXH3_mergeAccs_low(count * PRIME64_1), XXH3_mergeAccs_high(~(count * PRIME64_2))); |
|
} |
|
|
|
// ReSharper disable InconsistentNaming |
|
private const ulong PRIME64_1 = 11400714785074694791UL; |
|
private const ulong PRIME64_2 = 14029467366897019727UL; |
|
private const ulong PRIME64_3 = 1609587929392839161UL; |
|
|
|
private const int ACCUMULATOR_COUNT = 8; |
|
|
|
private const int ACCUMULATOR_SIZE = ACCUMULATOR_COUNT * sizeof(uint); |
|
|
|
private const int XXH_SECRET_DEFAULT_SIZE = 192; |
|
|
|
private static ReadOnlySpan<byte> SecretAsBytes => new ReadOnlySpan<byte>(new byte[XXH_SECRET_DEFAULT_SIZE] { |
|
0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, |
|
0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, |
|
0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, |
|
0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c, |
|
0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3, |
|
0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, |
|
0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, |
|
0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64, |
|
|
|
0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, |
|
0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, |
|
0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, |
|
0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, |
|
}); |
|
|
|
private static ref uint SecretUInt |
|
{ |
|
[MethodImpl(MethodImplOptions.AggressiveInlining)] |
|
get => ref Unsafe.As<byte, uint>(ref Unsafe.AsRef(SecretAsBytes.GetPinnableReference())); |
|
} |
|
|
|
private static ref uint SecretUIntLow |
|
{ |
|
[MethodImpl(MethodImplOptions.AggressiveInlining)] |
|
get => ref Unsafe.As<byte, uint>(ref Unsafe.AsRef(SecretAsBytes[11])); |
|
} |
|
|
|
private static ref uint SecretUIntLow128 |
|
{ |
|
[MethodImpl(MethodImplOptions.AggressiveInlining)] |
|
get => ref Unsafe.As<byte, uint>(ref Unsafe.AsRef(SecretAsBytes[11 + 16])); |
|
} |
|
|
|
private static ref uint SecretUIntHigh |
|
{ |
|
[MethodImpl(MethodImplOptions.AggressiveInlining)] |
|
get => ref Unsafe.As<byte, uint>(ref Unsafe.AsRef(SecretAsBytes[XXH_SECRET_DEFAULT_SIZE - ACCUMULATOR_SIZE - 11])); |
|
} |
|
|
|
private static ref uint SecretUIntHigh128 |
|
{ |
|
[MethodImpl(MethodImplOptions.AggressiveInlining)] |
|
get => ref Unsafe.As<byte, uint>(ref Unsafe.AsRef(SecretAsBytes[XXH_SECRET_DEFAULT_SIZE - ACCUMULATOR_SIZE - 11 + 16])); |
|
} |
|
|
|
private ulong XXH3_mergeAccs_low(ulong start) |
|
{ |
|
var a = Sse2.Xor(_a, Sse2.LoadVector128((uint*)Unsafe.AsPointer(ref SecretUIntLow))); |
|
var lefta = Sse2.Multiply(a, Vector128.AsUInt32(Sse2.Shuffle(Vector128.AsSingle(a), Vector128.AsSingle(a), 0b_10_11_00_01))); |
|
var b = Sse2.Xor(_b, Sse2.LoadVector128((uint*)Unsafe.AsPointer(ref SecretUIntLow128))); |
|
var leftb = Sse2.Multiply(b, Vector128.AsUInt32(Sse2.Shuffle(Vector128.AsSingle(b), Vector128.AsSingle(b), 0b_10_11_00_01))); |
|
var result = Sse2.Add(lefta, leftb); |
|
var result64 = Sse41.X64.Extract(Sse2.Add(result, Vector128.AsUInt64(Sse2.Shuffle(Vector128.AsSingle(result), Vector128.AsSingle(result), 0b_00_00_11_10))), 0); |
|
return XXH3_avalanche(result64 + start); |
|
} |
|
|
|
private ulong XXH3_mergeAccs_high(ulong start) |
|
{ |
|
var a = Sse2.Xor(_a, Sse2.LoadVector128((uint*)Unsafe.AsPointer(ref SecretUIntHigh))); |
|
var lefta = Sse2.Multiply(a, Vector128.AsUInt32(Sse2.Shuffle(Vector128.AsSingle(a), Vector128.AsSingle(a), 0b_10_11_00_01))); |
|
var b = Sse2.Xor(_b, Sse2.LoadVector128((uint*)Unsafe.AsPointer(ref SecretUIntHigh128))); |
|
var leftb = Sse2.Multiply(b, Vector128.AsUInt32(Sse2.Shuffle(Vector128.AsSingle(b), Vector128.AsSingle(b), 0b_10_11_00_01))); |
|
var result = Sse2.Add(lefta, leftb); |
|
var result64 = Sse41.X64.Extract(Sse2.Add(result, Vector128.AsUInt64(Sse2.Shuffle(Vector128.AsSingle(result), Vector128.AsSingle(result), 0b_00_00_11_10))), 0); |
|
return XXH3_avalanche(result64 + start); |
|
} |
|
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)] |
|
private static ulong XXH3_avalanche(ulong h64) |
|
{ |
|
h64 ^= h64 >> 37; |
|
h64 *= PRIME64_3; |
|
h64 ^= h64 >> 32; |
|
return h64; |
|
} |
|
// ReSharper restore InconsistentNaming |
|
} |
|
} |