Skip to content

Instantly share code, notes, and snippets.

@vchirikov
Last active May 2, 2020 22:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vchirikov/14ce3c3ed5b1e8938bd7d30106ec6e84 to your computer and use it in GitHub Desktop.
Save vchirikov/14ce3c3ed5b1e8938bd7d30106ec6e84 to your computer and use it in GitHub Desktop.
MurmurHash3
public interface IHashFunction
{
/// <summary>
/// Computes the 32-bit hash for the <seealso cref="ReadOnlySpan{T}"/>
/// </summary>
byte[] ComputeHash128(in ReadOnlySpan<byte> buffer);
/// <summary>
/// Computes the 128-bit hash for the <seealso cref="ReadOnlySpan{T}"/>
/// </summary>
uint ComputeHash32(in ReadOnlySpan<byte> buffer);
}
/// <summary>
/// <see href="https://github.com/aappleby/smhasher/wiki/MurmurHash3">Docs</see>
/// and <see href="https://github.com/aappleby/smhasher/blob/92cf3702fcfaadc84eb7bef59825a23e0cd84f56/src/MurmurHash3.cpp#L255">MurmurHash3.cpp</see>
/// You can do this online on <seealso href="http://murmurhash.shorelabs.com/"/>
/// Contains x64-optimized versions only.
/// </summary>
public class MurmurHash3 : IHashFunction
{
/// <inheritdoc/>
public unsafe uint ComputeHash32(in ReadOnlySpan<byte> buffer)
{
const uint seed = 0;
var len = buffer.Length;
int nblocks = len / 16;
uint h1 = seed;
const uint c1 = 0xcc9e2d51;
const uint c2 = 0x1b873593;
// body
fixed (byte* pbuffer = buffer)
{
byte* pinput = pbuffer;
uint* body = (uint*)pinput;
uint k1;
for (int i = -nblocks; i > 0; i++)
{
k1 = body[i];
k1 *= c1;
k1 = (k1 << 15) | (k1 >> (32 - 15)); // ROTL32(k1, 15)
k1 *= c2;
h1 ^= k1;
h1 = (h1 << 13) | (h1 >> (32 - 13)); // ROTL32(h1, 13)
h1 = (h1 * 5) + 0xe6546b64;
}
// tail
byte* tail = pinput + (nblocks * 4);
k1 = 0;
switch (len & 3)
{
case 3:
k1 ^= (uint)tail[2] << 16;
goto case 2;
case 2:
k1 ^= (uint)tail[1] << 8;
goto case 1;
case 1:
k1 ^= (uint)tail[0];
k1 *= c1;
k1 = (k1 << 15) | (k1 >> (32 - 15)); // ROTL32(k1, 15)
k1 *= c2;
h1 ^= k1;
break;
};
}
// finalization
h1 ^= (uint)len;
h1 = FMix32(h1);
return h1;
}
/// <inheritdoc/>
public unsafe byte[] ComputeHash128(in ReadOnlySpan<byte> buffer)
{
const ulong c1 = 0x87c37b91_114253d5;
const ulong c2 = 0x4cf5ad43_2745937f;
const ulong seed = 0;
var len = buffer.Length;
int nblocks = len / 16;
ulong h1 = seed;
ulong h2 = seed;
// body
fixed (byte* pbuffer = buffer)
{
byte* pinput = pbuffer;
ulong* body = (ulong*)pinput;
ulong k1;
ulong k2;
for (int i = 0; i < nblocks; i++)
{
k1 = body[i * 2];
k2 = body[(i * 2) + 1];
k1 *= c1;
k1 = (k1 << 31) | (k1 >> (64 - 31)); // ROTL64(k1, 31);
k1 *= c2;
h1 ^= k1;
h1 = (h1 << 27) | (h1 >> (64 - 27)); // ROTL64(h1, 27);
h1 += h2;
h1 = (h1 * 5) + 0x52dce729;
k2 *= c2;
k2 = (k2 << 33) | (k2 >> (64 - 33)); // ROTL64(k2, 33);
k2 *= c1;
h2 ^= k2;
h2 = (h2 << 31) | (h2 >> (64 - 31)); // ROTL64(h2, 31);
h2 += h1;
h2 = (h2 * 5) + 0x38495ab5;
}
// tail
k1 = 0;
k2 = 0;
byte* tail = pinput + (nblocks * 16);
switch (len & 15)
{
case 15:
k2 ^= (ulong)tail[14] << 48;
goto case 14;
case 14:
k2 ^= (ulong)tail[13] << 40;
goto case 13;
case 13:
k2 ^= (ulong)tail[12] << 32;
goto case 12;
case 12:
k2 ^= (ulong)tail[11] << 24;
goto case 11;
case 11:
k2 ^= (ulong)tail[10] << 16;
goto case 10;
case 10:
k2 ^= (ulong)tail[9] << 8;
goto case 9;
case 9:
k2 ^= tail[8];
k2 *= c2;
k2 = (k2 << 33) | (k2 >> (64 - 33)); // ROTL64(k2, 33);
k2 *= c1;
h2 ^= k2;
goto case 8;
case 8:
k1 ^= (ulong)tail[7] << 56;
goto case 7;
case 7:
k1 ^= (ulong)tail[6] << 48;
goto case 6;
case 6:
k1 ^= (ulong)tail[5] << 40;
goto case 5;
case 5:
k1 ^= (ulong)tail[4] << 32;
goto case 4;
case 4:
k1 ^= (ulong)tail[3] << 24;
goto case 3;
case 3:
k1 ^= (ulong)tail[2] << 16;
goto case 2;
case 2:
k1 ^= (ulong)tail[1] << 8;
goto case 1;
case 1:
k1 ^= tail[0];
k1 *= c1;
k1 = (k1 << 31) | (k1 >> (64 - 31)); // ROTL64(k1, 31);
k1 *= c2;
h1 ^= k1;
break;
}
}
// finalization
h1 ^= (ulong)len;
h2 ^= (ulong)len;
h1 += h2;
h2 += h1;
h1 = FMix64(h1);
h2 = FMix64(h2);
h1 += h2;
h2 += h1;
var ret = new byte[16];
fixed (byte* pret = ret)
{
var ulpret = (ulong*)pret;
ulpret[0] = Reverse(h1);
ulpret[1] = Reverse(h2);
}
return ret;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static ulong FMix64(ulong k)
{
k ^= k >> 33;
k *= 0xff51afd7ed558ccd;
k ^= k >> 33;
k *= 0xc4ceb9fe1a85ec53;
k ^= k >> 33;
return k;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static uint FMix32(uint h)
{
h ^= h >> 16;
h *= 0x85ebca6b;
h ^= h >> 13;
h *= 0xc2b2ae35;
h ^= h >> 16;
return h;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static ulong Reverse(ulong value) =>
((value & 0x00000000000000FFUL) << 56) | ((value & 0x000000000000FF00UL) << 40) |
((value & 0x0000000000FF0000UL) << 24) | ((value & 0x00000000FF000000UL) << 08) |
((value & 0x000000FF00000000UL) >> 08) | ((value & 0x0000FF0000000000UL) >> 24) |
((value & 0x00FF000000000000UL) >> 40) | ((value & 0xFF00000000000000UL) >> 56);
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static ulong Reverse(uint value) =>
((value & 0x000000FFU) << 24) | ((value & 0x0000FF00U) << 08) |
((value & 0x00FF0000U) >> 08) | ((value & 0xFF000000U) >> 24);
}
/// <summary>
/// Check yourself on <seealso href="http://murmurhash.shorelabs.com/"/>
/// </summary>
public class MurmurHash3Tests
{
[Theory]
[MemberData(nameof(ComputeHash32_Input))]
public void ComputeHash32(ReadOnlyMemory<byte> input, uint expected)
{
var hasher = new MurmurHash3();
var result = hasher.ComputeHash32(input.Span);
Assert.Equal(expected, result);
}
public static IEnumerable<object[]> ComputeHash32_Input() => new List<object[]> {
new object[]{ Encoding.UTF8.GetBytes("foo").AsMemory<byte>(), 4138058784 },
new object[]{ Encoding.UTF8.GetBytes("bar").AsMemory<byte>(), 1158584717 },
};
[Theory]
[MemberData(nameof(ComputeHash128_Input))]
public void ComputeHash128(ReadOnlyMemory<byte> input, string expected)
{
var hasher = new MurmurHash3();
var bytes = hasher.ComputeHash128(input.Span);
var hash = string.Concat(bytes.Select(x => x.ToString("x2")));
Assert.Equal(expected, hash);
}
public static IEnumerable<object[]> ComputeHash128_Input() => new List<object[]> {
new object[]{ Encoding.UTF8.GetBytes("foo").AsMemory<byte>(), "e271865701f545617eaf87e42bba7d87" },
new object[]{ Encoding.UTF8.GetBytes("bar").AsMemory<byte>(), "923658dbfd3ae604244fd74548bc56c0" },
};
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment