Skip to content

Instantly share code, notes, and snippets.

@iscgar
Created July 6, 2018 12:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save iscgar/630cf5f5d8dbb8b8bd1b05b4f8796d21 to your computer and use it in GitHub Desktop.
Save iscgar/630cf5f5d8dbb8b8bd1b05b4f8796d21 to your computer and use it in GitHub Desktop.
A C++11 constexpr implementation of 32-bit MurmurHash3
#include <stddef.h>
#include <stdint.h>
#include <limits>
#include <type_traits>
namespace detail
{
template<size_t R, class T>
static inline constexpr T rotl(const T v)
{
static_assert(std::is_unsigned<T>::value, "Can only rotate unsigned integral types");
return (v << (R % std::numeric_limits<T>::digits)) |
(v >> (std::numeric_limits<T>::digits - (R % std::numeric_limits<T>::digits)));
}
template<class T>
struct LittleEndianIndexer
{
static inline constexpr size_t index_of(size_t v) { return v; }
};
template<class T>
struct BigEndianIndexer
{
static inline constexpr size_t index_of(size_t v) { return (sizeof(T) - v - 1); }
};
template<class T, template<typename> class Indexer = LittleEndianIndexer>
struct IntegralByteView
{
static_assert(std::is_integral<T>::value, "IntegralByteView supports only integral types");
inline constexpr IntegralByteView(const T *const p, size_t n) : _p(p), _n(p ? n * sizeof(T) : 0) {}
inline constexpr size_t size() const { return _n; }
inline constexpr uint8_t byte_at(size_t n) const
{
return n < _n ?
uint8_t(_p[n / sizeof(T)] >> (Indexer<T>::index_of(n % sizeof(T)) * std::numeric_limits<uint8_t>::digits)) :
throw int();
}
private:
const T *const _p;
size_t _n;
};
template<class T> struct MurmurHash3;
template<>
struct MurmurHash3<uint32_t>
{
using value_type = uint32_t;
template<class T, template<typename> class Indexer>
inline constexpr MurmurHash3(const IntegralByteView<T, Indexer> &s, value_type seed) :
_value(fmix3(fmix2(fmix1(tail(body(seed, s, 0), s)))))
{
}
inline constexpr operator value_type() const { return _value; }
private:
static constexpr const uint32_t C1 = 0xcc9e2d51;
static constexpr const uint32_t C2 = 0x1b873593;
static constexpr const size_t BODY_CHUNK_SIZE = 256;
template<class T, template<typename> class Indexer>
static inline constexpr value_type get_block(const IntegralByteView<T, Indexer> &s, size_t offset)
{
return value_type(s.byte_at(offset)) | (value_type(s.byte_at(offset + 1)) << 8) |
(value_type(s.byte_at(offset + 2)) << 16) | (value_type(s.byte_at(offset + 3)) << 24);
}
template<class T, template<typename> class Indexer>
static inline constexpr value_type get_tail(const IntegralByteView<T, Indexer> &s, size_t offset)
{
return offset >= s.size() ? 0 :
value_type(s.byte_at(offset)) |
(s.size() - offset > 1 ? (value_type(s.byte_at(offset + 1)) << 8) : 0) |
(s.size() - offset > 2 ? (value_type(s.byte_at(offset + 2)) << 16) : 0);
}
static inline constexpr value_type process_block(value_type k1, value_type h)
{
return 0xe6546b64 + (5 * rotl<13>(h ^ (rotl<15>(k1 * C1) * C2)));
}
static inline constexpr value_type fmix1(value_type h)
{
return (h ^ h >> 16) * 0x85ebca6b;
}
static inline constexpr value_type fmix2(value_type h)
{
return (h ^ (h >> 13)) * 0xc2b2ae35;
}
static inline constexpr value_type fmix3(value_type h)
{
return h ^ (h >> 16);
}
template<class T, template<typename> class Indexer>
static inline constexpr value_type body_chunk(value_type h, const IntegralByteView<T, Indexer> &s, size_t offset, size_t left)
{
return left < 4 ? h : body_chunk(process_block(get_block(s, offset), h), s, offset + 4, left - 4);
}
template<class T, template<typename> class Indexer>
static inline constexpr value_type body(value_type h, const IntegralByteView<T, Indexer> &s, size_t offset)
{
return (offset > s.size() || s.size() - offset < 4) ? h :
body(body_chunk(h, s, offset, s.size() - offset >= BODY_CHUNK_SIZE ? BODY_CHUNK_SIZE : s.size() - offset),
s, offset + BODY_CHUNK_SIZE);
}
template<class T, template<typename> class Indexer>
static inline constexpr value_type tail(value_type h, const IntegralByteView<T, Indexer> &s)
{
return s.size() ^ h ^ (rotl<15>(get_tail(s, s.size() - (s.size() & 3)) * C1) * C2);
}
value_type _value;
};
}
template<class T, class U>
inline constexpr T murmur_hash3(const U *s, size_t n, const T seed = 0)
{
return detail::MurmurHash3<T>(detail::IntegralByteView<U>(s, n), seed);
}
namespace detail
{
inline constexpr const char* _strnul_chunk(const char *s, size_t l) { return (!l || !s || !*s) ? s : _strnul_chunk(s + 1, l - 1); }
inline constexpr const char* strnul(const char *s) { return (!s || !*s) ? s : strnul(_strnul_chunk(s + 1, 250)); }
inline constexpr size_t strlen(const char *s) { return strnul(s) - s; }
}
template<class T>
inline constexpr T murmur_hash3(const char *s, const T seed = 0)
{
return detail::MurmurHash3<T>(detail::IntegralByteView<char>(s, detail::strlen(s)), seed);
}
#include <stdio.h>
int main()
{
static const struct
{
uint8_t data[5];
size_t length;
uint32_t seed;
uint32_t expected;
} test_vectors_u8[] =
{
{ { }, 0, 0, 0 },
{ { }, 0, 1, 0x514E28B7 },
{ { }, 0, 0xffffffff, 0x81F16F39 },
{ { 0xFF, 0xFF, 0xFF, 0xFF }, 4, 0, 0x76293B50 },
{ { 0x21, 0x43, 0x65, 0x87 }, 4, 0, 0xF55B516B },
{ { 0x21, 0x43, 0x65, 0x87 }, 4, 0x5082EDEE, 0x2362F9DE },
{ { 0x21, 0x43, 0x65, }, 3, 0, 0x7E4A8634 },
{ { 0x21, 0x43 }, 2, 0, 0xA0F7B07A },
{ { 0x21 }, 1, 0, 0x72661CF4 },
{ { 00, 00, 00, 00 }, 4, 0, 0x2362F9DE },
{ { 00, 00, 00, }, 3, 0, 0x85F0B427 },
{ { 00, 00 }, 2, 0, 0x30F4C306 },
{ { 00 }, 1, 0, 0x514E28B7 },
};
for (auto &test : test_vectors_u8)
{
uint32_t const hash = murmur_hash3<uint32_t>(test.data, test.length, test.seed);
printf("%d, %08x: %08x == %08x (%s)\n", test.length, test.seed, hash, test.expected, hash == test.expected ? "ok" : "fail");
}
static const struct
{
const char *s;
uint32_t expected;
} test_vectors_char[] =
{
{"uhadrighea", 0x670f7f27},
{"iuhaeruigh", 0x215f6cee},
{"qeirugh", 0x3268ca3},
{"iuqwehgiu", 0x243bd696},
{"hwdfiug", 0xf3f4e343},
{"h845gwhr89gh3j6w", 0xbdd359da},
{"8978erthg", 0xf28f95a2},
{"aes5y$%JU^WRS^&*EJ%YGHIA", 0x5fa8197c},
{"jegAEJ%YJ", 0x31459048},
{"WR(^HJ", 0x9b530e01},
{"SFJN(", 0xc8381769},
{"ERJ^HJKDR^YJHSDI*F", 0xff1580c3},
{"JB*SEJTHI", 0x7e05a7f3},
{"$^JK", 0xad57f96e},
{"top7djSRTIJAS", 0xe699d7d3},
{"E%TJCA#(ARHKFYO<<>PJKU", 0xc046b1d3},
{"OZ,/TDKJSR56./,YJRTKJOPSJ$%HI", 0xee054684},
{"SCJ%IGJAEFGJ", 0x11f310d8},
{"ETH87,9\\[p[", 0xdcf11cd6},
{"ISRTJHIS", 0x1fffa759},
{"ROTNJH", 0x7826ea9b},
{"ISEN%GIQ^", 0xf364917c},
{"IURT^UIOEMR^Y", 0x64aafe4e},
{"N3564w57E%&IDTK7mrynjwR^KJ", 0x4905f46f}
};
for (auto &test : test_vectors_char)
{
uint32_t const hash = murmur_hash3<uint32_t>(test.s);
printf("%-30s: %08x == %08x (%s)\n", test.s, hash, test.expected, hash == test.expected ? "ok" : "fail");
}
static_assert(murmur_hash3<uint32_t>("Hello, world!") == 0xc0363e43, "fail");
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment