Skip to content

Instantly share code, notes, and snippets.

@Florianjw

Florianjw/main.cpp

Created Jan 21, 2019
Embed
What would you like to do?
Risky Hash
#include <array>
#include <cstddef>
#include <cstdint>
#include <iomanip>
#include <iostream>
#include <vector>
/*
Copyright: Boaz Segev 2019, Florian Weber 2019
License: MIT
*/
template <unsigned bits, typename Unsigned>
inline Unsigned lrot(Unsigned value) {
constexpr auto u_bits = sizeof(Unsigned) * 8u;
static_assert(bits < u_bits);
return (value << bits) bitor (value >> (u_bits - bits));
}
using u8 = std::uint8_t;
template <typename Unsigned>
inline Unsigned read_be(const std::byte* raw) {
auto ret = Unsigned{};
for (auto i = 0u; i < sizeof(Unsigned); ++i) {
ret <<= 8u;
ret |= std::to_integer<Unsigned>(raw[i]);
}
return ret;
}
template <typename Unsigned>
inline Unsigned read_some_le(const std::byte* raw, std::size_t n) {
auto ret = Unsigned{};
for (auto i = 0u; i < n; ++i) {
ret |= std::to_integer<Unsigned>(raw[i]) << ((n - i) * 8u);
}
return ret;
}
using u64 = std::uint64_t;
using u8 = std::uint8_t;
/* The primes used by Risky Hash */
const auto prime_0 = u64{0xFBBA3FA15B22113B};
const auto prime_1 = u64{0xAB137439982B86C9};
inline u64 risky_hash_round(u64 w, u64 state) { return (lrot<33>(state xor w) + w) * prime_0; }
u64 risky_hash(const u64* data, std::size_t len, std::size_t byte_len, u64 seed) {
auto state = std::array<u64, 4>{
seed ^ prime_1,
~seed + prime_1,
lrot<17u>(seed) ^ prime_1,
lrot<33>(seed) + prime_1,
};
/* consume 64bit blocks */
for (auto i = std::size_t{}; i < len; ++i) {
state[i % 4u] = risky_hash_round(data[i], state[i % 4u]);
}
/* merge and mix */
auto result =
lrot<17>(state[0]) + lrot<13>(state[1]) + lrot<47>(state[2]) + lrot<57>(state[3]);
result += byte_len + state[0] * prime_1;
result ^= lrot<13>(result);
result += state[1] * prime_1;
result ^= lrot<29>(result);
result += state[2] * prime_1;
result ^= lrot<33>(result);
result += state[3] * prime_1;
result ^= lrot<51>(result);
/* irreversible avalanche... I think */
result ^= (result >> 29) * prime_0;
return result;
}
std::vector<u64> pad(const std::byte* data, std::size_t len) {
auto ret = std::vector<u64>{};
const auto words = len / sizeof(u64);
const auto bytes = len % sizeof(u64);
ret.reserve(words + (bytes != 0 ? 1 : 0));
for (auto i = std::size_t{}; i < words; ++i) {
ret.push_back(read_be<u64>(data));
data += sizeof(u64);
}
if (bytes != 0u) {
ret.push_back(read_some_le<u64>(data, bytes));
}
return ret;
}
int main() {
const auto str = std::string{"Some somewhat long string"};
const auto seed = std::uint64_t{0x1234567891827364ul};
const auto padded_str = pad(reinterpret_cast<const std::byte*>(str.data()), str.size());
const auto hash = risky_hash(padded_str.data(), padded_str.size(), str.size(), seed);
std::cout << "0x" << std::hex << std::setfill('0') << std::setw(16) << hash << '\n';
const auto expected = 0x2e61c0f8ddf7d707ul;
std::cout << ((hash == expected) ? "correct\n" : "WRONG!\n");
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment