Skip to content

Instantly share code, notes, and snippets.

@simmplecoder
Created July 12, 2018 18:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save simmplecoder/25982db4135c309601cdcde04f10d4e2 to your computer and use it in GitHub Desktop.
Save simmplecoder/25982db4135c309601cdcde04f10d4e2 to your computer and use it in GitHub Desktop.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// https://www.boost.org/LICENSE_1_0.txt)
#include <cassert>
#include <cstdint>
#include <cstring>
#include <functional>
#include <type_traits>
#include <unordered_set>
#include <utility>
#include <algorithm>
#include <cstddef>
template <typename Container>
using has_resize_t = decltype(std::declval<Container>().resize(std::size_t{}));
template <typename Container, typename = std::void_t<>>
struct has_resize : std::false_type {};
template <typename Container>
struct has_resize<Container, std::void_t<has_resize_t<Container>>> : std::true_type {};
namespace shino
{
struct bucket_t
{
std::uint32_t count;
std::uint32_t offset;
};
// This cannot be a std::pair<> because the default constructor is not trivial
template <typename ValT, typename HashT = std::uint64_t>
struct flatmap_element
{
HashT key;
ValT val;
};
struct flat_hashmap_canvas
{
std::byte* buffer;
std::size_t size;
flat_hashmap_canvas(std::byte* buffer, std::size_t size) :
buffer(buffer),
size(size)
{}
std::uint32_t bucket_count() const
{
return (reinterpret_cast<std::uint32_t*>(buffer))[0];
}
bucket_t* buckets() const
{
return reinterpret_cast<bucket_t*>(buffer + 8);
}
std::byte* payload() const
{
return buffer + sizeof(bucket_count()) + sizeof(bucket_t) * bucket_count();
}
};
template <typename ValT, typename HashT = std::uint64_t>
bool is_valid_canvas(flat_hashmap_canvas canvas)
{
return true;
}
// The flat hash table is meant to be used when a hash table is baked once,
// typically during a build process, and then used repeatadly. It can be
// initialized by simply pointing it at a memory location containing the
// raw data.
// The table does NOT maintain ownership over the data.
template <typename ValueType, typename KeyType, typename HashType = std::uint64_t>
class flat_hashmap_view
{
static_assert(std::is_trivial_v<ValueType>);
static_assert(std::is_trivial_v<HashType>);
using element_type = flatmap_element<ValueType, HashType>;
public:
explicit flat_hashmap_view(flat_hashmap_canvas canvas):
canvas(canvas),
bucket_count_(canvas.bucket_count())
{
if (canvas.size < sizeof(bucket_count_))
{
throw std::invalid_argument("invalid flat hash data");
}
std::byte const* read_ptr = canvas.buffer;
std::memcpy(&bucket_count_, read_ptr, sizeof(bucket_count_));
read_ptr += sizeof(bucket_count_);
if (canvas.size < sizeof(bucket_count_) + bucket_count_ * sizeof(bucket_t))
{
throw std::invalid_argument("invalid flat hash data");
}
if (std::uintptr_t(read_ptr) % alignof(bucket_t) != 0)
{
throw std::invalid_argument("flat hash data appears to be misaligned");
}
static_assert(std::is_trivially_constructible_v<bucket_t>);
buckets_ = new(const_cast<std::byte*>(read_ptr)) bucket_t[bucket_count_];
for (std::uint32_t i = 0; i < bucket_count_; ++i)
{
if (buckets_[i].offset + buckets_[i].count * sizeof(flatmap_element<ValueType, HashType>) > canvas.size)
{
throw std::invalid_argument("invalid flat hash data");
}
auto bucket_loc = mem_loc_ + buckets_[i].offset;
if (std::uintptr_t(bucket_loc) % alignof(flatmap_element<ValueType, HashType>) != 0)
{
throw std::invalid_argument("flat hash data appears to be misaligned");
}
static_assert(std::is_trivially_constructible_v<flatmap_element<ValueType, HashType>>);
new(const_cast<std::byte*>(bucket_loc)) element_type[buckets_[i].count];
}
}
// Lookup a value from the hash table, throws if the value is not
// present.
ValueType const& at(KeyType const& key)
{
HashType key_hash = std::hash<KeyType>{}(key);
auto const& bucket = canvas.buckets()[key_hash % bucket_count_];
if (bucket.count > 0)
{
auto elem_table =
reinterpret_cast<flatmap_element<ValueType, HashType> const*>(mem_loc_ + bucket.offset);
auto end = elem_table + bucket.count;
// Elements within a bucket are stored as a sorted vector, so we
// can do a binary search.
auto found = std::lower_bound(
elem_table, end, key_hash,
[](flatmap_element<ValueType, HashType> const& lhs, HashType const& rhs) { return lhs.key < rhs; });
if (found != end && found->key == key_hash)
{
return found->val;
}
}
throw std::out_of_range("element not present in flat hash table");
}
private:
std::byte const* mem_loc_;
const flat_hashmap_canvas canvas;
std::uint32_t bucket_count_;
bucket_t const* buckets_;
};
// Bakes a dataset into a flat_has_table raw data chunk.
template <typename KeyT, typename ValT, typename Container, typename HashT = std::uint64_t>
flat_hashmap_canvas make_flatmap_canvas(
std::vector<std::pair<KeyT, ValT>> const& data, Container& buffer)
{
static_assert(std::is_trivial_v<ValT>);
static_assert(std::is_trivial_v<HashT>);
// TODO: Better process to determine optimal bucket count.
std::uint32_t bucket_count = static_cast<uint32_t>(data.size() / 2 + 1);
std::vector<std::vector<flatmap_element<ValT, HashT>>> buckets(bucket_count);
{
// Keep track of seen hashes since we do not tolerate true collisions.
std::hash<KeyT> hasher;
std::unordered_set<HashT> hash_values_set;
hash_values_set.reserve(data.size());
for (auto const& d : data)
{
HashT hash_val = HashT(hasher(d.first));
if (hash_values_set.count(hash_val) != 0)
{
throw std::runtime_error(
"True hash collision in dataset, cannot make a flat hash table out "
"of it.");
}
hash_values_set.insert(hash_val);
buckets[hash_val % bucket_count].emplace_back(flatmap_element<ValT, HashT>{hash_val, d.second});
}
}
std::size_t header_mem_size = 0;
header_mem_size += sizeof(std::uint32_t); // for bucket_count
header_mem_size += sizeof(bucket_t) * bucket_count; // bucket table
// Make sure the actual value payloads is correctly aligned
constexpr auto elem_align = alignof(flatmap_element<ValT, HashT>);
static_assert((elem_align & (elem_align - 1)) == 0);
header_mem_size = (header_mem_size + (elem_align - 1)) & ~(elem_align - 1);
auto mem_size = header_mem_size + sizeof(flatmap_element<ValT, HashT>) * data.size();
bool is_enough_space = buffer.size() >= mem_size;
if (!is_enough_space)
{
if constexpr (has_resize<Container>{})
{
buffer.resize(mem_size);
}
else
{
throw std::invalid_argument("Statically sized container doesn't have enough space");
}
}
auto buffer_start = buffer.data();
std::byte* header_w_ptr = buffer_start;
std::byte* data_w_ptr = buffer_start + header_mem_size;
auto write = [data_boundary = buffer_start + buffer.size()](std::byte*& dst, auto const& v)
{
assert(dst + sizeof(v) <= data_boundary);
std::memcpy(dst, &v, sizeof(v));
dst += sizeof(v);
};
write(header_w_ptr, bucket_count);
for (auto& b : buckets)
{
std::sort(b.begin(), b.end(), [](auto const& lhs, auto const& rhs)
{
return lhs.key < rhs.key;
});
auto offset = data_w_ptr - buffer_start;
bucket_t bucket_header{std::uint32_t(b.size()), std::uint32_t(offset)};
write(header_w_ptr, bucket_header);
for (auto const& e : b)
{
write(data_w_ptr, e);
}
}
return {
static_cast<std::byte*>(buffer.data()),
buffer.size()
};
}
}
#include <iostream>
#include <string_view>
#include <vector>
int main() {
std::vector<std::pair<std::string, float>> raw_values = {
{ "hi", 12.0f },{ "yo", 10.0f },{ "sup", 3.0f },
};
std::array<std::byte, 1024> buffer{};
auto canvas = shino::make_flatmap_canvas(raw_values, buffer);
shino::flat_hashmap_view<float, std::string_view> values(canvas);
std::cout << values.at("yo") << "\n";
std::cout << values.at("sup") << "\n";
std::cout << values.at("hi") << "\n";
static_assert(has_resize<std::vector<char>>{});
static_assert(!has_resize<std::array<char, 12>>{});
}
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment