Skip to content

Instantly share code, notes, and snippets.

@Tessil
Last active December 13, 2021 06:05
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Tessil/72e11891fc155f5b2eb53de22cbc4053 to your computer and use it in GitHub Desktop.
Save Tessil/72e11891fc155f5b2eb53de22cbc4053 to your computer and use it in GitHub Desktop.
HAT-trie benchmark
#include <iostream>
#include <fstream>
#include <vector>
#include <iterator>
#include <algorithm>
#include <string>
#include <chrono>
#include <sys/time.h>
#include <sys/resource.h>
#include <unistd.h>
#include <string.h>
#include <city.h>
struct str_hash {
std::size_t operator()(const char* key, std::size_t key_size) const {
return CityHash64(key, key_size);
}
std::size_t operator()(const std::string& key) const {
return CityHash64(key.c_str(), key.size());
}
};
using value_type = int;
#ifdef BENCH_STD_UNORDERED_MAP
#include <unordered_map>
using map_type = std::unordered_map<std::string, value_type, str_hash>;
map_type construct() {
return map_type();
}
void reserve(map_type& map, std::size_t nb_elements) {
map.reserve(nb_elements);
}
void insert(map_type& map, const std::string& str, value_type value) {
map.insert({str, value});
}
bool find(const map_type& map, const std::string& str) {
return map.find(str) != map.end();
}
void destroy(map_type& /*map*/) {
}
#elif BENCH_TSL_HOPSCOTCH_MAP
#include <hopscotch_map.h>
using map_type = tsl::hopscotch_map<std::string, value_type, str_hash>;
map_type construct() {
return map_type();
}
void reserve(map_type& map, std::size_t nb_elements) {
map.reserve(nb_elements);
}
void insert(map_type& map, const std::string& str, value_type value) {
map.insert({str, value});
}
bool find(const map_type& map, const std::string& str) {
return map.find(str) != map.end();
}
void destroy(map_type& /*map*/) {
}
#elif BENCH_GOOGLE_DENSE_MAP
#include <google/dense_hash_map>
using map_type = google::dense_hash_map<std::string, value_type, str_hash>;
map_type construct() {
map_type map;
map.set_empty_key("");
return map;
}
void reserve(map_type& map, std::size_t nb_elements) {
/*
* There is no reserve function.
*
* The following doesn't work well either, it reserves too much buckets
* (much more than the closest power of 2).
*
* map.rehash(std::size_t(std::ceil(float(nb_elements)/map.max_load_factor())));
*
* So use the constructor which take the number of elements to reserve in parameters.
*/
map_type map_tmp(nb_elements);
map_tmp.set_empty_key("");
map.swap(map_tmp);
}
void insert(map_type& map, const std::string& str, value_type value) {
map.insert({str, value});
}
bool find(const map_type& map, const std::string& str) {
return map.find(str) != map.end();
}
void destroy(map_type& /*map*/) {
}
#elif BENCH_SPP_SPARSE_HASH_MAP
#include <sparsepp/spp.h>
using map_type = spp::sparse_hash_map<std::string, value_type, str_hash>;
map_type construct() {
return map_type();
}
void reserve(map_type& map, std::size_t nb_elements) {
map.reserve(nb_elements);
}
void insert(map_type& map, const std::string& str, value_type value) {
map.insert({str, value});
}
bool find(const map_type& map, const std::string& str) {
return map.find(str) != map.end();
}
void destroy(map_type& /*map*/) {
}
#elif BENCH_TSL_ARRAY_MAP
#include <array_map.h>
using map_type = tsl::array_map<char, value_type, str_hash, tsl::str_equal_ah<char>, false>;
map_type construct() {
return map_type();
}
void reserve(map_type& map, std::size_t nb_elements) {
map.reserve(nb_elements);
}
void insert(map_type& map, const std::string& str, value_type value) {
map.insert(str, value);
}
bool find(const map_type& map, const std::string& str) {
return map.find(str) != map.end();
}
void destroy(map_type& /*map*/) {
}
#elif (BENCH_TSL_HTRIE_MAP || BENCH_TSL_HTRIE_MAP_LF_4 || BENCH_TSL_HTRIE_MAP_LF_2 || BENCH_TSL_HTRIE_MAP_LF_1)
#include <htrie_map.h>
using map_type = tsl::htrie_map<char, value_type, str_hash>;
map_type construct() {
map_type map;
#ifdef BENCH_TSL_HTRIE_MAP_LF_4
map.max_load_factor(4.0f);
#elif BENCH_TSL_HTRIE_MAP_LF_2
map.max_load_factor(2.0f);
#elif BENCH_TSL_HTRIE_MAP_LF_1
map.max_load_factor(1.0f);
#endif
return map;
}
void insert(map_type& map, const std::string& str, value_type value) {
map.insert(str, value);
}
bool find(const map_type& map, const std::string& str) {
return map.find(str) != map.end();
}
void destroy(map_type& /*map*/) {
}
#elif BENCH_DC_HAT_TRIE
#include <hat-trie/hat-trie.h>
using map_type = hattrie_t*;
map_type construct() {
return hattrie_create();
}
void insert(map_type& map, const std::string& str, value_type value) {
value_t* map_value = hattrie_get(map, str.c_str(), str.size());
*map_value = value;
}
bool find(const map_type& map, const std::string& str) {
value_t* map_value = hattrie_tryget(map, str.c_str(), str.size());
return map_value != nullptr;
}
void destroy(map_type& map) {
hattrie_free(map);
}
#elif BENCH_CEDAR
#include <cedar.h>
using map_type = cedar::da<value_type, -1, -2, true>;
#define NO_CONSTRUCT_METHOD
void insert(map_type& map, const std::string& str, value_type value) {
map.update(str.c_str(), str.size(), value);
}
bool find(const map_type& map, const std::string& str) {
return map.exactMatchSearch<value_type>(str.c_str(), str.size()) != -1;
}
void destroy(map_type& /*map*/) {
}
#elif BENCH_CEDAR_UNORDERED
#include <cedar.h>
using map_type = cedar::da<value_type, -1, -2, false>;
#define NO_CONSTRUCT_METHOD
void insert(map_type& map, const std::string& str, value_type value) {
map.update(str.c_str(), str.size(), value);
}
bool find(const map_type& map, const std::string& str) {
return map.exactMatchSearch<value_type>(str.c_str(), str.size()) != -1;
}
void destroy(map_type& /*map*/) {
}
#elif BENCH_CEDARPP
#include <cedarpp.h>
using map_type = cedar::da<value_type, -1, -2, true>;
#define NO_CONSTRUCT_METHOD
void insert(map_type& map, const std::string& str, value_type value) {
map.update(str.c_str(), str.size(), value);
}
bool find(const map_type& map, const std::string& str) {
return map.exactMatchSearch<value_type>(str.c_str(), str.size()) != -1;
}
void destroy(map_type& /*map*/) {
}
#elif BENCH_CEDARPP_UNORDERED
#include <cedarpp.h>
using map_type = cedar::da<value_type, -1, -2, false>;
#define NO_CONSTRUCT_METHOD
void insert(map_type& map, const std::string& str, value_type value) {
map.update(str.c_str(), str.size(), value);
}
bool find(const map_type& map, const std::string& str) {
return map.exactMatchSearch<value_type>(str.c_str(), str.size()) != -1;
}
void destroy(map_type& /*map*/) {
}
#elif BENCH_JUDY_SL
#include <Judy.h>
using map_type = Pvoid_t;
map_type construct() {
return nullptr;
}
void insert(map_type& map, const std::string& str, value_type value) {
PWord_t value_ptr;
JSLI(value_ptr, map, (const uint8_t*) str.c_str());
*value_ptr = value;
}
bool find(const map_type& map, const std::string& str) {
PWord_t value_ptr = 0;
JSLG(value_ptr, map, (const uint8_t*) str.c_str());
return value_ptr != nullptr;
}
void destroy(map_type& map) {
Word_t size;
JSLFA(size, map);
}
#elif BENCH_JUDY_HS
#include <Judy.h>
using map_type = Pvoid_t;
map_type construct() {
return nullptr;
}
void insert(map_type& map, std::string& str, value_type value) {
PWord_t value_ptr;
JHSI(value_ptr, map, (uint8_t*) str.c_str(), str.size());
*value_ptr = value;
}
bool find(const map_type& map, std::string& str) {
PWord_t value_ptr = 0;
JHSG(value_ptr, map, (uint8_t*) str.c_str(), str.size());
return value_ptr != nullptr;
}
void destroy(map_type& map) {
Word_t size;
JHSFA(size, map);
}
#elif (BENCH_QP_TRIE || BENCH_CRITBIT_TRIE)
#include <Tbl.h>
using map_type = Tbl*;
map_type construct() {
return nullptr;
}
void insert(map_type& map, const std::string& str, value_type value) {
// As the API of qp trie behaves quite differently than the other implementations,
// two ugly hacks are done to do a fair comparison.
/**
* The qp trie library doesn't make a copy of the string passed in parameter.
* It only stores the pointer to the key contrary to all the other implementations
* which do a copy of the string.
*
* The str parameter is only a temporary value, so we also do a manual copy.
* We store the copy pointer directly into the qp trie to avoid an additional structure
* (which would require sizeof(void*) extra bytes for each string), we need
* to call free() on the keys manually on destruction.
*/
char* str_copy = strndup(str.c_str(), str.size());
/**
* We can't store the int directly into the qp trie as it only take a
* void* parameter. We could allocate an int in the heap but it would require
* an extra allocation and take an extra sizeof(void*) bytes.
*
* As this would be unfair compared to the other structures, we store
* the value itself as an invalid pointer (with +1 to avoid a NULL value and
* a shift to avoid the alignement check). To retrieve the real value,
* we just have to do the opposite operation.
*/
void* value_as_ptr = (void*) (((uint64_t) value + 1) << 2);
map_type new_map_root = Tsetl(map, str_copy, str.size(), value_as_ptr);
if(new_map_root == nullptr) {
free(str_copy);
throw std::runtime_error("Couldn't perform insert in qp or critbit trie.");
}
map = new_map_root;
}
bool find(const map_type& map, const std::string& str) {
return Tgetl(map, str.c_str(), str.size()) != NULL;
}
void destroy(map_type& map) {
char* key = NULL;
std::size_t key_length = 0;
void* value = NULL;
while(Tnextl(map, &key, &key_length, &value)) {
map = Tdell(map, key, key_length);
free(key);
key = NULL;
key_length = 0;
value = NULL;
}
}
#else
#error "No defined test"
#endif
std::size_t get_memory_usage_bytes() {
std::ifstream file("/proc/self/statm");
std::size_t memory;
file >> memory; // Ignore first
file >> memory;
return memory * getpagesize();
}
template<typename T>
void bench_insert(T& map, const std::string& file_words_insert) {
std::ifstream file(file_words_insert);
if(!file.is_open()) {
throw std::runtime_error("Couldn't read " + file_words_insert + ".");
}
std::vector<std::string> lines;
std::copy(std::istream_iterator<std::string>(file), std::istream_iterator<std::string>(),
std::back_inserter(lines));
const size_t mem_start = get_memory_usage_bytes();
const auto chrono_start = std::chrono::high_resolution_clock::now();
#ifdef BENCH_RESERVE
reserve(map, lines.size());
#endif
int i = 0;
for(auto& line: lines) {
insert(map, line, i);
i++;
}
const auto chrono_end = std::chrono::high_resolution_clock::now();
const size_t mem_end = get_memory_usage_bytes();
std::cout << std::chrono::duration_cast<std::chrono::nanoseconds>(chrono_end - chrono_start).count()
/ double(lines.size()) << " ns/key" << std::endl;
std::cout << double(mem_end - mem_start)/double(1024*1024) << " MiB (" << (mem_end - mem_start) << " bytes)." << std::endl;
}
template<typename T>
void bench_read(const T& map, const std::string& file_words_read) {
std::ifstream file(file_words_read);
if(!file.is_open()) {
throw std::runtime_error("Couldn't read " + file_words_read + ".");
}
std::vector<std::string> lines;
std::copy(std::istream_iterator<std::string>(file), std::istream_iterator<std::string>(),
std::back_inserter(lines));
const auto chrono_start = std::chrono::high_resolution_clock::now();
std::size_t nb_found = 0;
for(auto& line: lines) {
if(find(map, line)) {
nb_found++;
}
}
const auto chrono_end = std::chrono::high_resolution_clock::now();
std::cout << std::chrono::duration_cast<std::chrono::nanoseconds>(chrono_end - chrono_start).count()
/ double(lines.size()) << " ns/key" << std::endl;
std::cout << "Found " << nb_found << " of " << lines.size() << " elements." << std::endl;
}
void bench(const std::string& file_words_insert, const std::string& file_words_read) {
#ifdef NO_CONSTRUCT_METHOD
map_type map;
#else
auto map = construct();
#endif
bench_insert(map, file_words_insert);
bench_read(map, file_words_read);
destroy(map);
}
void insert_no_measure(const std::string& file_words_insert) {
#ifdef NO_CONSTRUCT_METHOD
map_type map;
#else
auto map = construct();
#endif
std::ifstream file(file_words_insert);
if(!file.is_open()) {
throw std::runtime_error("Couldn't read " + file_words_insert + ".");
}
#ifdef BENCH_RESERVE
reserve(map, std::distance(std::istream_iterator<std::string>(file), std::istream_iterator<std::string>()));
file.clear();
file.seekg(0);
#endif
int i = 0;
std::string line;
while(file >> line) {
insert(map, line, i);
i++;
}
destroy(map);
}
int main(int argc, char** argv) {
if(argc == 2) {
insert_no_measure(argv[1]);
}
else if(argc == 3) {
bench(argv[1], argv[2]);
}
else {
std::cout << "Usage: " << argv[0] << " words_file_insert" << std::endl;
std::cout << "Usage: " << argv[0] << " words_file_insert words_file_read" << std::endl;
exit(1);
}
}
CXX=g++
CXXFLAGS=-lcityhash -std=c++11 -march=native -DNDEBUG -O3
WORDS_INSERT_FILE="./enwiki-20170320-all-titles-in-ns0"
WORDS_READ_FILE="./enwiki-20170320-all-titles-in-ns0"
QP_TRIE_LIB_PATH="./qp/"
all: compile run run_peak
compile:
$(CXX) $(CXXFLAGS) -o std_unordered_map -DBENCH_STD_UNORDERED_MAP bench.cpp
$(CXX) $(CXXFLAGS) -o std_unordered_map_reserve -DBENCH_STD_UNORDERED_MAP -DBENCH_RESERVE bench.cpp
$(CXX) $(CXXFLAGS) -o tsl_hopscotch_map -DBENCH_TSL_HOPSCOTCH_MAP bench.cpp
$(CXX) $(CXXFLAGS) -o tsl_hopscotch_map_reserve -DBENCH_TSL_HOPSCOTCH_MAP -DBENCH_RESERVE bench.cpp
$(CXX) $(CXXFLAGS) -o google_dense_map -DBENCH_GOOGLE_DENSE_MAP bench.cpp
$(CXX) $(CXXFLAGS) -o google_dense_map_reserve -DBENCH_GOOGLE_DENSE_MAP -DBENCH_RESERVE bench.cpp
$(CXX) $(CXXFLAGS) -o spp_sparsepp -DBENCH_SPP_SPARSE_HASH_MAP bench.cpp
$(CXX) $(CXXFLAGS) -o spp_sparsepp_reserve -DBENCH_SPP_SPARSE_HASH_MAP -DBENCH_RESERVE bench.cpp
$(CXX) $(CXXFLAGS) -o tsl_array_hash -DBENCH_TSL_ARRAY_MAP bench.cpp
$(CXX) $(CXXFLAGS) -o tsl_array_hash_reserve -DBENCH_TSL_ARRAY_MAP -DBENCH_RESERVE bench.cpp
$(CXX) $(CXXFLAGS) -o tsl_htrie_map -DBENCH_TSL_HTRIE_MAP bench.cpp
$(CXX) $(CXXFLAGS) -o tsl_htrie_map_lf_4 -DBENCH_TSL_HTRIE_MAP_LF_4 bench.cpp
$(CXX) $(CXXFLAGS) -o tsl_htrie_map_lf_2 -DBENCH_TSL_HTRIE_MAP_LF_2 bench.cpp
$(CXX) $(CXXFLAGS) -o tsl_htrie_map_lf_1 -DBENCH_TSL_HTRIE_MAP_LF_1 bench.cpp
$(CXX) $(CXXFLAGS) -o judy_sl -DBENCH_JUDY_SL -fpermissive -lJudy bench.cpp
$(CXX) $(CXXFLAGS) -o judy_hs -DBENCH_JUDY_HS -fpermissive -lJudy bench.cpp
$(CXX) $(CXXFLAGS) -o dc_hat_trie -DBENCH_DC_HAT_TRIE -fpermissive -lhat-trie bench.cpp
$(CXX) $(CXXFLAGS) -o cedar -DBENCH_CEDAR bench.cpp
$(CXX) $(CXXFLAGS) -o cedar_unordered -DBENCH_CEDAR_UNORDERED bench.cpp
$(CXX) $(CXXFLAGS) -o cedar_reduced -DBENCH_CEDAR -DUSE_REDUCED_TRIE bench.cpp
$(CXX) $(CXXFLAGS) -o cedar_unordered_reduced -DBENCH_CEDAR_UNORDERED -DUSE_REDUCED_TRIE bench.cpp
$(CXX) $(CXXFLAGS) -o cedarpp -DBENCH_CEDARPP bench.cpp
$(CXX) $(CXXFLAGS) -o cedarpp_unordered -DBENCH_CEDARPP_UNORDERED bench.cpp
$(CXX) $(CXXFLAGS) -o qp_trie -DBENCH_QP_TRIE -fpermissive $(QP_TRIE_LIB_PATH)/qp.c $(QP_TRIE_LIB_PATH)/Tbl.c bench.cpp
$(CXX) $(CXXFLAGS) -o critbit_trie -DBENCH_CRITBIT_TRIE -fpermissive $(QP_TRIE_LIB_PATH)/cb.c $(QP_TRIE_LIB_PATH)/Tbl.c bench.cpp
run:
./std_unordered_map $(WORDS_INSERT_FILE) $(WORDS_READ_FILE)
./std_unordered_map_reserve $(WORDS_INSERT_FILE) $(WORDS_READ_FILE)
./tsl_hopscotch_map $(WORDS_INSERT_FILE) $(WORDS_READ_FILE)
./tsl_hopscotch_map_reserve $(WORDS_INSERT_FILE) $(WORDS_READ_FILE)
./google_dense_map $(WORDS_INSERT_FILE) $(WORDS_READ_FILE)
./google_dense_map_reserve $(WORDS_INSERT_FILE) $(WORDS_READ_FILE)
./spp_sparsepp $(WORDS_INSERT_FILE) $(WORDS_READ_FILE)
./spp_sparsepp_reserve $(WORDS_INSERT_FILE) $(WORDS_READ_FILE)
./tsl_array_hash $(WORDS_INSERT_FILE) $(WORDS_READ_FILE)
./tsl_array_hash_reserve $(WORDS_INSERT_FILE) $(WORDS_READ_FILE)
./tsl_htrie_map $(WORDS_INSERT_FILE) $(WORDS_READ_FILE)
./tsl_htrie_map_lf_4 $(WORDS_INSERT_FILE) $(WORDS_READ_FILE)
./tsl_htrie_map_lf_2 $(WORDS_INSERT_FILE) $(WORDS_READ_FILE)
./tsl_htrie_map_lf_1 $(WORDS_INSERT_FILE) $(WORDS_READ_FILE)
./judy_sl $(WORDS_INSERT_FILE) $(WORDS_READ_FILE)
./judy_hs $(WORDS_INSERT_FILE) $(WORDS_READ_FILE)
./dc_hat_trie $(WORDS_INSERT_FILE) $(WORDS_READ_FILE)
./cedar $(WORDS_INSERT_FILE) $(WORDS_READ_FILE)
./cedar_unordered $(WORDS_INSERT_FILE) $(WORDS_READ_FILE)
./cedar_reduced $(WORDS_INSERT_FILE) $(WORDS_READ_FILE)
./cedar_unordered_reduced $(WORDS_INSERT_FILE) $(WORDS_READ_FILE)
./cedarpp $(WORDS_INSERT_FILE) $(WORDS_READ_FILE)
./cedarpp_unordered $(WORDS_INSERT_FILE) $(WORDS_READ_FILE)
./qp_trie $(WORDS_INSERT_FILE) $(WORDS_READ_FILE)
./critbit_trie $(WORDS_INSERT_FILE) $(WORDS_READ_FILE)
run_peak:
/usr/bin/time -f "%M" ./std_unordered_map $(WORDS_INSERT_FILE)
/usr/bin/time -f "%M" ./std_unordered_map_reserve $(WORDS_INSERT_FILE)
/usr/bin/time -f "%M" ./tsl_hopscotch_map $(WORDS_INSERT_FILE)
/usr/bin/time -f "%M" ./tsl_hopscotch_map_reserve $(WORDS_INSERT_FILE)
/usr/bin/time -f "%M" ./google_dense_map $(WORDS_INSERT_FILE)
/usr/bin/time -f "%M" ./google_dense_map_reserve $(WORDS_INSERT_FILE)
/usr/bin/time -f "%M" ./spp_sparsepp $(WORDS_INSERT_FILE)
/usr/bin/time -f "%M" ./spp_sparsepp_reserve $(WORDS_INSERT_FILE)
/usr/bin/time -f "%M" ./tsl_array_hash $(WORDS_INSERT_FILE)
/usr/bin/time -f "%M" ./tsl_array_hash_reserve $(WORDS_INSERT_FILE)
/usr/bin/time -f "%M" ./tsl_htrie_map $(WORDS_INSERT_FILE)
/usr/bin/time -f "%M" ./tsl_htrie_map_lf_4 $(WORDS_INSERT_FILE)
/usr/bin/time -f "%M" ./tsl_htrie_map_lf_2 $(WORDS_INSERT_FILE)
/usr/bin/time -f "%M" ./tsl_htrie_map_lf_1 $(WORDS_INSERT_FILE)
/usr/bin/time -f "%M" ./judy_sl $(WORDS_INSERT_FILE)
/usr/bin/time -f "%M" ./judy_hs $(WORDS_INSERT_FILE)
/usr/bin/time -f "%M" ./dc_hat_trie $(WORDS_INSERT_FILE)
/usr/bin/time -f "%M" ./cedar $(WORDS_INSERT_FILE)
/usr/bin/time -f "%M" ./cedar_unordered $(WORDS_INSERT_FILE)
/usr/bin/time -f "%M" ./cedar_reduced $(WORDS_INSERT_FILE)
/usr/bin/time -f "%M" ./cedar_unordered_reduced $(WORDS_INSERT_FILE)
/usr/bin/time -f "%M" ./cedarpp $(WORDS_INSERT_FILE)
/usr/bin/time -f "%M" ./cedarpp_unordered $(WORDS_INSERT_FILE)
/usr/bin/time -f "%M" ./qp_trie $(WORDS_INSERT_FILE)
/usr/bin/time -f "%M" ./critbit_trie $(WORDS_INSERT_FILE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment