Skip to content

Instantly share code, notes, and snippets.

@Lancern
Created July 5, 2023 08:10
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Lancern/d1403d71e8256e166c3cabb1f7b5e194 to your computer and use it in GitHub Desktop.
Save Lancern/d1403d71e8256e166c3cabb1f7b5e194 to your computer and use it in GitHub Desktop.
Hazard pointer benchmark
#include <algorithm>
#include <atomic>
#include <chrono>
#include <memory>
#include <iostream>
#include <random>
#include <thread>
#include <utility>
#include <vector>
#include "folly/synchronization/Hazptr-fwd.h"
#include "folly/synchronization/Hazptr.h"
#include "folly/synchronization/HazptrHolder.h"
#include "folly/synchronization/HazptrObj.h"
template <typename T>
static void DoNotOptimize(T&& obj) {
asm volatile("" : : "r,m"(obj) : "memory");
}
template <typename F>
static std::chrono::nanoseconds BenchmarkOnce(F&& fn) noexcept {
auto start_time = std::chrono::high_resolution_clock::now();
fn();
auto end_time = std::chrono::high_resolution_clock::now();
return std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time);
}
template <typename F>
static std::chrono::nanoseconds Benchmark10(F&& fn) noexcept {
// Warm up.
for (auto i = 0; i < 3; ++i) {
fn();
}
std::chrono::nanoseconds total{0};
for (auto i = 0; i < 10; ++i) {
total += BenchmarkOnce(fn);
}
return total / 10;
}
class MultisetRc {
public:
MultisetRc() noexcept : _head{nullptr} {}
void insert(int value) {
std::shared_ptr<Node> old_head = _head.load(std::memory_order_relaxed);
std::shared_ptr<Node> node = std::make_shared<Node>(std::move(old_head), value);
_head.store(std::move(node), std::memory_order_release);
}
bool erase(int value) noexcept {
std::atomic<std::shared_ptr<Node>>* pos = &_head;
std::shared_ptr<Node> node;
while ((node = pos->load(std::memory_order_acquire))) {
if (node->value == value) {
std::shared_ptr<Node> node_next = node->next.load(std::memory_order_relaxed);
pos->store(std::move(node_next), std::memory_order_relaxed);
break;
}
pos = &node->next;
}
return node != nullptr;
}
bool find(int value) const noexcept {
for (
std::shared_ptr<Node> node = _head.load(std::memory_order_acquire);
node;
node = node->next.load(std::memory_order_acquire)
) {
if (node->value == value) {
return true;
}
}
return false;
}
private:
struct Node {
std::atomic<std::shared_ptr<Node>> next;
int value;
};
std::atomic<std::shared_ptr<Node>> _head;
};
class MultisetHazptr {
public:
MultisetHazptr() noexcept : _head{nullptr} {}
void insert(int value) {
Node* old_head = _head.load(std::memory_order_relaxed);
Node* node = new Node{{}, old_head, value};
_head.store(node, std::memory_order_release);
}
bool erase(int value) noexcept {
std::atomic<Node*>* pos = &_head;
Node* node;
while ((node = pos->load(std::memory_order_acquire))) {
if (node->value == value) {
Node* node_next = node->next.load(std::memory_order_relaxed);
pos->store(node_next, std::memory_order_relaxed);
node->retire();
break;
}
pos = &node->next;
}
return node != nullptr;
}
bool find(int value) const noexcept {
auto hp = folly::make_hazard_pointer(folly::default_hazptr_domain());
for (
Node* node = hp.protect(_head); // Acquire a hazard pointer to *_head
node;
node = hp.protect(node->next) // Acquire a hazard pointer to *node->next and release the existing hazard pointer
) {
if (node->value == value) {
return true;
}
}
return false;
}
private:
struct Node : folly::hazptr_obj_base<Node> {
std::atomic<Node*> next;
int value;
};
std::atomic<Node*> _head;
};
class MultisetBaseline {
public:
MultisetBaseline() noexcept : _head{nullptr} {}
void insert(int value) {
_head = new Node{_head, value};
}
bool erase(int value) noexcept {
Node** pos = &_head;
Node* node;
while ((node = *pos)) {
if (node->value == value) {
Node* node_next = node->next;
*pos = node->next;
delete node;
break;
}
pos = &node->next;
}
return node != nullptr;
}
bool find(int value) const noexcept {
for (Node* ptr = _head; ptr; ptr = ptr->next) {
if (ptr->value == value) {
return true;
}
}
return false;
}
private:
struct Node {
Node* next;
int value;
};
Node* _head;
};
static void BenchmarkInsert() {
auto bench = []<typename T>(int inserts) {
return Benchmark10([inserts] {
T d{};
for (auto i = 0; i < inserts; ++i) {
d.insert(i);
}
DoNotOptimize(d);
});
};
constexpr int NUM_INSERTS[] { 1000, 10000, 100000 };
std::cout << "multiset-hazptr insert:\n";
for (auto x : NUM_INSERTS) {
std::cout << x << ": " << bench.operator()<MultisetHazptr>(x) << "\n";
}
std::cout << "\n";
std::cout << "multiset-rc insert:\n";
for (auto x : NUM_INSERTS) {
std::cout << x << ": " << bench.operator()<MultisetRc>(x) << "\n";
}
std::cout << "\n";
std::cout << "multiset-baseline insert:\n";
for (auto x : NUM_INSERTS) {
std::cout << x << ": " << bench.operator()<MultisetBaseline>(x) << "\n";
}
std::cout << "\n";
}
static void BenchmarkErase() {
auto bench = []<typename T>(int initial_size) {
std::chrono::nanoseconds total{0};
for (auto i = 0; i < 10; ++i) {
T d{};
for (auto j = 0; j < initial_size; ++j) {
d.insert(j);
}
DoNotOptimize(d);
total += BenchmarkOnce([&d, initial_size] {
for (auto j = initial_size - 1; j >= 0; --j) {
d.erase(j);
}
DoNotOptimize(d);
});
}
return total / 10;
};
constexpr int INITIAL_SIZES[] { 1000, 10000, 100000 };
std::cout << "multiset-hazptr erase:\n";
for (auto x : INITIAL_SIZES) {
std::cout << x << ": " << bench.operator()<MultisetHazptr>(x) << "\n";
}
std::cout << "\n";
std::cout << "multiset-rc erase:\n";
for (auto x : INITIAL_SIZES) {
std::cout << x << ": " << bench.operator()<MultisetRc>(x) << "\n";
}
std::cout << "\n";
std::cout << "multiset-baseline erase:\n";
for (auto x : INITIAL_SIZES) {
std::cout << x << ": " << bench.operator()<MultisetBaseline>(x) << "\n";
}
std::cout << "\n";
}
static void BenchmarkFind() {
auto bench = []<typename T>(int num_threads) {
constexpr int SIZE = 100;
constexpr int NUM_FINDS = 10000;
T d{};
for (auto j = 0; j < SIZE; ++j) {
d.insert(j);
}
DoNotOptimize(d);
std::vector<std::thread> workers;
workers.reserve(num_threads);
std::atomic<std::int64_t> total_ns{0};
for (auto tid = 0; tid < num_threads; ++tid) {
workers.emplace_back([&d, &total_ns]() {
auto dur = BenchmarkOnce([&d]() {
for (auto i = 0; i < NUM_FINDS; ++i) {
DoNotOptimize(d.find(i % SIZE));
}
});
total_ns.fetch_add(dur.count());
});
}
for (auto& t : workers) {
t.join();
}
return std::chrono::nanoseconds{total_ns.load(std::memory_order_relaxed) / num_threads};
};
std::cout << "multiset-baseline find: " << bench.operator()<MultisetBaseline>(1) << "\n\n";
constexpr int NUM_THREADS[] { 1, 2, 3, 5, 10 };
std::cout << "multiset-rc find:\n";
for (auto x : NUM_THREADS) {
std::cout << x << ": " << bench.operator()<MultisetRc>(x) << "\n";
}
std::cout << "\n";
std::cout << "multiset-hazard find:\n";
for (auto x : NUM_THREADS) {
std::cout << x << ": " << bench.operator()<MultisetHazptr>(x) << "\n";
}
std::cout << "\n";
}
static void BenchmarkConcurrentReadWrite() {
auto bench = []<typename T>(int num_readers) {
constexpr int NUM_WRITES = 10000;
constexpr int NUM_READS = 10000;
T d{};
std::vector<int> order;
order.reserve(NUM_WRITES / 2);
for (auto i = 0; i < NUM_WRITES / 2; ++i) {
order.push_back(i);
}
std::shuffle(order.begin(), order.end(), std::mt19937{});
std::vector<std::thread> workers;
workers.reserve(num_readers);
std::int64_t writer_ns = 0;
std::atomic<std::int64_t> reader_ns = 0;
workers.emplace_back([&d, &order, &writer_ns]() {
auto dur = BenchmarkOnce([&d, &order]() {
for (auto x : order) {
d.insert(x);
}
DoNotOptimize(d);
for (auto i = 0; i < NUM_WRITES / 2; ++i) {
d.erase(i);
}
DoNotOptimize(d);
});
writer_ns = dur.count();
});
for (auto i = 0; i < num_readers; ++i) {
workers.emplace_back([&d, &reader_ns]() {
auto dur = BenchmarkOnce([&d]() {
for (auto i = 0; i < NUM_READS; ++i) {
DoNotOptimize(d.find(i));
}
});
reader_ns.fetch_add(dur.count(), std::memory_order_relaxed);
});
}
for (auto& t : workers) {
t.join();
}
return std::make_pair(std::chrono::nanoseconds{writer_ns}, std::chrono::nanoseconds{reader_ns.load(std::memory_order_relaxed) / num_readers});
};
constexpr int NUM_READERS[] { 1, 2, 3, 5, 10 };
for (auto i = 0; i < 3; ++i) {
bench.operator()<MultisetRc>(1);
}
std::cout << "multiset-rc concurrent:\n";
for (auto x : NUM_READERS) {
auto [writer_ns, reader_ns] = bench.operator()<MultisetRc>(x);
std::cout << x << ": " << writer_ns << " " << reader_ns << "\n";
}
std::cout << "\n";
for (auto i = 0; i < 3; ++i) {
bench.operator()<MultisetHazptr>(1);
}
std::cout << "multiset-hazptr concurrent:\n";
for (auto x : NUM_READERS) {
auto [writer_ns, reader_ns] = bench.operator()<MultisetHazptr>(x);
std::cout << x << ": " << writer_ns << " " << reader_ns << "\n";
}
std::cout << "\n";
}
int main() {
// BenchmarkInsert();
// BenchmarkErase();
// BenchmarkFind();
BenchmarkConcurrentReadWrite();
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment