Lancern/hazptr-bench.cpp Secret

## hazptr-bench.cpp
#include <algorithm>
#include <atomic>
#include <chrono>
#include <memory>
#include <iostream>
#include <random>
#include <thread>
#include <utility>
#include <vector>

#include "folly/synchronization/Hazptr-fwd.h"
#include "folly/synchronization/Hazptr.h"
#include "folly/synchronization/HazptrHolder.h"
#include "folly/synchronization/HazptrObj.h"

template <typename T>
static void DoNotOptimize(T&& obj) {
  asm volatile("" : : "r,m"(obj) : "memory");
}

template <typename F>
static std::chrono::nanoseconds BenchmarkOnce(F&& fn) noexcept {
  auto start_time = std::chrono::high_resolution_clock::now();
  fn();
  auto end_time = std::chrono::high_resolution_clock::now();
  return std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time);
}

template <typename F>
static std::chrono::nanoseconds Benchmark10(F&& fn) noexcept {
  // Warm up.
  for (auto i = 0; i < 3; ++i) {
    fn();
  }

  std::chrono::nanoseconds total{0};
  for (auto i = 0; i < 10; ++i) {
    total += BenchmarkOnce(fn);
  }

  return total / 10;
}

class MultisetRc {
public:
  MultisetRc() noexcept : _head{nullptr} {}

  void insert(int value) {
    std::shared_ptr<Node> old_head = _head.load(std::memory_order_relaxed);
    std::shared_ptr<Node> node = std::make_shared<Node>(std::move(old_head), value);
    _head.store(std::move(node), std::memory_order_release);
  }

  bool erase(int value) noexcept {
    std::atomic<std::shared_ptr<Node>>* pos = &_head;
    std::shared_ptr<Node> node;

    while ((node = pos->load(std::memory_order_acquire))) {
      if (node->value == value) {
        std::shared_ptr<Node> node_next = node->next.load(std::memory_order_relaxed);
        pos->store(std::move(node_next), std::memory_order_relaxed);
        break;
      }
      pos = &node->next;
    }

    return node != nullptr;
  }

  bool find(int value) const noexcept {
    for (
      std::shared_ptr<Node> node = _head.load(std::memory_order_acquire);
      node;
      node = node->next.load(std::memory_order_acquire)
    ) {
      if (node->value == value) {
        return true;
      }
    }
    return false;
  }

private:
  struct Node {
    std::atomic<std::shared_ptr<Node>> next;
    int value;
  };

  std::atomic<std::shared_ptr<Node>> _head;
};

class MultisetHazptr {
public:
  MultisetHazptr() noexcept : _head{nullptr} {}

  void insert(int value) {
    Node* old_head = _head.load(std::memory_order_relaxed);
    Node* node = new Node{{}, old_head, value};
    _head.store(node, std::memory_order_release);
  }

  bool erase(int value) noexcept {
    std::atomic<Node*>* pos = &_head;
    Node* node;

    while ((node = pos->load(std::memory_order_acquire))) {
      if (node->value == value) {
        Node* node_next = node->next.load(std::memory_order_relaxed);
        pos->store(node_next, std::memory_order_relaxed);
        node->retire();
        break;
      }
      pos = &node->next;
    }

    return node != nullptr;
  }

  bool find(int value) const noexcept {
    auto hp = folly::make_hazard_pointer(folly::default_hazptr_domain());
    for (
      Node* node = hp.protect(_head);  // Acquire a hazard pointer to *_head
      node;
      node = hp.protect(node->next)    // Acquire a hazard pointer to *node->next and release the existing hazard pointer
    ) {
      if (node->value == value) {
        return true;
      }
    }
    return false;
  }

private:
  struct Node : folly::hazptr_obj_base<Node> {
    std::atomic<Node*> next;
    int value;
  };

  std::atomic<Node*> _head;
};

class MultisetBaseline {
public:
  MultisetBaseline() noexcept : _head{nullptr} {}

  void insert(int value) {
    _head = new Node{_head, value};
  }

  bool erase(int value) noexcept {
    Node** pos = &_head;
    Node* node;

    while ((node = *pos)) {
      if (node->value == value) {
        Node* node_next = node->next;
        *pos = node->next;
        delete node;
        break;
      }
      pos = &node->next;
    }

    return node != nullptr;
  }

  bool find(int value) const noexcept {
    for (Node* ptr = _head; ptr; ptr = ptr->next) {
      if (ptr->value == value) {
        return true;
      }
    }
    return false;
  }

private:
  struct Node {
    Node* next;
    int value;
  };

  Node* _head;
};

static void BenchmarkInsert() {
  auto bench = []<typename T>(int inserts) {
    return Benchmark10([inserts] {
      T d{};
      for (auto i = 0; i < inserts; ++i) {
        d.insert(i);
      }
      DoNotOptimize(d);
    });
  };

  constexpr int NUM_INSERTS[] { 1000, 10000, 100000 };

  std::cout << "multiset-hazptr insert:\n";
  for (auto x : NUM_INSERTS) {
    std::cout << x << ": " << bench.operator()<MultisetHazptr>(x) << "\n";
  }
  std::cout << "\n";

  std::cout << "multiset-rc insert:\n";
  for (auto x : NUM_INSERTS) {
    std::cout << x << ": " << bench.operator()<MultisetRc>(x) << "\n";
  }
  std::cout << "\n";

  std::cout << "multiset-baseline insert:\n";
  for (auto x : NUM_INSERTS) {
    std::cout << x << ": " << bench.operator()<MultisetBaseline>(x) << "\n";
  }
  std::cout << "\n";
}

static void BenchmarkErase() {
  auto bench = []<typename T>(int initial_size) {
    std::chrono::nanoseconds total{0};
    for (auto i = 0; i < 10; ++i) {
      T d{};
      for (auto j = 0; j < initial_size; ++j) {
        d.insert(j);
      }
      DoNotOptimize(d);
      total += BenchmarkOnce([&d, initial_size] {
        for (auto j = initial_size - 1; j >= 0; --j) {
          d.erase(j);
        }
        DoNotOptimize(d);
      });
    }
    return total / 10;
  };

  constexpr int INITIAL_SIZES[] { 1000, 10000, 100000 };

  std::cout << "multiset-hazptr erase:\n";
  for (auto x : INITIAL_SIZES) {
    std::cout << x << ": " << bench.operator()<MultisetHazptr>(x) << "\n";
  }
  std::cout << "\n";

  std::cout << "multiset-rc erase:\n";
  for (auto x : INITIAL_SIZES) {
    std::cout << x << ": " << bench.operator()<MultisetRc>(x) << "\n";
  }
  std::cout << "\n";

  std::cout << "multiset-baseline erase:\n";
  for (auto x : INITIAL_SIZES) {
    std::cout << x << ": " << bench.operator()<MultisetBaseline>(x) << "\n";
  }
  std::cout << "\n";
}

static void BenchmarkFind() {
  auto bench = []<typename T>(int num_threads) {
    constexpr int SIZE = 100;
    constexpr int NUM_FINDS = 10000;

    T d{};
    for (auto j = 0; j < SIZE; ++j) {
      d.insert(j);
    }
    DoNotOptimize(d);

    std::vector<std::thread> workers;
    workers.reserve(num_threads);
    std::atomic<std::int64_t> total_ns{0};
    for (auto tid = 0; tid < num_threads; ++tid) {
      workers.emplace_back([&d, &total_ns]() {
        auto dur = BenchmarkOnce([&d]() {
          for (auto i = 0; i < NUM_FINDS; ++i) {
            DoNotOptimize(d.find(i % SIZE));
          }
        });
        total_ns.fetch_add(dur.count());
      });
    }

    for (auto& t : workers) {
      t.join();
    }

    return std::chrono::nanoseconds{total_ns.load(std::memory_order_relaxed) / num_threads};
  };

  std::cout << "multiset-baseline find: " << bench.operator()<MultisetBaseline>(1) << "\n\n";

  constexpr int NUM_THREADS[] { 1, 2, 3, 5, 10 };

  std::cout << "multiset-rc find:\n";
  for (auto x : NUM_THREADS) {
    std::cout << x << ": " << bench.operator()<MultisetRc>(x) << "\n";
  }
  std::cout << "\n";

  std::cout << "multiset-hazard find:\n";
  for (auto x : NUM_THREADS) {
    std::cout << x << ": " << bench.operator()<MultisetHazptr>(x) << "\n";
  }
  std::cout << "\n";
}

static void BenchmarkConcurrentReadWrite() {
  auto bench = []<typename T>(int num_readers) {
    constexpr int NUM_WRITES = 10000;
    constexpr int NUM_READS = 10000;

    T d{};

    std::vector<int> order;
    order.reserve(NUM_WRITES / 2);
    for (auto i = 0; i < NUM_WRITES / 2; ++i) {
      order.push_back(i);
    }
    std::shuffle(order.begin(), order.end(), std::mt19937{});

    std::vector<std::thread> workers;
    workers.reserve(num_readers);

    std::int64_t writer_ns = 0;
    std::atomic<std::int64_t> reader_ns = 0;

    workers.emplace_back([&d, &order, &writer_ns]() {
      auto dur = BenchmarkOnce([&d, &order]() {
        for (auto x : order) {
          d.insert(x);
        }
        DoNotOptimize(d);
        for (auto i = 0; i < NUM_WRITES / 2; ++i) {
          d.erase(i);
        }
        DoNotOptimize(d);
      });
      writer_ns = dur.count();
    });

    for (auto i = 0; i < num_readers; ++i) {
      workers.emplace_back([&d, &reader_ns]() {
        auto dur = BenchmarkOnce([&d]() {
          for (auto i = 0; i < NUM_READS; ++i) {
            DoNotOptimize(d.find(i));
          }
        });
        reader_ns.fetch_add(dur.count(), std::memory_order_relaxed);
      });
    }

    for (auto& t : workers) {
      t.join();
    }

    return std::make_pair(std::chrono::nanoseconds{writer_ns}, std::chrono::nanoseconds{reader_ns.load(std::memory_order_relaxed) / num_readers});
  };

  constexpr int NUM_READERS[] { 1, 2, 3, 5, 10 };

  for (auto i = 0; i < 3; ++i) {
    bench.operator()<MultisetRc>(1);
  }

  std::cout << "multiset-rc concurrent:\n";
  for (auto x : NUM_READERS) {
    auto [writer_ns, reader_ns] = bench.operator()<MultisetRc>(x);
    std::cout << x << ": " << writer_ns << " " << reader_ns << "\n";
  }
  std::cout << "\n";

  for (auto i = 0; i < 3; ++i) {
    bench.operator()<MultisetHazptr>(1);
  }

  std::cout << "multiset-hazptr concurrent:\n";
  for (auto x : NUM_READERS) {
    auto [writer_ns, reader_ns] = bench.operator()<MultisetHazptr>(x);
    std::cout << x << ": " << writer_ns << " " << reader_ns << "\n";
  }
  std::cout << "\n";
}

int main() {
  // BenchmarkInsert();
  // BenchmarkErase();
  // BenchmarkFind();
  BenchmarkConcurrentReadWrite();

  return 0;
}
	#include <algorithm>
	#include <atomic>
	#include <chrono>
	#include <memory>
	#include <iostream>
	#include <random>
	#include <thread>
	#include <utility>
	#include <vector>

	#include "folly/synchronization/Hazptr-fwd.h"
	#include "folly/synchronization/Hazptr.h"
	#include "folly/synchronization/HazptrHolder.h"
	#include "folly/synchronization/HazptrObj.h"

	template <typename T>
	static void DoNotOptimize(T&& obj) {
	asm volatile("" : : "r,m"(obj) : "memory");
	}

	template <typename F>
	static std::chrono::nanoseconds BenchmarkOnce(F&& fn) noexcept {
	auto start_time = std::chrono::high_resolution_clock::now();
	fn();
	auto end_time = std::chrono::high_resolution_clock::now();
	return std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time);
	}

	template <typename F>
	static std::chrono::nanoseconds Benchmark10(F&& fn) noexcept {
	// Warm up.
	for (auto i = 0; i < 3; ++i) {
	fn();
	}

	std::chrono::nanoseconds total{0};
	for (auto i = 0; i < 10; ++i) {
	total += BenchmarkOnce(fn);
	}

	return total / 10;
	}

	class MultisetRc {
	public:
	MultisetRc() noexcept : _head{nullptr} {}

	void insert(int value) {
	std::shared_ptr<Node> old_head = _head.load(std::memory_order_relaxed);
	std::shared_ptr<Node> node = std::make_shared<Node>(std::move(old_head), value);
	_head.store(std::move(node), std::memory_order_release);
	}

	bool erase(int value) noexcept {
	std::atomic<std::shared_ptr<Node>>* pos = &_head;
	std::shared_ptr<Node> node;

	while ((node = pos->load(std::memory_order_acquire))) {
	if (node->value == value) {
	std::shared_ptr<Node> node_next = node->next.load(std::memory_order_relaxed);
	pos->store(std::move(node_next), std::memory_order_relaxed);
	break;
	}
	pos = &node->next;
	}

	return node != nullptr;
	}

	bool find(int value) const noexcept {
	for (
	std::shared_ptr<Node> node = _head.load(std::memory_order_acquire);
	node;
	node = node->next.load(std::memory_order_acquire)
	) {
	if (node->value == value) {
	return true;
	}
	}
	return false;
	}

	private:
	struct Node {
	std::atomic<std::shared_ptr<Node>> next;
	int value;
	};

	std::atomic<std::shared_ptr<Node>> _head;
	};

	class MultisetHazptr {
	public:
	MultisetHazptr() noexcept : _head{nullptr} {}

	void insert(int value) {
	Node* old_head = _head.load(std::memory_order_relaxed);
	Node* node = new Node{{}, old_head, value};
	_head.store(node, std::memory_order_release);
	}

	bool erase(int value) noexcept {
	std::atomic<Node> pos = &_head;
	Node* node;

	while ((node = pos->load(std::memory_order_acquire))) {
	if (node->value == value) {
	Node* node_next = node->next.load(std::memory_order_relaxed);
	pos->store(node_next, std::memory_order_relaxed);
	node->retire();
	break;
	}
	pos = &node->next;
	}

	return node != nullptr;
	}

	bool find(int value) const noexcept {
	auto hp = folly::make_hazard_pointer(folly::default_hazptr_domain());
	for (
	Node* node = hp.protect(_head); // Acquire a hazard pointer to *_head
	node;
	node = hp.protect(node->next) // Acquire a hazard pointer to *node->next and release the existing hazard pointer
	) {
	if (node->value == value) {
	return true;
	}
	}
	return false;
	}

	private:
	struct Node : folly::hazptr_obj_base<Node> {
	std::atomic<Node*> next;
	int value;
	};

	std::atomic<Node*> _head;
	};

	class MultisetBaseline {
	public:
	MultisetBaseline() noexcept : _head{nullptr} {}

	void insert(int value) {
	_head = new Node{_head, value};
	}

	bool erase(int value) noexcept {
	Node** pos = &_head;
	Node* node;

	while ((node = *pos)) {
	if (node->value == value) {
	Node* node_next = node->next;
	*pos = node->next;
	delete node;
	break;
	}
	pos = &node->next;
	}

	return node != nullptr;
	}

	bool find(int value) const noexcept {
	for (Node* ptr = _head; ptr; ptr = ptr->next) {
	if (ptr->value == value) {
	return true;
	}
	}
	return false;
	}

	private:
	struct Node {
	Node* next;
	int value;
	};

	Node* _head;
	};

	static void BenchmarkInsert() {
	auto bench = []<typename T>(int inserts) {
	return Benchmark10([inserts] {
	T d{};
	for (auto i = 0; i < inserts; ++i) {
	d.insert(i);
	}
	DoNotOptimize(d);
	});
	};

	constexpr int NUM_INSERTS[] { 1000, 10000, 100000 };

	std::cout << "multiset-hazptr insert:\n";
	for (auto x : NUM_INSERTS) {
	std::cout << x << ": " << bench.operator()<MultisetHazptr>(x) << "\n";
	}
	std::cout << "\n";

	std::cout << "multiset-rc insert:\n";
	for (auto x : NUM_INSERTS) {
	std::cout << x << ": " << bench.operator()<MultisetRc>(x) << "\n";
	}
	std::cout << "\n";

	std::cout << "multiset-baseline insert:\n";
	for (auto x : NUM_INSERTS) {
	std::cout << x << ": " << bench.operator()<MultisetBaseline>(x) << "\n";
	}
	std::cout << "\n";
	}

	static void BenchmarkErase() {
	auto bench = []<typename T>(int initial_size) {
	std::chrono::nanoseconds total{0};
	for (auto i = 0; i < 10; ++i) {
	T d{};
	for (auto j = 0; j < initial_size; ++j) {
	d.insert(j);
	}
	DoNotOptimize(d);
	total += BenchmarkOnce([&d, initial_size] {
	for (auto j = initial_size - 1; j >= 0; --j) {
	d.erase(j);
	}
	DoNotOptimize(d);
	});
	}
	return total / 10;
	};

	constexpr int INITIAL_SIZES[] { 1000, 10000, 100000 };

	std::cout << "multiset-hazptr erase:\n";
	for (auto x : INITIAL_SIZES) {
	std::cout << x << ": " << bench.operator()<MultisetHazptr>(x) << "\n";
	}
	std::cout << "\n";

	std::cout << "multiset-rc erase:\n";
	for (auto x : INITIAL_SIZES) {
	std::cout << x << ": " << bench.operator()<MultisetRc>(x) << "\n";
	}
	std::cout << "\n";

	std::cout << "multiset-baseline erase:\n";
	for (auto x : INITIAL_SIZES) {
	std::cout << x << ": " << bench.operator()<MultisetBaseline>(x) << "\n";
	}
	std::cout << "\n";
	}

	static void BenchmarkFind() {
	auto bench = []<typename T>(int num_threads) {
	constexpr int SIZE = 100;
	constexpr int NUM_FINDS = 10000;

	T d{};
	for (auto j = 0; j < SIZE; ++j) {
	d.insert(j);
	}
	DoNotOptimize(d);

	std::vector<std::thread> workers;
	workers.reserve(num_threads);
	std::atomic<std::int64_t> total_ns{0};
	for (auto tid = 0; tid < num_threads; ++tid) {
	workers.emplace_back([&d, &total_ns]() {
	auto dur = BenchmarkOnce([&d]() {
	for (auto i = 0; i < NUM_FINDS; ++i) {
	DoNotOptimize(d.find(i % SIZE));
	}
	});
	total_ns.fetch_add(dur.count());
	});
	}

	for (auto& t : workers) {
	t.join();
	}

	return std::chrono::nanoseconds{total_ns.load(std::memory_order_relaxed) / num_threads};
	};

	std::cout << "multiset-baseline find: " << bench.operator()<MultisetBaseline>(1) << "\n\n";

	constexpr int NUM_THREADS[] { 1, 2, 3, 5, 10 };

	std::cout << "multiset-rc find:\n";
	for (auto x : NUM_THREADS) {
	std::cout << x << ": " << bench.operator()<MultisetRc>(x) << "\n";
	}
	std::cout << "\n";

	std::cout << "multiset-hazard find:\n";
	for (auto x : NUM_THREADS) {
	std::cout << x << ": " << bench.operator()<MultisetHazptr>(x) << "\n";
	}
	std::cout << "\n";
	}

	static void BenchmarkConcurrentReadWrite() {
	auto bench = []<typename T>(int num_readers) {
	constexpr int NUM_WRITES = 10000;
	constexpr int NUM_READS = 10000;

	T d{};

	std::vector<int> order;
	order.reserve(NUM_WRITES / 2);
	for (auto i = 0; i < NUM_WRITES / 2; ++i) {
	order.push_back(i);
	}
	std::shuffle(order.begin(), order.end(), std::mt19937{});

	std::vector<std::thread> workers;
	workers.reserve(num_readers);

	std::int64_t writer_ns = 0;
	std::atomic<std::int64_t> reader_ns = 0;

	workers.emplace_back([&d, &order, &writer_ns]() {
	auto dur = BenchmarkOnce([&d, &order]() {
	for (auto x : order) {
	d.insert(x);
	}
	DoNotOptimize(d);
	for (auto i = 0; i < NUM_WRITES / 2; ++i) {
	d.erase(i);
	}
	DoNotOptimize(d);
	});
	writer_ns = dur.count();
	});

	for (auto i = 0; i < num_readers; ++i) {
	workers.emplace_back([&d, &reader_ns]() {
	auto dur = BenchmarkOnce([&d]() {
	for (auto i = 0; i < NUM_READS; ++i) {
	DoNotOptimize(d.find(i));
	}
	});
	reader_ns.fetch_add(dur.count(), std::memory_order_relaxed);
	});
	}

	for (auto& t : workers) {
	t.join();
	}

	return std::make_pair(std::chrono::nanoseconds{writer_ns}, std::chrono::nanoseconds{reader_ns.load(std::memory_order_relaxed) / num_readers});
	};

	constexpr int NUM_READERS[] { 1, 2, 3, 5, 10 };

	for (auto i = 0; i < 3; ++i) {
	bench.operator()<MultisetRc>(1);
	}

	std::cout << "multiset-rc concurrent:\n";
	for (auto x : NUM_READERS) {
	auto [writer_ns, reader_ns] = bench.operator()<MultisetRc>(x);
	std::cout << x << ": " << writer_ns << " " << reader_ns << "\n";
	}
	std::cout << "\n";

	for (auto i = 0; i < 3; ++i) {
	bench.operator()<MultisetHazptr>(1);
	}

	std::cout << "multiset-hazptr concurrent:\n";
	for (auto x : NUM_READERS) {
	auto [writer_ns, reader_ns] = bench.operator()<MultisetHazptr>(x);
	std::cout << x << ": " << writer_ns << " " << reader_ns << "\n";
	}
	std::cout << "\n";
	}

	int main() {
	// BenchmarkInsert();
	// BenchmarkErase();
	// BenchmarkFind();
	BenchmarkConcurrentReadWrite();

	return 0;
	}