Create a gist now

Instantly share code, notes, and snippets.

TLS - Thread Local Storage Benchmark Patch against Rapicorn
diff --git rcore/tests/multitest.cc rcore/tests/multitest.cc
index e8bed7a..0b985fd 100644
--- rcore/tests/multitest.cc
+++ rcore/tests/multitest.cc
@@ -15,6 +15,125 @@ using namespace Rapicorn;
#error RAPICORN_CHECK_VERSION() implementation is broken
#endif
+#define VOLATILE volatile // qualifier placed on every pointer slot handed out by the accessors below
+/* Accessors exercised by bench_thread_local_storage(). Each one takes the address
+ * of a caller-owned pointer and returns a reference to a `char*VOLATILE` slot.
+ * Only declarations live here; this patch defines them in rcore/tests/timebits.cc.
+ * RAPICORN_NOINLINE presumably keeps each call out of line so that the call itself
+ * is part of what gets timed (macro definition not visible here, TODO confirm). */
+extern "C" char*VOLATILE& get_local_ptr (char **p) RAPICORN_NOINLINE;
+extern "C" char*VOLATILE& get_static_ptr (char **p) RAPICORN_NOINLINE;
+extern "C" char*VOLATILE& get_static_tls (char **p) RAPICORN_NOINLINE;
+extern "C" char*VOLATILE& get_library_tls (char **p) RAPICORN_NOINLINE;
+extern "C" char*VOLATILE& get_function_tls (char **p) RAPICORN_NOINLINE;
+
+/**
+ * Time pointer access through each of the five get_*() accessors and print one
+ * result line per accessor via printout().
+ *
+ * Every benchmark uses the same loop: the accessor's slot is expected to start
+ * out NULL, then for `runs` iterations the slot is read, compared against a
+ * local shadow pointer and incremented; afterwards the slot is reset to NULL.
+ * The value returned by Test::Timer::benchmark() is printed with "%f seconds";
+ * the *_min names suggest it is the minimum elapsed time (benchmark() itself is
+ * not visible here, TODO confirm).
+ *
+ * @param do_static_test  Whether to also run the static pointer benchmark. The
+ *                        caller in this patch passes true for its first thread
+ *                        only, presumably because the function-static slot
+ *                        behind get_static_ptr() is shared by all threads.
+ */
+static void
+bench_thread_local_storage (bool do_static_test)
+{
+ constexpr uint64 runs = 1000000; // iterations per loop call; a constant expression, so the capture-less lambdas can use it
+
+ // stack/register pointer benchmark
+ Test::Timer rptr_timer (1); // the constructor argument is the deadline in seconds
+ auto rptr_loop = []() {
+ char *local_dummy = NULL, *validation_ptr = NULL; // validation_ptr shadows the value the accessor slot should hold
+ assert (validation_ptr == get_local_ptr (&local_dummy)); // the slot must start out NULL
+ for (uint64 i = 0; i < runs; i++)
+ {
+ if (validation_ptr != get_local_ptr (&local_dummy))
+ break; // slot and shadow diverged, the trailing assert reports it
+ validation_ptr++;
+ get_local_ptr (&local_dummy)++;
+ }
+ assert (validation_ptr == get_local_ptr (&local_dummy));
+ get_local_ptr (&local_dummy) = NULL; // leave the slot NULL again
+ };
+ const double rptr_min = rptr_timer.benchmark (rptr_loop);
+ printout ("Local pointer access (no TLS): %f seconds\n", rptr_min);
+
+ // static pointer benchmark
+ Test::Timer sptr_timer (1);
+ auto sptr_loop = []() {
+ char *local_dummy = NULL, *validation_ptr = NULL;
+ assert (validation_ptr == get_static_ptr (&local_dummy));
+ for (uint64 i = 0; i < runs; i++)
+ {
+ if (validation_ptr != get_static_ptr (&local_dummy))
+ break;
+ validation_ptr++;
+ get_static_ptr (&local_dummy)++;
+ }
+ assert (validation_ptr == get_static_ptr (&local_dummy));
+ get_static_ptr (&local_dummy) = NULL; // reset the persistent slot so a later call passes the leading assert
+ };
+ if (do_static_test) // the only benchmark that is optional
+ {
+ const double sptr_min = sptr_timer.benchmark (sptr_loop);
+ printout ("Static pointer access (no TLS): %f seconds\n", sptr_min);
+ }
+
+ // thread-local pointer benchmark
+ Test::Timer stls_timer (1);
+ auto stls_loop = []() {
+ char *local_dummy = NULL, *validation_ptr = NULL;
+ assert (validation_ptr == get_static_tls (&local_dummy));
+ for (uint64 i = 0; i < runs; i++)
+ {
+ if (validation_ptr != get_static_tls (&local_dummy))
+ break;
+ validation_ptr++;
+ get_static_tls (&local_dummy)++;
+ }
+ assert (validation_ptr == get_static_tls (&local_dummy));
+ get_static_tls (&local_dummy) = NULL;
+ };
+ const double stls_min = stls_timer.benchmark (stls_loop);
+ printout ("Executable global TLS pointer access: %f seconds\n", stls_min);
+
+ // function thread-local pointer benchmark
+ Test::Timer ftls_timer (1);
+ auto ftls_loop = []() {
+ char *local_dummy = NULL, *validation_ptr = NULL;
+ assert (validation_ptr == get_function_tls (&local_dummy));
+ for (uint64 i = 0; i < runs; i++)
+ {
+ if (validation_ptr != get_function_tls (&local_dummy))
+ break;
+ validation_ptr++;
+ get_function_tls (&local_dummy)++;
+ }
+ assert (validation_ptr == get_function_tls (&local_dummy));
+ get_function_tls (&local_dummy) = NULL;
+ };
+ const double ftls_min = ftls_timer.benchmark (ftls_loop);
+ printout ("Executable function-local TLS pointer access: %f seconds\n", ftls_min);
+
+ // PIC-lib thread-local pointer benchmark
+ Test::Timer ltls_timer (1);
+ auto ltls_loop = []() {
+ char *local_dummy = NULL, *validation_ptr = NULL;
+ assert (validation_ptr == get_library_tls (&local_dummy));
+ for (uint64 i = 0; i < runs; i++)
+ {
+ if (validation_ptr != get_library_tls (&local_dummy))
+ break;
+ validation_ptr++;
+ get_library_tls (&local_dummy)++;
+ }
+ assert (validation_ptr == get_library_tls (&local_dummy));
+ get_library_tls (&local_dummy) = NULL;
+ };
+ const double ltls_min = ltls_timer.benchmark (ltls_loop);
+ printout ("Shared library TLS pointer access: %f seconds\n", ltls_min);
+}
+/**
+ * Run bench_thread_local_storage() concurrently on one thread per online CPU
+ * and wait for all of them to finish. Only the first thread is asked to run
+ * the static pointer test (its argument is `i == 0`).
+ */
+static void
+run_thread_local_bench()
+{
+ const size_t cpus = ThisThread::online_cpus();
+ // Hold the threads by value: the former `new std::thread` objects were never deleted.
+ vector<std::thread> threads;
+ threads.reserve (cpus);
+ for (size_t i = 0; i < cpus; i++)
+ threads.emplace_back (bench_thread_local_storage, i == 0);
+ // Every thread must be joined before the vector is destroyed.
+ for (std::thread &worker : threads)
+ worker.join();
+}
+REGISTER_TEST ("Threading/Thread-Local-Storage Benchmark", run_thread_local_bench);
+
static void
test_failing ()
{
diff --git rcore/tests/timebits.cc rcore/tests/timebits.cc
index 5b3a8d0..d54d70a 100644
--- rcore/tests/timebits.cc
+++ rcore/tests/timebits.cc
@@ -4,6 +4,15 @@
namespace {
using namespace Rapicorn;
+#define VOLATILE volatile
+/* Definitions of the accessors timed by bench_thread_local_storage() in
+ * rcore/tests/multitest.cc. Each returns a reference to a different pointer slot:
+ *   get_local_ptr    - the caller's own pointer, *p
+ *   get_static_ptr   - a function-local static (one slot for the whole process)
+ *   get_static_tls   - the file-scope __thread variable global_static_tls_var
+ *   get_library_tls  - Test::Timer::so_library_tls_var, defined in rcore/testutils.cc
+ *   get_function_tls - a function-local static __thread variable
+ * All but get_local_ptr ignore their argument.
+ * NOTE(review): the hunk context places these inside an unnamed namespace; confirm
+ * that the extern "C" symbols are still visible to multitest.cc at link time. */
+static __thread char *VOLATILE global_static_tls_var = NULL;
+extern "C" RAPICORN_NOINLINE char*VOLATILE& get_local_ptr (char **p) { return *p; }
+extern "C" RAPICORN_NOINLINE char*VOLATILE& get_static_ptr (char **p) { static char *VOLATILE ptr = NULL; return ptr; }
+extern "C" RAPICORN_NOINLINE char*VOLATILE& get_static_tls (char **p) { return global_static_tls_var; }
+extern "C" RAPICORN_NOINLINE char*VOLATILE& get_library_tls (char **p) { return *(char*VOLATILE*) &Test::Timer::so_library_tls_var; }
+extern "C" RAPICORN_NOINLINE char*VOLATILE& get_function_tls (char **p) { static __thread char *VOLATILE ptr = NULL; return ptr; }
+
static inline uint32
quick_rand32 (void)
{
diff --git rcore/testutils.cc rcore/testutils.cc
index 263516c..ef6304d 100644
--- rcore/testutils.cc
+++ rcore/testutils.cc
@@ -19,6 +19,8 @@ namespace Rapicorn {
*/
namespace Test {
+char __thread * volatile Timer::so_library_tls_var = NULL; // per-thread pointer slot handed out by get_library_tls() in rcore/tests/timebits.cc
+
Timer::Timer (double deadline_in_secs) :
deadline_ (deadline_in_secs), test_duration_ (0), n_runs_ (0)
{}
diff --git rcore/testutils.hh rcore/testutils.hh
index c59b1ec..1a20fb5 100644
--- rcore/testutils.hh
+++ rcore/testutils.hh
@@ -58,6 +58,7 @@ public:
double max_elapsed () const; ///< Maximum time benchmarked for a @a callee() call.
template<typename Callee>
double benchmark (Callee callee);
+ static char __thread * volatile so_library_tls_var; ///< Thread-local pointer slot used by the TLS access benchmark.
};
/**
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment