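/*
 * Microbenchmark: NTHREADS threads increment a single shared counter for
 * SECS seconds, using one of several methods selected at compile time:
 * plain racy increments, a pthread mutex, inline-asm lock add/inc,
 * __atomic_fetch_add, or cmpxchg retry loops (with and without the lock
 * prefix). The inline-asm variants are x86-specific. Define exactly one
 * of the method macros listed below, e.g. (file name arbitrary):
 *
 *   cc -O2 -pthread -DATOMIC_METHOD_XADD counter-bench.c -o counter-bench
 */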
#include <pthread.h>
#include <stdio.h>
#include <stdint.h>
#include <unistd.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#ifndef SECS
#define SECS 5
#endif
#ifndef NTHREADS
#define NTHREADS 40
#endif
#ifndef ATOMIC_WIDTH
#define ATOMIC_WIDTH 64
#endif
//#define ATOMIC_METHOD_RACY
//#define ATOMIC_METHOD_MUTEX
//#define ATOMIC_METHOD_ADD
//#define ATOMIC_METHOD_INC
//#define ATOMIC_METHOD_XADD
//#define ATOMIC_METHOD_CMPXCHG
//#define ATOMIC_METHOD_CMPXCHG_ASM
//#define NON_ATOMIC_METHOD_CMPXCHG_ASM
static pthread_barrier_t barrier;
#if ATOMIC_WIDTH==32
typedef uint32_t atomic_width_t;
#else
typedef uint64_t atomic_width_t;
#endif
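/* shared counter, padded so it sits on a cache line of its own */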
struct shared_data {
    char pad0[64];
    atomic_width_t count;
#if defined(ATOMIC_METHOD_MUTEX)
    pthread_mutex_t mutex;
#endif
    char pad1[64];
} shared_data;
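/* per-thread statistics; allocated one page per thread in main() */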
typedef struct per_thread_data
{
    uint64_t count;
    uint64_t retries;
} per_thread_data;
#if ATOMIC_WIDTH==32
static inline void
atomic_add(uint32_t *val, uint32_t add_)
{
    __asm__ __volatile__(
        "   lock        \n"
        "   add %1,%0   \n"
        : "+m"(*val)
        : "r" (add_)
        : "memory", "cc");
}

static inline void
atomic_inc(uint32_t *val)
{
    __asm__ __volatile__(
        "   lock        \n"
        "   inc %0      \n"
        : "+m"(*val)
        :
        : "memory", "cc");
}

static inline bool
atomic_cmpxchg(volatile uint32_t *ptr,
               uint32_t *expected, uint32_t newval)
{
    char ret;

    /*
     * Perform cmpxchg and use the zero flag which it implicitly sets when
     * equal to measure the success.
     */
    __asm__ __volatile__(
        "   lock                \n"
        "   cmpxchgl %4,%5      \n"
        "   setz %2             \n"
        : "=a" (*expected), "=m"(*ptr), "=q" (ret)
        : "a" (*expected), "r" (newval), "m"(*ptr)
        : "memory", "cc");
    return (bool) ret;
}
static inline bool
non_atomic_cmpxchg(volatile uint32_t *ptr,
                   uint32_t *expected, uint32_t newval)
{
    char ret;

    /*
     * Like atomic_cmpxchg(), but without the lock prefix: the cmpxchg is
     * atomic on the executing core, but not across processors.
     */
    __asm__ __volatile__(
        "   cmpxchgl %4,%5      \n"
        "   setz %2             \n"
        : "=a" (*expected), "=m"(*ptr), "=q" (ret)
        : "a" (*expected), "r" (newval), "m"(*ptr)
        : "memory", "cc");
    return (bool) ret;
}
#else
static inline void
atomic_add(uint64_t *val, uint64_t add_)
{
    __asm__ __volatile__(
        "   lock            \n"
        "   addq %1,%0      \n"
        : "+m"(*val)
        : "r" (add_)
        : "memory", "cc");
}

static inline void
atomic_inc(uint64_t *val)
{
    __asm__ __volatile__(
        "   lock            \n"
        "   incq %0         \n"
        : "+m"(*val)
        :
        : "memory", "cc");
}

/*
 * 64-bit counterparts of the 32-bit cmpxchg helpers above; without these
 * the *_CMPXCHG_ASM methods do not compile at the default ATOMIC_WIDTH=64.
 */
static inline bool
atomic_cmpxchg(volatile uint64_t *ptr,
               uint64_t *expected, uint64_t newval)
{
    char ret;

    __asm__ __volatile__(
        "   lock                \n"
        "   cmpxchgq %4,%5      \n"
        "   setz %2             \n"
        : "=a" (*expected), "=m"(*ptr), "=q" (ret)
        : "a" (*expected), "r" (newval), "m"(*ptr)
        : "memory", "cc");
    return (bool) ret;
}

static inline bool
non_atomic_cmpxchg(volatile uint64_t *ptr,
                   uint64_t *expected, uint64_t newval)
{
    char ret;

    __asm__ __volatile__(
        "   cmpxchgq %4,%5      \n"
        "   setz %2             \n"
        : "=a" (*expected), "=m"(*ptr), "=q" (ret)
        : "a" (*expected), "r" (newval), "m"(*ptr)
        : "memory", "cc");
    return (bool) ret;
}
#endif
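/*
 * Worker loop: bump the thread-local counter and the shared one with the
 * configured method, until main() sends SIGINT.
 */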
void *
thread_main(void *p)
{
    per_thread_data *this_thread = (per_thread_data *) p;

    pthread_barrier_wait(&barrier);

    while (1)
    {
#if defined(ATOMIC_METHOD_RACY)
        this_thread->count++;
        shared_data.count++;
        /* compiler barrier, so the unsynchronized increment stays in the loop */
        __asm__ __volatile__("" :::"memory");
#elif defined(ATOMIC_METHOD_MUTEX)
        pthread_mutex_lock(&shared_data.mutex);
        this_thread->count++;
        shared_data.count++;
        pthread_mutex_unlock(&shared_data.mutex);
#elif defined(ATOMIC_METHOD_ADD)
        this_thread->count++;
        atomic_add(&shared_data.count, 1);
#elif defined(ATOMIC_METHOD_INC)
        this_thread->count++;
        atomic_inc(&shared_data.count);
#elif defined(ATOMIC_METHOD_XADD)
        this_thread->count++;
        __atomic_fetch_add(&shared_data.count, 1, __ATOMIC_SEQ_CST);
#elif defined(ATOMIC_METHOD_CMPXCHG)
        atomic_width_t cur = shared_data.count;

        this_thread->count++;
        while (!__atomic_compare_exchange_n(&shared_data.count, &cur, cur + 1,
                                            0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
        {
            this_thread->retries++;
        }
#elif defined(ATOMIC_METHOD_CMPXCHG_ASM)
        atomic_width_t cur = shared_data.count;

        this_thread->count++;
        while (!atomic_cmpxchg(&shared_data.count, &cur, cur + 1))
        {
            this_thread->retries++;
        }
#elif defined(NON_ATOMIC_METHOD_CMPXCHG_ASM)
        atomic_width_t cur = shared_data.count;

        this_thread->count++;
        while (!non_atomic_cmpxchg(&shared_data.count, &cur, cur + 1))
        {
            this_thread->retries++;
        }
#else
#error gotta tell me what to do
#endif
    }
}
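/*
 * Worker threads are stopped by signalling them; note that pthread_exit()
 * is not on POSIX's async-signal-safe list, but it suffices for tearing
 * down this benchmark.
 */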
static void
handle_sigint(int sig)
{
    pthread_exit(0);
}
int
main(int argc, char **argv)
{
    pthread_t threads[NTHREADS];
    per_thread_data *thread_data[NTHREADS];
    int secs = SECS;

    signal(SIGINT, handle_sigint);

    pthread_barrier_init(&barrier, NULL, NTHREADS + 1);

#if defined(ATOMIC_METHOD_MUTEX)
    pthread_mutex_init(&shared_data.mutex, NULL);
#endif
    shared_data.count = 0;

    for (int i = 0; i < NTHREADS; i++)
    {
        /* one page per thread; C11 requires size be a multiple of alignment */
        thread_data[i] = aligned_alloc(4096, 4096);
        memset(thread_data[i], 0, sizeof(per_thread_data));
        pthread_create(&threads[i], NULL, thread_main, thread_data[i]);
    }

    pthread_barrier_wait(&barrier);

    sleep(secs);

    fprintf(stderr, "shared counter after %ds (before killing) is: %llu\n",
            secs, (long long unsigned) shared_data.count);

    for (int i = 0; i < NTHREADS; i++)
    {
        pthread_kill(threads[i], SIGINT);
    }

    for (int i = 0; i < NTHREADS; i++)
    {
        pthread_join(threads[i], NULL);
    }

    uint64_t thread_count_sum = 0;
    uint64_t thread_retries_sum = 0;

    for (int i = 0; i < NTHREADS; i++)
    {
        thread_count_sum += thread_data[i]->count;
        thread_retries_sum += thread_data[i]->retries;

        // XXX: compute stddev instead
        fprintf(stderr, "thread %d: %llu\n",
                i, (long long unsigned) thread_data[i]->count);
    }

    fprintf(stderr, "final counters after killing are: %llu, per-thread counters sum %llu, diff %lld (allowed >= %d), retries %llu\n",
            (long long unsigned) shared_data.count,
            (long long unsigned) thread_count_sum,
            (long long) shared_data.count - (long long) thread_count_sum,
            -NTHREADS,
            (long long unsigned) thread_retries_sum);
    fprintf(stderr, "throughput per thread: %.2fM/s, total: %.2fM/s\n",
            ((double) thread_count_sum / NTHREADS) / secs / 1000000,
            ((double) thread_count_sum) / secs / 1000000);

    return 0;
}