Created
July 12, 2023 18:05
-
-
Save anarazel/a34c08e54190e7f4ff2859c9cbc685f8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <pthread.h> | |
#include <stdio.h> | |
#include <stdint.h> | |
#include <unistd.h> | |
#include <signal.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <stdbool.h> | |
/*
 * Build-time tunables.  Each one may be overridden on the compiler command
 * line, e.g. -DSECS=10 -DNTHREADS=8 -DATOMIC_WIDTH=32.
 */

/* Number of seconds main() sleeps while the worker threads are running. */
#ifndef SECS
#define SECS 5
#endif

/* Number of worker threads main() starts. */
#ifndef NTHREADS
#define NTHREADS 40
#endif

/* Width in bits of the shared counter: 32 selects uint32_t, any other value uint64_t. */
#ifndef ATOMIC_WIDTH
#define ATOMIC_WIDTH 64
#endif

/*
 * Increment strategy.  One of these must be defined (uncomment a line or pass
 * -D<name>), otherwise thread_main() stops the build with #error.
 *
 * NOTE(review): ATOMIC_METHOD_CMPXCHG16 is listed here but thread_main() has
 * no branch for it.
 */
//#define ATOMIC_METHOD_RACY
//#define ATOMIC_METHOD_MUTEX
//#define ATOMIC_METHOD_ADD
//#define ATOMIC_METHOD_INC
//#define ATOMIC_METHOD_XADD
//#define ATOMIC_METHOD_CMPXCHG
//#define ATOMIC_METHOD_CMPXCHG16
//#define ATOMIC_METHOD_CMPXCHG_ASM
//#define NON_ATOMIC_METHOD_CMPXCHG_ASM
/*
 * Start barrier.  main() initialises it for NTHREADS + 1 participants; every
 * worker waits on it at the top of thread_main() and main() waits on it once
 * all workers have been created.
 */
static pthread_barrier_t barrier;
/*
 * Integer type of the shared counter, selected by ATOMIC_WIDTH:
 * 32 gives uint32_t, every other value gives uint64_t.
 */
#if ATOMIC_WIDTH == 32
typedef uint32_t atomic_width_t;
#else
typedef uint64_t atomic_width_t;
#endif
struct shared_data { | |
char pad0[64]; | |
atomic_width_t count; | |
#if defined(ATOMIC_METHOD_MUTEX) | |
pthread_mutex_t mutex; | |
#endif | |
char pad1[64]; | |
} shared_data; | |
/*
 * Private statistics of one worker thread; written only by that worker and
 * read by main() after the worker has been joined.
 */
typedef struct per_thread_data
{
    uint64_t count;     /* loop iterations (increment attempts) started */
    uint64_t retries;   /* failed compare-exchange attempts (cmpxchg variants only) */
} per_thread_data;
#if ATOMIC_WIDTH==32 | |
/*
 * Atomically add add_ to the 32-bit value at *val with a lock-prefixed add.
 * Nothing is returned.  The "memory" clobber additionally makes the statement
 * a compiler barrier.
 *
 * The explicit 'l' suffix fixes the operand size, mirroring the 'q' suffix
 * of the 64-bit variant.
 */
static inline void
atomic_add(uint32_t *val, uint32_t add_)
{
    __asm__ __volatile__(
        "   lock            \n"
        "   addl    %1,%0   \n"
        : "+m" (*val)
        : "r" (add_)
        : "memory", "cc");
}
/*
 * Atomically increment the 32-bit value at *val with a lock-prefixed inc.
 * Nothing is returned.  The "memory" clobber additionally makes the statement
 * a compiler barrier.
 *
 * The only operand is a memory reference, so the mnemonic needs an explicit
 * size suffix: a bare "inc" is ambiguous (incb/incw/incl/incq) and is
 * rejected or merely guessed at by assemblers.
 */
static inline void
atomic_inc(uint32_t *val)
{
    __asm__ __volatile__(
        "   lock        \n"
        "   incl    %0  \n"
        : "+m" (*val)
        :
        : "memory", "cc");
}
/*
 * Atomic 32-bit compare-and-exchange (lock cmpxchgl).
 *
 * If *ptr equals *expected, newval is stored into *ptr and true is returned.
 * Otherwise *ptr is left alone, the value found in *ptr is written back to
 * *expected and false is returned.
 */
static inline bool
atomic_cmpxchg(volatile uint32_t *ptr,
               uint32_t *expected, uint32_t newval)
{
    char ret;

    /*
     * cmpxchg compares the accumulator (loaded from *expected) with *ptr and
     * sets the zero flag when they were equal; setz turns that flag into the
     * return value.  On mismatch the accumulator receives the current *ptr.
     */
    __asm__ __volatile__(
        "   lock                \n"
        "   cmpxchgl    %4,%5   \n"
        "   setz        %2      \n"
        : "=a" (*expected), "=m" (*ptr), "=q" (ret)
        : "a" (*expected), "r" (newval), "m" (*ptr)
        : "memory", "cc");
    return (bool) ret;
}
/*
 * 32-bit compare-and-exchange WITHOUT the lock prefix.
 *
 * Same contract as the locked variant when only one thread touches *ptr:
 * if *ptr equals *expected, newval is stored and true is returned; otherwise
 * the value found in *ptr is written to *expected and false is returned.
 * Because the lock prefix is omitted, the read-modify-write is not atomic
 * with respect to other processors.
 */
static inline bool
non_atomic_cmpxchg(volatile uint32_t *ptr,
                   uint32_t *expected, uint32_t newval)
{
    char ret;

    /*
     * cmpxchg sets the zero flag when the accumulator matched *ptr; setz
     * converts that flag into the return value.
     */
    __asm__ __volatile__(
        "   cmpxchgl    %4,%5   \n"
        "   setz        %2      \n"
        : "=a" (*expected), "=m" (*ptr), "=q" (ret)
        : "a" (*expected), "r" (newval), "m" (*ptr)
        : "memory", "cc");
    return (bool) ret;
}
#else | |
/*
 * Atomically add add_ to the 64-bit value at *val with a lock-prefixed addq.
 * Nothing is returned.  The "memory" clobber additionally makes the statement
 * a compiler barrier.
 */
static inline void
atomic_add(uint64_t *val, uint64_t add_)
{
    __asm__ __volatile__(
        "   lock            \n"
        "   addq    %1,%0   \n"
        : "+m" (*val)
        : "r" (add_)
        : "memory", "cc");
}

/*
 * Atomically increment the 64-bit value at *val with a lock-prefixed incq.
 * Nothing is returned.
 */
static inline void
atomic_inc(uint64_t *val)
{
    __asm__ __volatile__(
        "   lock        \n"
        "   incq    %0  \n"
        : "+m" (*val)
        :
        : "memory", "cc");
}

/*
 * Atomic 64-bit compare-and-exchange (lock cmpxchgq), so that the
 * ATOMIC_METHOD_CMPXCHG_ASM path of thread_main() also builds with a 64-bit
 * counter.
 *
 * If *ptr equals *expected, newval is stored into *ptr and true is returned.
 * Otherwise the value found in *ptr is written to *expected and false is
 * returned.
 */
static inline bool
atomic_cmpxchg(volatile uint64_t *ptr,
               uint64_t *expected, uint64_t newval)
{
    char ret;

    /* The zero flag set by cmpxchg reports success; setz captures it. */
    __asm__ __volatile__(
        "   lock                \n"
        "   cmpxchgq    %4,%5   \n"
        "   setz        %2      \n"
        : "=a" (*expected), "=m" (*ptr), "=q" (ret)
        : "a" (*expected), "r" (newval), "m" (*ptr)
        : "memory", "cc");
    return (bool) ret;
}

/*
 * 64-bit compare-and-exchange WITHOUT the lock prefix, for the
 * NON_ATOMIC_METHOD_CMPXCHG_ASM path of thread_main().  Same contract as
 * atomic_cmpxchg() for a single thread, but the read-modify-write is not
 * atomic with respect to other processors.
 */
static inline bool
non_atomic_cmpxchg(volatile uint64_t *ptr,
                   uint64_t *expected, uint64_t newval)
{
    char ret;

    __asm__ __volatile__(
        "   cmpxchgq    %4,%5   \n"
        "   setz        %2      \n"
        : "=a" (*expected), "=m" (*ptr), "=q" (ret)
        : "a" (*expected), "r" (newval), "m" (*ptr)
        : "memory", "cc");
    return (bool) ret;
}
#endif | |
void * | |
thread_main(void *p) | |
{ | |
per_thread_data *this_thread = (per_thread_data *) p; | |
pthread_barrier_wait(&barrier); | |
while (1) | |
{ | |
#if defined(ATOMIC_METHOD_RACY) | |
this_thread->count++; | |
shared_data.count++; | |
__asm__ __volatile__("" :::"memory"); | |
#elif defined(ATOMIC_METHOD_MUTEX) | |
pthread_mutex_lock(&shared_data.mutex); | |
this_thread->count++; | |
shared_data.count++; | |
pthread_mutex_unlock(&shared_data.mutex); | |
#elif defined(ATOMIC_METHOD_ADD) | |
this_thread->count++; | |
atomic_add(&shared_data.count, 1); | |
#elif defined(ATOMIC_METHOD_INC) | |
this_thread->count++; | |
atomic_inc(&shared_data.count); | |
#elif defined(ATOMIC_METHOD_XADD) | |
this_thread->count++; | |
__atomic_fetch_add(&shared_data.count, 1, __ATOMIC_SEQ_CST); | |
#elif defined(ATOMIC_METHOD_CMPXCHG) | |
uint64_t cur = shared_data.count; | |
this_thread->count++; | |
while (!__atomic_compare_exchange_n(&shared_data.count, &cur, cur + 1, | |
0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) | |
{ | |
this_thread->retries++; | |
} | |
#elif defined(ATOMIC_METHOD_CMPXCHG_ASM) | |
atomic_width_t cur = shared_data.count; | |
this_thread->count++; | |
while (!atomic_cmpxchg(&shared_data.count, &cur, cur + 1)) | |
{ | |
this_thread->retries++; | |
} | |
#elif defined(NON_ATOMIC_METHOD_CMPXCHG_ASM) | |
atomic_width_t cur = shared_data.count; | |
this_thread->count++; | |
while (!non_atomic_cmpxchg(&shared_data.count, &cur, cur + 1)) | |
{ | |
this_thread->retries++; | |
} | |
#else | |
#error gotta tell me what to do | |
#endif | |
} | |
} | |
/*
 * SIGINT handler: terminates the thread the signal was delivered to.
 * main() sends SIGINT to each worker with pthread_kill() to break it out of
 * the endless loop in thread_main().
 *
 * NOTE(review): pthread_exit() is not on POSIX's list of async-signal-safe
 * functions, so calling it from a handler is not guaranteed to work
 * portably.
 */
static void
handle_sigint(int sig)
{
    (void) sig;     /* required by the handler signature, not otherwise used */
    pthread_exit(NULL);
}
int | |
main(int argc, char **argv) | |
{ | |
pthread_t threads[NTHREADS]; | |
per_thread_data *thread_data[NTHREADS]; | |
int secs = SECS; | |
signal(SIGINT, handle_sigint); | |
pthread_barrier_init(&barrier, NULL, NTHREADS+1); | |
#if defined(ATOMIC_METHOD_MUTEX) | |
pthread_mutex_init(&shared_data.mutex, NULL); | |
#endif | |
shared_data.count = 0; | |
for (int i = 0; i < NTHREADS; i++) | |
{ | |
thread_data[i] = aligned_alloc(4096, sizeof(per_thread_data)); | |
memset(thread_data[i], 0, sizeof(per_thread_data)); | |
pthread_create(&threads[i], NULL, thread_main, thread_data[i]); | |
} | |
pthread_barrier_wait(&barrier); | |
sleep(secs); | |
fprintf(stderr, "shared counter after 1s (before killing) is: %llu\n", | |
(long long unsigned) shared_data.count); | |
for (int i = 0; i < NTHREADS; i++) | |
{ | |
pthread_kill(threads[i], SIGINT); | |
} | |
for (int i = 0; i < NTHREADS; i++) | |
{ | |
pthread_join(threads[i], NULL); | |
} | |
uint64_t thread_count_sum = 0; | |
uint64_t thread_retries_sum = 0; | |
for (int i = 0; i < NTHREADS; i++) | |
{ | |
thread_count_sum += thread_data[i]->count; | |
thread_retries_sum += thread_data[i]->retries; | |
// XXX: compute stddev instead | |
fprintf(stderr, "thread %d: %llu\n", | |
i, (long long unsigned) thread_data[i]->count); | |
} | |
fprintf(stderr, "final counters after killing are: %llu, per-thread counters sum %llu, diff %lld (allowed <= %d), retries %llu\n", | |
(long long unsigned) shared_data.count, | |
(long long unsigned) thread_count_sum, | |
(long long) shared_data.count - (long long) thread_count_sum, | |
-NTHREADS, | |
(long long unsigned) thread_retries_sum); | |
fprintf(stderr, "throughput per thread: %.2fM/s, total: %.2fM/s\n", | |
((double) thread_count_sum / NTHREADS) / secs / 1000000, | |
((double) thread_count_sum) / secs / 1000000); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment