Last active
January 2, 2023 07:10
-
-
Save mrbid/a33aa35b4f57ddc6812f351e11bb9349 to your computer and use it in GitHub Desktop.
AtomicBench - This is a performance based benchmark of atomic vs non-atomic increment operations.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
    James William Fletcher (github.com/mrbid)
        February 2022
    This is a performance-based benchmark of atomic vs non-atomic increment operations.
    compile: gcc -std=gnu99 atomicbench.c -lpthread -lm -o atomicbench
*/
#include <stdio.h> // printf | |
#include <string.h> // memset | |
#include <pthread.h> // pthread_ | |
#include <stdatomic.h> // atomic_ | |
#include <x86intrin.h> // __rdtsc | |
#include <sys/time.h> // gettimeofday | |
#include <locale.h> // setlocale | |
#include <stdint.h> // uint64_t | |
#include <unistd.h> // sleep, nice | |
#include <errno.h> | |
// Integer type of every benchmark counter, and the matching <stdatomic.h>
// type used for the atomically incremented shared counter. Change both
// together when switching to a different width.
#define VAR_TYPE unsigned short
#define VAR_TYPE_ATOMIC atomic_ushort
// Alternative configuration using unsigned int counters:
// #define VAR_TYPE unsigned int
// #define VAR_TYPE_ATOMIC atomic_uint

// Highest thread count that is benchmarked; also the length of the thread
// handle array and of the per-thread counter arrays.
#define NUM_THREADS_MAX 32

// Number of increments each worker thread performs. main() reuses the same
// value as the number of timed repetitions per thread count and as the
// divisor when averaging the timings.
#define NUM_THREAD_ITERATIONS 10000

// Handles of the worker threads belonging to the round currently being timed.
pthread_t tid[NUM_THREADS_MAX];
/// regular | |
VAR_TYPE shared = 0; | |
VAR_TYPE private[NUM_THREADS_MAX] = {0}; | |
void *increment_thread(void *arg) | |
{ | |
for(unsigned int i = 0; i < NUM_THREAD_ITERATIONS; i++) | |
{ | |
(*(VAR_TYPE*)arg)++; | |
shared++; | |
} | |
return 0; | |
} | |
/// atomic | |
VAR_TYPE_ATOMIC ashared = 0; | |
VAR_TYPE aprivate[NUM_THREADS_MAX] = {0}; | |
void *increment_atomic_thread(void *arg) | |
{ | |
for(unsigned int i = 0; i < NUM_THREAD_ITERATIONS; i++) | |
{ | |
(*(VAR_TYPE*)arg)++; | |
ashared++; | |
} | |
return 0; | |
} | |
/// utils
/*
    Returns the current wall-clock time reported by gettimeofday() as a
    count of microseconds since the Unix epoch, or 0 if the call fails.

    The seconds field is widened to uint64_t before scaling so the
    multiplication cannot overflow on platforms with a 32-bit time_t. The
    timezone argument of gettimeofday() is obsolete, so NULL is passed.
*/
uint64_t microtime(void)
{
    struct timeval tv;

    if(gettimeofday(&tv, NULL) != 0)
    {
        return 0;
    }

    return (uint64_t)tv.tv_sec * 1000000u + (uint64_t)tv.tv_usec;
}
/// main | |
VAR_TYPE antioptim = 0; | |
int main() | |
{ | |
errno = 0; | |
if(nice(-20) < 0) | |
{ | |
while(errno != 0) | |
{ | |
errno = 0; | |
if(nice(-20) < 0) | |
printf("Attempting to set process to nice of -20 (run with sudo)...\n"); | |
sleep(1); | |
} | |
} | |
setlocale(LC_NUMERIC, ""); | |
int NUM_THREADS = 1; | |
if(atomic_is_lock_free(&ashared) == 1) | |
printf("Lock free atomic? YES\n"); | |
else | |
printf("Lock free atomic? NO\n"); | |
printf("\n"); | |
uint64_t att = 0; | |
uint64_t attm = 0; | |
uint64_t tt = 0; | |
uint64_t ttm = 0; | |
for(NULL; NUM_THREADS <= NUM_THREADS_MAX; NUM_THREADS++) | |
{ | |
// regular | |
for(unsigned int i = 0; i < NUM_THREAD_ITERATIONS; i++) | |
{ | |
uint64_t stm = microtime(); | |
uint64_t st = __rdtsc(); | |
for(int i = 0; i < NUM_THREADS; i++) | |
{ | |
if(pthread_create(&tid[i], NULL, increment_thread, &private[i]) != 0) | |
printf("failed to create thread %i\n", i); | |
} | |
for(int i = 0; i < NUM_THREADS; i++) | |
{ | |
if(pthread_join(tid[i], NULL) != 0) | |
printf("failed to join thread %i\n", i); | |
} | |
tt += __rdtsc()-st; | |
ttm += microtime()-stm; | |
} | |
// atomic | |
for(unsigned int i = 0; i < NUM_THREAD_ITERATIONS; i++) | |
{ | |
uint64_t astm = microtime(); | |
uint64_t ast = __rdtsc(); | |
for(int i = 0; i < NUM_THREADS; i++) | |
{ | |
if(pthread_create(&tid[i], NULL, increment_atomic_thread, &aprivate[i]) != 0) | |
printf("failed to create thread %i\n", i); | |
} | |
for(int i = 0; i < NUM_THREADS; i++) | |
{ | |
if(pthread_join(tid[i], NULL) != 0) | |
printf("failed to join thread %i\n", i); | |
} | |
att += __rdtsc()-ast; | |
attm += microtime()-astm; | |
} | |
// benchmark | |
att /= NUM_THREAD_ITERATIONS; | |
attm /= NUM_THREAD_ITERATIONS; | |
tt /= NUM_THREAD_ITERATIONS; | |
ttm /= NUM_THREAD_ITERATIONS; | |
const uint64_t bt1 = att / tt; | |
const uint64_t bt2 = attm / ttm; | |
// this loop is to break through any compiler optimisation, | |
// unused variables can be optimised out of existance by the compiler | |
// thus this loop gives them a superficial usage which is output | |
// to the console at [1] | |
for(int i = 0; i < NUM_THREADS; i++) | |
antioptim += private[i] + aprivate[i] + shared + ashared; | |
printf("Atomic is %'lux slower with %'i threads.\n", (bt1+bt2)/2, NUM_THREADS); | |
att = 0; | |
attm = 0; | |
tt = 0; | |
ttm = 0; | |
} | |
// done | |
printf("%c\n", antioptim); // [1] | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment