Skip to content

Instantly share code, notes, and snippets.

@mrbid
Last active January 2, 2023 07:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mrbid/a33aa35b4f57ddc6812f351e11bb9349 to your computer and use it in GitHub Desktop.
Save mrbid/a33aa35b4f57ddc6812f351e11bb9349 to your computer and use it in GitHub Desktop.
AtomicBench - This is a performance based benchmark of atomic vs non-atomic increment operations.
/*
James William Fletcher (github.com/mrbid)
February 2022
This is a performance-based benchmark of atomic vs non-atomic increment operations.
compile: gcc -std=gnu99 atomicbench.c -lpthread -lm -o atomicbench
*/
#include <stdio.h> // printf
#include <string.h> // memset
#include <pthread.h> // pthread_
#include <stdatomic.h> // atomic_
#include <x86intrin.h> // __rdtsc
#include <sys/time.h> // gettimeofday
#include <locale.h> // setlocale
#include <stdint.h> // uint64_t
#include <unistd.h> // sleep, nice
#include <errno.h>
// Integer type of the benchmarked counters, and the matching <stdatomic.h>
// type used for the atomic shared counter. Keep the two in sync.
#define VAR_TYPE unsigned short
#define VAR_TYPE_ATOMIC atomic_ushort
// Alternative counter width (swap with the pair above to benchmark it):
// #define VAR_TYPE unsigned int
// #define VAR_TYPE_ATOMIC atomic_uint
// Upper bound of the thread-count sweep in main() and size of the per-thread arrays.
#define NUM_THREADS_MAX 32
// Used twice: as the number of increments each thread performs, and as the
// number of timed create/join rounds main() averages over per thread count.
#define NUM_THREAD_ITERATIONS 10000
// Thread handles, reused by main() for every round.
pthread_t tid[NUM_THREADS_MAX];
/// regular
// Plain (non-atomic) counter incremented by every increment_thread() instance.
VAR_TYPE shared = 0;
// One counter per thread; main() passes &private[i] to thread i.
VAR_TYPE private[NUM_THREADS_MAX] = {0};
void *increment_thread(void *arg)
{
for(unsigned int i = 0; i < NUM_THREAD_ITERATIONS; i++)
{
(*(VAR_TYPE*)arg)++;
shared++;
}
return 0;
}
/// atomic
// Atomic counter incremented by every increment_atomic_thread() instance.
VAR_TYPE_ATOMIC ashared = 0;
// One plain (non-atomic) counter per thread; main() passes &aprivate[i] to thread i.
VAR_TYPE aprivate[NUM_THREADS_MAX] = {0};
void *increment_atomic_thread(void *arg)
{
for(unsigned int i = 0; i < NUM_THREAD_ITERATIONS; i++)
{
(*(VAR_TYPE*)arg)++;
ashared++;
}
return 0;
}
/// utils
/// Returns the current wall-clock time reported by gettimeofday(), expressed
/// as microseconds since the Epoch. Returns 0 if gettimeofday() fails.
uint64_t microtime(void)
{
    struct timeval tv;
    // The timezone argument is obsolete and should be NULL.
    if(gettimeofday(&tv, NULL) != 0)
        return 0;
    // Widen before multiplying so the product is computed in 64 bits even
    // where time_t is narrower.
    return (uint64_t)tv.tv_sec * 1000000u + (uint64_t)tv.tv_usec;
}
/// main
VAR_TYPE antioptim = 0;
int main()
{
errno = 0;
if(nice(-20) < 0)
{
while(errno != 0)
{
errno = 0;
if(nice(-20) < 0)
printf("Attempting to set process to nice of -20 (run with sudo)...\n");
sleep(1);
}
}
setlocale(LC_NUMERIC, "");
int NUM_THREADS = 1;
if(atomic_is_lock_free(&ashared) == 1)
printf("Lock free atomic? YES\n");
else
printf("Lock free atomic? NO\n");
printf("\n");
uint64_t att = 0;
uint64_t attm = 0;
uint64_t tt = 0;
uint64_t ttm = 0;
for(NULL; NUM_THREADS <= NUM_THREADS_MAX; NUM_THREADS++)
{
// regular
for(unsigned int i = 0; i < NUM_THREAD_ITERATIONS; i++)
{
uint64_t stm = microtime();
uint64_t st = __rdtsc();
for(int i = 0; i < NUM_THREADS; i++)
{
if(pthread_create(&tid[i], NULL, increment_thread, &private[i]) != 0)
printf("failed to create thread %i\n", i);
}
for(int i = 0; i < NUM_THREADS; i++)
{
if(pthread_join(tid[i], NULL) != 0)
printf("failed to join thread %i\n", i);
}
tt += __rdtsc()-st;
ttm += microtime()-stm;
}
// atomic
for(unsigned int i = 0; i < NUM_THREAD_ITERATIONS; i++)
{
uint64_t astm = microtime();
uint64_t ast = __rdtsc();
for(int i = 0; i < NUM_THREADS; i++)
{
if(pthread_create(&tid[i], NULL, increment_atomic_thread, &aprivate[i]) != 0)
printf("failed to create thread %i\n", i);
}
for(int i = 0; i < NUM_THREADS; i++)
{
if(pthread_join(tid[i], NULL) != 0)
printf("failed to join thread %i\n", i);
}
att += __rdtsc()-ast;
attm += microtime()-astm;
}
// benchmark
att /= NUM_THREAD_ITERATIONS;
attm /= NUM_THREAD_ITERATIONS;
tt /= NUM_THREAD_ITERATIONS;
ttm /= NUM_THREAD_ITERATIONS;
const uint64_t bt1 = att / tt;
const uint64_t bt2 = attm / ttm;
// this loop is to break through any compiler optimisation,
// unused variables can be optimised out of existance by the compiler
// thus this loop gives them a superficial usage which is output
// to the console at [1]
for(int i = 0; i < NUM_THREADS; i++)
antioptim += private[i] + aprivate[i] + shared + ashared;
printf("Atomic is %'lux slower with %'i threads.\n", (bt1+bt2)/2, NUM_THREADS);
att = 0;
attm = 0;
tt = 0;
ttm = 0;
}
// done
printf("%c\n", antioptim); // [1]
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment