Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Example program to demonstrate false sharing between threads
/*
* This is an example program to demonstrate false sharing between threads.
*
* It can be compiled two ways:
* gcc -g false_sharing.c -Wall -pthread -lnuma -o false_sharing
* gcc -g false_sharing.c -Wall -pthread -lnuma -DNO_FALSE_SHARING -o no_false_sharing
*
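* Defining NO_FALSE_SHARING (via -DNO_FALSE_SHARING) aligns each thread's
* counter to 128 bytes, so neighbouring counters sit 128 bytes apart instead
* of being packed back to back; this reduces the false sharing.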
*
* The usage is:
* ./false_sharing <number of threads in a NUMA node>
* ./no_false_sharing <number of threads in a NUMA node>
*/
#define _MULTI_THREADED
#define _GNU_SOURCE
#include <assert.h>
#include <inttypes.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <pthread.h>
#include <sched.h>
#include <sys/types.h>
#include <unistd.h>

#include <numa.h>
/* Number of loop iterations each worker thread performs on its counter. */
#define LOOP_CNT (100 * 1024 * 1024)
/*
 * Read the CPU time-stamp counter with the x86 RDTSC instruction.
 *
 * The instruction hands back the counter in two registers (the "a" and "d"
 * register constraints below); the halves are stitched together into a
 * single 64-bit tick count, upper half from "d", lower half from "a".
 */
static __inline__ uint64_t rdtsc() {
    uint32_t low_bits;
    uint32_t high_bits;

    __asm__ __volatile__("rdtsc" : "=a"(low_bits), "=d"(high_bits));
    return ((uint64_t)high_bits << 32) | (uint64_t)low_bits;
}
/*
 * ALIGN is the alignment requested for each per-thread counter slot:
 * 128 bytes when NO_FALSE_SHARING is defined, otherwise just the size of a
 * uint64_t so that neighbouring counters are packed back to back.
 */
#ifndef NO_FALSE_SHARING
#define ALIGN sizeof(uint64_t)
#else
#define ALIGN 128
#endif

/*
 * One counter slot. The aligned attribute raises the struct's alignment to
 * ALIGN, which also rounds its size up to ALIGN, so consecutive array
 * elements are ALIGN bytes apart.
 */
struct padded_long {
    uint64_t value;
} __attribute__((aligned(ALIGN)));
/* Per-thread bookkeeping handed to each worker through pthread_create(). */
struct thread_data {
    pthread_t tid;           /* thread handle filled in by pthread_create() */
    int numa_node;           /* node the worker passes to numa_run_on_node() */
    volatile uint64_t *idx;  /* counter this worker repeatedly increments */
    volatile int running;    /* worker sets this to 1 once it has started */
};
/*
 * Start flag: main() sets this to 1 after every worker has set its own
 * running flag; each worker spins on it before entering its timed loop.
 */
volatile int all_running = 0;
/*
 * Abort the program when a call reports failure.
 *
 * string: label printed after the error code (the call sites in this file
 *         include the trailing newline in it).
 * val:    integer status; zero means success, any other value is printed
 *         and the process exits with status 1.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly one
 * statement and can be used in an unbraced if/else. val is parenthesised and
 * evaluated exactly once, so passing an expression with side effects is safe.
 */
#define check_result(string, val) \
    do { \
        int check_result_rc_ = (val); \
        if (check_result_rc_) { \
            printf("Failed with %d at %s", check_result_rc_, (string)); \
            exit(1); \
        } \
    } while (0)
/*
 * Worker thread body: hammer this thread's counter and report the time taken.
 *
 * arg points to the struct thread_data for this thread. The worker asks
 * libnuma to run it on t->numa_node, announces itself through t->running,
 * spins until main() raises all_running, then performs LOOP_CNT
 * read-modify-write updates through t->idx. Finally it prints the elapsed
 * TSC ticks (divided by one million) together with the requested node and
 * the CPU it is running on at that moment.
 *
 * Always returns NULL.
 */
void* read_write_func(void* arg) {
    struct thread_data *t = arg;

    /*
     * Pinning is best effort: on failure the measurement is still taken, but
     * the node printed below may not be where the thread actually ran, so
     * warn instead of failing silently.
     */
    if (numa_run_on_node(t->numa_node) != 0) {
        fprintf(stderr, "warning: numa_run_on_node(%d) failed\n", t->numa_node);
    }

    /* Tell main() we are up, then wait for the common start signal. */
    t->running = 1;
    while (!all_running) {
        /* busy-wait */
    }

    uint64_t start = rdtsc();
    for (int i = 0; i < LOOP_CNT; ++i) {
        /* idx points to volatile data, so every iteration loads and stores. */
        *t->idx += i;
    }
    uint64_t stop = rdtsc();

    /* PRIu64 is the conversion that matches uint64_t regardless of ABI. */
    printf("%" PRIu64 " mticks, node %d, cpu %d\n",
           (stop - start) / 1000000, t->numa_node, sched_getcpu());
    return NULL;
}
/*
 * Entry point: start the workers, release them together and time the run.
 *
 * argv[1] is the number of threads per NUMA node. The total thread count is
 * that number times the number of nodes, with a floor of two threads so that
 * there is always a neighbour to share with. Thread i is assigned to node
 * i % num_nodes and to counter slot i.
 *
 * Exits with status 1 on bad usage, when libnuma is unusable, on allocation
 * failure, or when a pthread call fails; returns 0 otherwise.
 */
int main(int argc, char *argv[]) {
    if (argc != 2) {
        printf( "usage: %s <n>\n", argv[0] );
        printf( "where \"n\" is the number of threads per NUMA node\n");
        exit(1);
    }

    /* libnuma requires numa_available() to succeed before any other call. */
    if (numa_available() < 0) {
        fprintf(stderr, "NUMA is not available on this system\n");
        exit(1);
    }

    int num_nodes = numa_max_node() + 1;
    if (num_nodes < 1) {
        fprintf(stderr, "no NUMA nodes reported\n");
        exit(1);
    }

    /* Reject non-numeric input and counts whose total would overflow int. */
    char *end = NULL;
    long per_node = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || per_node > INT_MAX / num_nodes) {
        fprintf(stderr, "invalid thread count: %s\n", argv[1]);
        exit(1);
    }

    /*
     * per_node threads for every node (so n == 1 really means one thread on
     * each node), but never fewer than two threads in total; zero and
     * negative requests fall back to that minimum.
     */
    int num_threads = per_node > 0 ? (int)per_node * num_nodes : 0;
    if (num_threads < 2) {
        num_threads = 2;
    }

    struct thread_data *threads = calloc((size_t)num_threads, sizeof *threads);
    if (threads == NULL) {
        fprintf(stderr, "out of memory allocating thread table\n");
        exit(1);
    }

    /*
     * malloc() does not promise the ALIGN-byte alignment that struct
     * padded_long asks for, so request it explicitly. The counters are
     * zeroed because the workers read them before the first write.
     */
    void *raw = NULL;
    int alloc_rc = posix_memalign(&raw, ALIGN,
                                  sizeof(struct padded_long) * (size_t)num_threads);
    check_result("posix_memalign()\n", alloc_rc);
    struct padded_long *longs = raw;
    memset(longs, 0, sizeof *longs * (size_t)num_threads);

    /* Consecutive slots must be exactly ALIGN bytes apart. */
    assert(sizeof(struct padded_long) == ALIGN);

    for (int i = 0; i < num_threads; ++i) {
        threads[i].idx = &longs[i].value;
        threads[i].numa_node = i % num_nodes;
        threads[i].running = 0;
        int rc = pthread_create(&threads[i].tid, NULL, read_write_func, &threads[i]);
        check_result("pthread_create()\n", rc);
    }

    /* Wait until every worker has checked in, then release them together. */
    for (int i = 0; i < num_threads; ++i) {
        while (!threads[i].running) {
            /* busy-wait */
        }
    }
    all_running = 1;

    uint64_t start = rdtsc();
    for (int i = 0; i < num_threads; i++) {
        int rc = pthread_join(threads[i].tid, NULL);
        check_result("pthread_join()\n", rc);
    }
    uint64_t stop = rdtsc();

    int cpu = sched_getcpu();
    int node = numa_node_of_cpu(cpu);
    printf("main %" PRIu64 " mticks, node %d, cpu %d\n",
           (stop - start) / 1000000, node, cpu);

    free(threads);
    free(longs);
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment