Last active
July 15, 2020 23:44
-
-
Save duarten/b7ee60b4412596440a97498d87bf402e to your computer and use it in GitHub Desktop.
Example program to demonstrate false sharing between threads
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * This is an example program to demonstrate false sharing between threads.
 *
 * It can be compiled two ways:
 *   gcc -g false_sharing.c -Wall -pthread -lnuma -o false_sharing
 *   gcc -g false_sharing.c -Wall -pthread -lnuma -DNO_FALSE_SHARING -o no_false_sharing
 *
 * Defining the NO_FALSE_SHARING macro (via -DNO_FALSE_SHARING) reduces the
 * false sharing.
 *
 * The usage is:
 *   ./false_sharing <number of threads per NUMA node>
 *   ./no_false_sharing <number of threads per NUMA node>
 */
#define _MULTI_THREADED
#define _GNU_SOURCE

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <unistd.h>
#include <sched.h>
#include <pthread.h>
#include <sys/types.h>
#include <assert.h>
#include <numa.h>

#define LOOP_CNT (100 * 1024 * 1024)
/*
 * Read the x86 time-stamp counter with the RDTSC instruction.
 *
 * RDTSC delivers the counter as two 32-bit halves: the low half in EAX
 * ("=a") and the high half in EDX ("=d").  The halves are captured in
 * 32-bit variables so the operand size matches the registers on both
 * 32-bit and 64-bit x86, then widened and recombined.
 *
 * Returns the 64-bit counter value.  x86/x86-64 only.
 */
static __inline__ uint64_t rdtsc(void) {
    uint32_t lo, hi;
    __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
    return ((uint64_t)hi << 32) | lo;
}
/*
 * ALIGN is the alignment, and therefore also the size, of one counter slot.
 *
 * With NO_FALSE_SHARING defined each slot is 128 bytes, so neighbouring
 * counters are spaced 128 bytes apart (presumably enough to keep them on
 * separate cache lines -- confirm for the target CPU).  Otherwise slots are
 * packed back to back at sizeof(uint64_t), so several counters are adjacent
 * in memory.
 *
 * The expansion is parenthesized so ALIGN is safe inside any expression.
 */
#ifdef NO_FALSE_SHARING
#define ALIGN 128
#else
#define ALIGN (sizeof(uint64_t))
#endif

/*
 * One counter slot.  The aligned attribute raises the struct's alignment to
 * ALIGN, which also pads sizeof(struct padded_long) up to ALIGN, so array
 * elements are exactly ALIGN bytes apart.
 */
struct padded_long {
    uint64_t value;
} __attribute__((aligned(ALIGN)));
/*
 * Per-worker bookkeeping, shared between main() and one worker thread.
 *
 * `running` is written by the worker and polled by main(), so it is an
 * _Atomic int rather than a volatile int: volatile does not make concurrent
 * access well-defined, whereas plain reads and writes of an _Atomic object
 * are sequentially consistent atomic operations.
 */
struct thread_data {
    pthread_t tid;              /* handle filled in by pthread_create() */
    int numa_node;              /* node the worker passes to numa_run_on_node() */
    volatile uint64_t* idx;     /* counter the worker increments; volatile so
                                   every += really touches memory */
    _Atomic int running;        /* worker sets this to 1 once it has started */
};

/*
 * Start flag: main() sets it to 1 after every worker has reported `running`;
 * workers spin on it before entering their timed loop.
 */
_Atomic int all_running = 0;
/*
 * check_result(string, val): terminate the program if `val` is non-zero.
 *
 *   string - text naming the failed call; callers include their own
 *            trailing newline.
 *   val    - integer status code; zero means success.
 *
 * On failure the code and `string` are written to stderr and the process
 * exits with status 1.
 *
 * The body is wrapped in do { } while (0) so the macro acts as a single
 * statement (safe before `else`), and `val` is evaluated exactly once into a
 * local so arguments with side effects behave predictably.
 */
#define check_result(string, val) do {                                        \
    const int check_result_rc_ = (val);                                       \
    if (check_result_rc_) {                                                   \
        fprintf(stderr, "Failed with %d at %s", check_result_rc_, (string));  \
        exit(1);                                                              \
    }                                                                         \
} while (0)
void* read_write_func(void* arg) { | |
struct thread_data* t = (struct thread_data*)arg; | |
numa_run_on_node(t->numa_node); | |
t->running = 1; | |
while (!all_running) ; | |
uint64_t start = rdtsc(); | |
for (int i = 0; i < LOOP_CNT; ++i) { | |
*t->idx += i; | |
} | |
uint64_t stop = rdtsc(); | |
printf("%lu mticks, node %d, cpu %d\n", (stop - start) / 1000000, t->numa_node, sched_getcpu()); | |
return NULL; | |
} | |
int main(int argc, char *argv[]) { | |
if (argc != 2) { | |
printf( "usage: %s <n>\n", argv[0] ); | |
printf( "where \"n\" is the number of threads per NUMA node\n"); | |
exit(1); | |
} | |
int num_nodes = numa_max_node() + 1; | |
int num_threads = atoi(argv[1]); | |
num_threads = num_threads > 1 ? num_threads * num_nodes : 2; | |
struct thread_data* threads = (struct thread_data*) malloc(sizeof(struct thread_data) * num_threads); | |
struct padded_long* longs = (struct padded_long*) malloc(sizeof(struct padded_long) * num_threads); | |
assert((uint64_t)&longs[1] - (uint64_t)&longs[0] == ALIGN); | |
for (int i = 0; i < num_threads; ++i) { | |
threads[i].idx = &longs[i].value; | |
threads[i].numa_node = i % num_nodes; | |
threads[i].running = 0; | |
int rc = pthread_create(&threads[i].tid, NULL, read_write_func, &threads[i]); | |
check_result("pthread_create()\n", rc); | |
} | |
for (int i = 0; i < num_threads; ++i) { | |
while (!threads[i].running) ; | |
} | |
all_running = 1; | |
uint64_t start = rdtsc(); | |
for (int i = 0; i < num_threads; i++) { | |
int rc = pthread_join(threads[i].tid, NULL); | |
check_result("pthread_join()\n", rc); | |
} | |
uint64_t stop = rdtsc(); | |
int cpu = sched_getcpu(); | |
int node = numa_node_of_cpu(cpu); | |
printf("main %lu mticks, node %d, cpu %d\n", (stop - start) / 1000000, node, cpu); | |
free(threads); | |
free(longs); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment