Created
June 3, 2017 12:24
-
-
Save jjhursey/61d502825024ab7bf9ffad83ab6c9975 to your computer and use it in GitHub Desktop.
Test Program for discussion on PMIx PR https://github.com/pmix/pmix/pull/384
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Test program for memory consistency in a thread shifting design | |
* | |
* Compile: | |
* gcc -O0 -g -lpthread -I/path-to/hwloc/include/ -L/path-to/hwloc/lib -lhwloc cache-test.c -o cache-test | |
* | |
* Run: | |
* ./cache-test ITERATIONS [MODE] | |
* ./cache-test 9000000 3 | |
* | |
* Example: | |
* ./cache-test 9000000 0 --> Will fail, no memory barriers | |
* ./cache-test 9000000 1 --> Will fail, no WMB | |
* ./cache-test 9000000 2 --> Will fail, no RMB | |
* ./cache-test 9000000 3 --> Success | |
* ./cache-test 9000000 4 --> Success | |
* ./cache-test 9000000 5 --> N/A | |
*/ | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <stdbool.h> | |
#include <string.h> | |
#include <pthread.h> | |
#include <stdint.h> | |
#include <hwloc.h> | |
/* | |
* PPC Memory barrier | |
*/ | |
// XMB - Room for something custom | |
#define XMB() exit(-1); | |
#define MB() __asm__ __volatile__ ("sync" : : : "memory") | |
#define RMB() __asm__ __volatile__ ("lwsync" : : : "memory") | |
#define WMB() __asm__ __volatile__ ("eieio" : : : "memory") | |
// Max value for an int16_t | |
#define MAX_VAL 32767 | |
typedef struct { | |
int type; | |
union { | |
bool flag; | |
int integer; | |
int8_t int8; | |
int16_t int16; | |
int32_t int32; | |
int64_t int64; | |
//char padding[1]; | |
} data; | |
} my_value_t; | |
// Structure to handoff work to the peer thread | |
typedef struct { | |
volatile bool working; | |
void *ptr; // Note that adding a volatile here has no effect | |
} thread_handoff_t; | |
// Shared object to handoff work | |
thread_handoff_t handoff; | |
// Indicates if the test has finished | |
bool time_to_stop = false; | |
// Progress reporting | |
#define PERC_INC 10.0 | |
double perc_report_after = PERC_INC; | |
double perc_current = 0.0; | |
// Memory barrier modes | |
#define MB_MODE_NONE 0x0 | |
#define MB_MODE_RMB 0x1 | |
#define MB_MODE_WMB 0x2 | |
#define MB_MODE_MB 0x4 | |
#define MB_MODE_XMB 0x8 | |
#define MB_MODE_ALL (MB_MODE_RMB | MB_MODE_WMB) | |
int mb_mode = MB_MODE_ALL; | |
// Shared hwloc topology (so we only have to read it once) | |
static hwloc_topology_t topo; | |
// Which object we are binding to | |
// 4 - sockets with 5 cores each | |
// 20 - cores with 8 PUs each | |
//#define OBJ_TYPE HWLOC_OBJ_SOCKET | |
#define OBJ_TYPE HWLOC_OBJ_CORE | |
/* | |
* Some basic timing support | |
*/ | |
double acc_time, start_time, stop_time, delta; | |
static double get_ts_gettimeofday(void) { | |
double ret; | |
struct timeval tv; | |
gettimeofday(&tv, NULL); | |
ret = tv.tv_sec; | |
ret += (double)tv.tv_usec / 1000000.0; | |
return ret; | |
} | |
/* | |
* Bind either the main or support thread far away from each other | |
*/ | |
void bind_me_to(bool main_thread); | |
/* | |
* Support thread to do the memory allocation and xfer | |
*/ | |
void *value_xfer_thread(void *arg); | |
/* | |
* Main thread | |
*/ | |
int main(int argc, char **argv) { | |
pthread_t support_thread; | |
int rc, i, max_iters = 10, cur_iter; | |
my_value_t *val = NULL; | |
int mode; | |
/* | |
* Parse command line arguments | |
*/ | |
if( argc > 1 ) { | |
max_iters = atoi(argv[1]); | |
} | |
if( argc > 2 ) { | |
mode = atoi(argv[2]); | |
if( 0 > mode || mode > 5 ) { | |
printf("Error: Invalid mode %d\n" | |
"\tNone = 0\n" | |
"\tRMB = 1\n" | |
"\tWMB = 2\n" | |
"\tBoth = 3\n" | |
"\tMB Only = 4\n", | |
"\tXMB Only = 5\n", | |
mode); | |
exit(-1); | |
} | |
} | |
else { | |
mode = 3; | |
} | |
switch(mode) { | |
case 0: | |
mb_mode = MB_MODE_NONE; | |
break; | |
case 1: | |
mb_mode = MB_MODE_RMB; | |
break; | |
case 2: | |
mb_mode = MB_MODE_WMB; | |
break; | |
case 3: | |
mb_mode = MB_MODE_ALL; | |
break; | |
case 4: | |
mb_mode = MB_MODE_MB; | |
break; | |
case 5: | |
mb_mode = MB_MODE_XMB; | |
break; | |
} | |
// Load hwloc topology | |
hwloc_topology_init(&topo); | |
hwloc_topology_load(topo); | |
// Display banner | |
printf("---------------------------\n"); | |
printf("Iterations: %10d\n", max_iters); | |
printf("Mode R MB : %10s\n", (mb_mode & MB_MODE_RMB ? "Enabled" : "Disabled") ); | |
printf("Mode W MB : %10s\n", (mb_mode & MB_MODE_WMB ? "Enabled" : "Disabled") ); | |
printf("Mode - MB : %10s\n", (mb_mode & MB_MODE_MB ? "Enabled" : "Disabled") ); | |
printf("Mode X MB : %10s\n", (mb_mode & MB_MODE_XMB ? "Enabled" : "Disabled") ); | |
printf("---------------------------\n"); | |
bind_me_to(true); | |
handoff.working = false; | |
/* | |
* Launch supporting thread | |
*/ | |
rc = pthread_create(&support_thread, NULL, value_xfer_thread, NULL); | |
if( 0 != rc ) { | |
printf("Error: Failed to create a thread! %d\n", rc); | |
exit(-1); | |
} | |
/* | |
* Main work loop | |
*/ | |
acc_time = 0.0; | |
for(cur_iter = 0; cur_iter < max_iters; ++cur_iter) { | |
perc_current = (cur_iter / ((double)max_iters)) * 100.0; | |
if( perc_current > perc_report_after ) { | |
delta = (acc_time / cur_iter) * 1000000; | |
printf("%6.1f %% complete : Iteration %10d / %10d : %6.1f usec / iter\n", | |
perc_current, cur_iter+1, max_iters, delta); | |
perc_report_after += PERC_INC; | |
} | |
start_time = get_ts_gettimeofday(); | |
// Initialize values | |
val = NULL; | |
handoff.ptr = &val; | |
if( mb_mode & MB_MODE_RMB ) { | |
RMB(); | |
} | |
if( mb_mode & MB_MODE_MB ) { | |
MB(); | |
} | |
if( mb_mode & MB_MODE_XMB ) { | |
XMB(); | |
} | |
handoff.working = true; | |
// Wait for work to finish | |
while( handoff.working ) { | |
usleep(1); | |
} | |
if( mb_mode & MB_MODE_WMB ) { | |
WMB(); | |
} | |
if( mb_mode & MB_MODE_MB ) { | |
MB(); | |
} | |
if( mb_mode & MB_MODE_XMB ) { | |
XMB(); | |
} | |
// Inspect values for correctness | |
if( NULL == val ) { | |
printf("[%10d / %10d] Error: val = %s\n", cur_iter+1, max_iters, | |
(NULL == val ? "NULL" : "Valid") ); | |
exit(-1); | |
} | |
else if( 999 != val->type ) { | |
printf("[%10d / %10d] Error: val->type = %d\n", cur_iter+1, max_iters, val->type); | |
exit(-1); | |
} | |
else if( (cur_iter+1)%MAX_VAL != val->data.int16 ) { | |
printf("[%10d / %10d] Error: val->data.int16 = %d\n", cur_iter+1, max_iters, val->data.int16); | |
exit(-1); | |
} | |
stop_time = get_ts_gettimeofday(); | |
acc_time += (stop_time - start_time); | |
// Yes, this is a memory leak! | |
// I need to make sure that the supporting thread is not reusing a | |
// previous storage location when it calls malloc. This is to emulate | |
// a program that calls malloc after the value was acquired, possibly | |
// reusing this memory location. | |
//free(val); | |
val = NULL; | |
} | |
delta = (acc_time / max_iters) * 1000000; | |
/* | |
* All done - Cleanup | |
*/ | |
time_to_stop = true; | |
rc = pthread_join(support_thread, NULL); | |
if( 0 != rc ) { | |
printf("Error: Failed to join a thread! %d\n", rc); | |
exit(-1); | |
} | |
hwloc_topology_destroy(topo); | |
printf("Success - %6.1f usec / iter\n", delta); | |
return 0; | |
} | |
void *value_xfer_thread(void *arg) { | |
my_value_t **val = NULL; | |
static int var = 0; | |
// Bind this thread away from the main thread | |
bind_me_to(false); | |
while( !time_to_stop ) { | |
if( handoff.working ) { | |
// Make sure I have the right pointer | |
if( mb_mode & MB_MODE_WMB ) { | |
WMB(); | |
} | |
if( mb_mode & MB_MODE_MB ) { | |
MB(); | |
} | |
if( mb_mode & MB_MODE_XMB ) { | |
XMB(); | |
} | |
// Allocate and set the value | |
val = (my_value_t**)handoff.ptr; | |
(*val) = malloc(sizeof(my_value_t)); | |
(*val)->type = 999; | |
(*val)->data.int16 = (++var)%MAX_VAL; | |
// Make sure main thread can see the value | |
// See 'Examples' -> 'Global thread flag' discussion here: | |
// https://www.ibm.com/developerworks/systems/articles/powerpc.html | |
if( mb_mode & MB_MODE_RMB ) { | |
RMB(); | |
} | |
if( mb_mode & MB_MODE_MB ) { | |
MB(); | |
} | |
if( mb_mode & MB_MODE_XMB ) { | |
XMB(); | |
} | |
// Release main thread | |
handoff.working = false; | |
} | |
else { | |
// wait for work | |
usleep(1); | |
} | |
} | |
pthread_exit(NULL); | |
} | |
void bind_me_to(bool main_thread) { | |
int num_objs; | |
hwloc_cpuset_t set; | |
char *buffer = NULL; | |
hwloc_obj_t obj; | |
num_objs = hwloc_get_nbobjs_by_type(topo, OBJ_TYPE); | |
if( main_thread ) { | |
obj = hwloc_get_obj_by_type(topo, OBJ_TYPE, 0); | |
} | |
else { | |
obj = hwloc_get_obj_by_type(topo, OBJ_TYPE, num_objs-1); | |
} | |
if( obj->type == OBJ_TYPE ) { | |
hwloc_set_cpubind(topo, obj->cpuset, HWLOC_CPUBIND_THREAD); | |
} | |
else { | |
printf("Error: Invalid object\n"); | |
exit(-1); | |
} | |
set = hwloc_bitmap_alloc(); | |
hwloc_get_cpubind(topo, set, HWLOC_CPUBIND_THREAD); | |
hwloc_bitmap_asprintf(&buffer, set); | |
printf("%s : [objs = %d] : cpuset is %s\n", (main_thread ? "Main" : "Peer"), num_objs, buffer); | |
free(buffer); | |
hwloc_bitmap_free(set); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment