Last active
July 8, 2024 06:27
-
-
Save will127534/bca4aa22af1c864740408020e6bf0ae0 to your computer and use it in GitHub Desktop.
Modified from https://github.com/raas/mbw/blob/master/mbw.c to add multithreading support.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define _GNU_SOURCE

#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>
/* Defaults and limits for the command-line options. */
#define DEFAULT_NR_LOOPS 10
#define MAX_TESTS 3
#define DEFAULT_BLOCK_SIZE 262144

/*
 * Start gate: worker threads block on `cond` (holding `mutex`) until main()
 * broadcasts, so that all threads of one test begin copying together.
 * Both objects are private to this translation unit.
 */
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;

/*
 * Test identifiers. They double as indices into the tests[] selection array
 * in main(), so every value must stay below MAX_TESTS.
 */
#define TEST_MEMCPY 0
#define TEST_DUMB 1
#define TEST_MCBLOCK 2

#define VERSION "1.5"
/*
 * Job description for one benchmark thread. main() fills one instance per
 * thread and passes its address to worker_wrapper() via pthread_create().
 */
typedef struct worker_args {
    unsigned long long asize;      /* number of long elements in each of a and b */
    long *a;                       /* copy source */
    long *b;                       /* copy destination */
    int type;                      /* which test to run (one of the TEST_* values) */
    unsigned long long block_size; /* chunk size in bytes for the block-wise memcpy test */
    int thread_num;                /* thread index, printed with the result */
    int nr_loops;                  /* copies to perform; 0 means loop forever */
    double mt;                     /* array size in MiB, used when reporting bandwidth */
} worker_args;
void usage() { | |
printf("mbw memory benchmark v%s, https://github.com/raas/mbw\n", VERSION); | |
printf("Usage: mbw [options] array_size_in_MiB\n"); | |
printf("Options:\n"); | |
printf(" -n: number of runs per test (0 to run forever)\n"); | |
printf(" -a: Don't display average\n"); | |
printf(" -t%d: memcpy test\n", TEST_MEMCPY); | |
printf(" -t%d: dumb (b[i]=a[i] style) test\n", TEST_DUMB); | |
printf(" -t%d: memcpy test with fixed block size\n", TEST_MCBLOCK); | |
printf(" -b <size>: block size in bytes for -t2 (default: %d)\n", DEFAULT_BLOCK_SIZE); | |
printf(" -q: quiet (print statistics only)\n"); | |
printf(" -c: number of threads\n"); | |
printf("(will then use two arrays, watch out for swapping)\n"); | |
printf("'Bandwidth' is amount of data copied over the time this operation took.\n"); | |
printf("\nThe default is to run all tests available.\n"); | |
} | |
/*
 * Allocate an array of `asize` longs and set every element to 0xaa.
 *
 * Writing to every element also touches each page of the allocation before
 * any timing starts.
 *
 * Returns a pointer the caller must release with free(). On failure the
 * function prints a diagnostic with perror() and terminates the process
 * with exit(1); it never returns NULL.
 */
long *make_array(unsigned long long asize) {
    /*
     * calloc() takes size_t. Where size_t is narrower than unsigned long long
     * the count would be silently truncated while the fill loop below still
     * ran to the full `asize`, writing past the end of the allocation.
     */
    if (asize > SIZE_MAX / sizeof(long)) {
        errno = ENOMEM;
        perror("Error allocating memory");
        exit(1);
    }

    const size_t count = (size_t)asize;

    /*
     * Request at least one element: calloc(0, ...) is allowed to return NULL,
     * which would otherwise be misreported as an allocation failure.
     */
    long *a = calloc(count ? count : 1, sizeof *a);
    if (NULL == a) {
        perror("Error allocating memory");
        exit(1);
    }

    for (size_t i = 0; i < count; i++) {
        a[i] = 0xaa;
    }
    return a;
}
double worker(unsigned long long asize, long *a, long *b, int type, unsigned long long block_size) { | |
unsigned long long t; | |
struct timeval starttime, endtime; | |
double te; | |
unsigned int long_size = sizeof(long); | |
unsigned long long array_bytes = asize * long_size; | |
gettimeofday(&starttime, NULL); | |
if (type == TEST_MEMCPY) { | |
memcpy(b, a, array_bytes); | |
} else if (type == TEST_MCBLOCK) { | |
char *src = (char *)a; | |
char *dst = (char *)b; | |
for (t = array_bytes; t >= block_size; t -= block_size, src += block_size) { | |
dst = (char *)memcpy(dst, src, block_size) + block_size; | |
} | |
if (t) { | |
memcpy(dst, src, t); | |
} | |
} else if (type == TEST_DUMB) { | |
for (t = 0; t < asize; t++) { | |
b[t] = a[t]; | |
} | |
} | |
gettimeofday(&endtime, NULL); | |
te = ((double)(endtime.tv_sec * 1000000 + endtime.tv_usec) - (starttime.tv_sec * 1000000 + starttime.tv_usec)) / 1000000; | |
return te; | |
} | |
void printout(double te, double mt, int type) { | |
switch (type) { | |
case TEST_MEMCPY: | |
printf("Method: MEMCPY\t"); | |
break; | |
case TEST_DUMB: | |
printf("Method: DUMB\t"); | |
break; | |
case TEST_MCBLOCK: | |
printf("Method: MCBLOCK\t"); | |
break; | |
} | |
printf("Elapsed: %.5f\t", te); | |
printf("MiB: %.5f\t", mt); | |
printf("Copy: %.3f MiB/s\n", mt / te); | |
} | |
void *worker_wrapper(void *args) { | |
//pthread_mutex_lock(&mutex); | |
//pthread_cond_wait(&cond, &mutex); | |
worker_args *wargs = (worker_args *)args; | |
//printf("Thread %d start", wargs->thread_num); | |
pthread_mutex_lock(&mutex); | |
//printf("Thread %d waiting", wargs->thread_num); | |
pthread_cond_wait(&cond, &mutex); | |
pthread_mutex_unlock(&mutex); | |
//worker_args *wargs = (worker_args *)args; | |
double te, te_sum = 0; | |
for (int i = 0; wargs->nr_loops == 0 || i < wargs->nr_loops; i++) { | |
te = worker(wargs->asize, wargs->a, wargs->b, wargs->type, wargs->block_size); | |
te_sum += te; | |
//printf("Thread %d, Iteration %d\t", wargs->thread_num, i); | |
//printout(te, wargs->mt, wargs->type); | |
} | |
if (wargs->nr_loops > 0) { | |
printf("Thread %d AVG\t", wargs->thread_num); | |
printout(te_sum / wargs->nr_loops, wargs->mt, wargs->type); | |
} | |
return NULL; | |
} | |
int main(int argc, char **argv) { | |
unsigned long long asize = 0, block_size = DEFAULT_BLOCK_SIZE; | |
double mt = 0; | |
int o, num_threads = 1, nr_loops = DEFAULT_NR_LOOPS, showavg = 1, quiet = 0, tests[MAX_TESTS] = {0}; | |
pthread_t *threads; | |
cpu_set_t cpus; | |
worker_args *args; | |
while ((o = getopt(argc, argv, "haqn:t:b:c:")) != EOF) { | |
switch (o) { | |
case 'h': | |
usage(); | |
exit(1); | |
case 'a': | |
showavg = 0; | |
break; | |
case 'n': | |
nr_loops = strtoul(optarg, NULL, 10); | |
break; | |
case 't': | |
tests[strtoul(optarg, NULL, 10)] = 1; | |
break; | |
case 'b': | |
block_size = strtoull(optarg, NULL, 10); | |
break; | |
case 'q': | |
quiet = 1; | |
break; | |
case 'c': | |
num_threads = atoi(optarg); | |
break; | |
default: | |
break; | |
} | |
} | |
if (optind < argc) { | |
mt = strtoul(argv[optind++], NULL, 10); | |
} else { | |
printf("Error: no array size given!\n"); | |
exit(1); | |
} | |
if (0 >= mt) { | |
printf("Error: array size wrong!\n"); | |
exit(1); | |
} | |
asize = 1024 * 1024 / sizeof(long) * mt; | |
if (asize * sizeof(long) < block_size) { | |
printf("Error: array size larger than block size (%llu bytes)!\n", block_size); | |
exit(1); | |
} | |
if (tests[0] == 0 && tests[1] == 0 && tests[2] == 0) { | |
tests[0] = tests[1] = tests[2] = 1; | |
} | |
if (!quiet) { | |
//printf("Allocating 2*%llu elements = %llu bytes of memory.\n", asize, 2 * asize * sizeof(long)); | |
if (tests[2]) { | |
// printf("Using %llu bytes as blocks for memcpy block copy test.\n", block_size); | |
} | |
} | |
//long *a = make_array(asize); | |
//long *b = make_array(asize); | |
threads = malloc(num_threads * sizeof(pthread_t)); | |
args = malloc(num_threads * sizeof(worker_args)); | |
for (int testno = 0; testno < MAX_TESTS; testno++) { | |
if (tests[testno]) { | |
for (int t = 0; t < num_threads; t++) { | |
CPU_ZERO(&cpus); | |
CPU_SET(t, &cpus); // Correctly pin threads to different CPUs based on their thread index | |
args[t].asize = asize; | |
args[t].a = make_array(asize); | |
args[t].b = make_array(asize); | |
args[t].block_size = block_size; | |
args[t].nr_loops = nr_loops; | |
args[t].mt = mt; | |
args[t].thread_num = t; | |
args[t].type = testno; | |
pthread_create(&threads[t], NULL, worker_wrapper, &args[t]); | |
pthread_setaffinity_np(threads[t], sizeof(cpu_set_t), &cpus); | |
} | |
sleep(1); | |
pthread_cond_broadcast(&cond); | |
// Join threads after all have been created for the current test | |
for (int t = 0; t < num_threads; t++) { | |
pthread_join(threads[t], NULL); | |
free(args[t].a); | |
free(args[t].b); | |
} | |
} | |
} | |
free(threads); | |
free(args); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
gcc -pthread mbw.c -o mbw
./mbw 1000 -c 4