Skip to content

Instantly share code, notes, and snippets.

@will127534
Last active July 8, 2024 06:27
Show Gist options
  • Save will127534/bca4aa22af1c864740408020e6bf0ae0 to your computer and use it in GitHub Desktop.
Save will127534/bca4aa22af1c864740408020e6bf0ae0 to your computer and use it in GitHub Desktop.
Modified from https://github.com/raas/mbw/blob/master/mbw.c with multithread support
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/time.h>
#include <time.h>
#include <string.h>
#include <pthread.h>
#include <sched.h>
/* Default number of timed runs per test (overridable with -n). */
#define DEFAULT_NR_LOOPS 10
/* Number of available tests; sizes the tests[] selection array in main(). */
#define MAX_TESTS 3
/* Default block size in bytes for the block-wise memcpy test (-b). */
#define DEFAULT_BLOCK_SIZE 262144
/* Condition variable / mutex pair used to hold worker threads until main() signals that a test may start. */
pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
/* Test identifiers; these are also the numbers accepted by the -t option. */
#define TEST_MEMCPY 0
#define TEST_DUMB 1
#define TEST_MCBLOCK 2
/* Version string shown in the usage text. */
#define VERSION "1.5"
/* Per-thread parameters handed to worker_wrapper() through pthread_create(). */
typedef struct {
unsigned long long asize; /* number of long elements in each array */
long *a; /* source array */
long *b; /* destination array */
int type; /* test to run: one of the TEST_* values */
unsigned long long block_size; /* block size in bytes, used by TEST_MCBLOCK */
int thread_num; /* thread index, printed in the result line */
int nr_loops; /* number of runs; 0 means run forever */
double mt; /* array size in MiB, as reported in the output */
} worker_args;
/* Print the command-line help text to stdout. */
void usage() {
    /* Lines that interpolate a value go through printf; fixed lines are written verbatim. */
    printf("mbw memory benchmark v%s, https://github.com/raas/mbw\n", VERSION);
    fputs("Usage: mbw [options] array_size_in_MiB\n", stdout);
    fputs("Options:\n", stdout);
    fputs(" -n: number of runs per test (0 to run forever)\n", stdout);
    fputs(" -a: Don't display average\n", stdout);

    /* One line per selectable test, numbered by its TEST_* identifier. */
    printf(" -t%d: memcpy test\n", TEST_MEMCPY);
    printf(" -t%d: dumb (b[i]=a[i] style) test\n", TEST_DUMB);
    printf(" -t%d: memcpy test with fixed block size\n", TEST_MCBLOCK);
    printf(" -b <size>: block size in bytes for -t2 (default: %d)\n", DEFAULT_BLOCK_SIZE);

    fputs(" -q: quiet (print statistics only)\n", stdout);
    fputs(" -c: number of threads\n", stdout);
    fputs("(will then use two arrays, watch out for swapping)\n", stdout);
    fputs("'Bandwidth' is amount of data copied over the time this operation took.\n", stdout);
    fputs("\nThe default is to run all tests available.\n", stdout);
}
/*
 * Allocate an array of `asize` longs and fill every element with 0xaa.
 *
 * On allocation failure the error is reported with perror() and the process
 * exits with status 1, so the returned pointer is never NULL.
 * The caller owns the array and releases it with free().
 */
long *make_array(unsigned long long asize) {
    unsigned int elem_size = sizeof(long);
    long *buf = calloc(asize, elem_size);

    if (buf == NULL) {
        perror("Error allocating memory");
        exit(1);
    }

    /* Walk the buffer with a cursor, counting down the elements still to fill. */
    long *cursor = buf;
    unsigned long long left = asize;
    while (left != 0) {
        *cursor = 0xaa;
        cursor++;
        left--;
    }
    return buf;
}
/*
 * Perform one timed copy of `asize` longs from `a` to `b`.
 *
 * asize:      number of long elements to copy.
 * a, b:       source and destination arrays, each holding at least `asize` longs.
 * type:       TEST_MEMCPY  - a single memcpy of the whole array,
 *             TEST_MCBLOCK - memcpy in chunks of `block_size` bytes plus a tail,
 *             TEST_DUMB    - element-by-element assignment loop.
 *             Any other value copies nothing and only measures the timer overhead.
 * block_size: chunk size in bytes, used by TEST_MCBLOCK only. A value of 0
 *             cannot make progress chunk by chunk, so the whole array is then
 *             copied in one piece instead of looping forever.
 *
 * Returns the elapsed wall-clock time in seconds, measured with gettimeofday().
 */
double worker(unsigned long long asize, long *a, long *b, int type, unsigned long long block_size) {
    struct timeval starttime, endtime;
    const unsigned long long array_bytes = asize * sizeof(long);

    gettimeofday(&starttime, NULL);
    if (type == TEST_MEMCPY) {
        memcpy(b, a, array_bytes);
    } else if (type == TEST_MCBLOCK) {
        char *src = (char *)a;
        char *dst = (char *)b;
        unsigned long long remaining = array_bytes;

        /* Full blocks first; the block_size guard prevents a no-progress loop. */
        while (block_size > 0 && remaining >= block_size) {
            memcpy(dst, src, block_size);
            src += block_size;
            dst += block_size;
            remaining -= block_size;
        }
        /* Tail shorter than one block (or everything, when block_size is 0). */
        if (remaining) {
            memcpy(dst, src, remaining);
        }
    } else if (type == TEST_DUMB) {
        for (unsigned long long t = 0; t < asize; t++) {
            b[t] = a[t];
        }
    }
    gettimeofday(&endtime, NULL);

    /*
     * Subtract field by field before scaling: multiplying tv_sec by 1000000
     * first can overflow where time_t/long is 32 bits wide.
     */
    return (double)(endtime.tv_sec - starttime.tv_sec)
         + (double)(endtime.tv_usec - starttime.tv_usec) / 1000000.0;
}
/*
 * Print one result line to stdout: the method label for `type`, the elapsed
 * time `te` in seconds, the amount copied `mt` in MiB and the rate mt / te.
 */
void printout(double te, double mt, int type) {
    const char *method = NULL;

    if (type == TEST_MEMCPY) {
        method = "Method: MEMCPY\t";
    } else if (type == TEST_DUMB) {
        method = "Method: DUMB\t";
    } else if (type == TEST_MCBLOCK) {
        method = "Method: MCBLOCK\t";
    }

    /* An unrecognised test type gets no label, only the numbers. */
    if (method != NULL) {
        fputs(method, stdout);
    }

    printf("Elapsed: %.5f\t", te);
    printf("MiB: %.5f\t", mt);
    printf("Copy: %.3f MiB/s\n", mt / te);
}
/*
 * Start gate shared by main() and the worker threads. Both fields are only
 * accessed while holding `mutex`; every change is announced on `cond`.
 */
static int gate_ready = 0; /* workers of the current test parked at the gate */
static int gate_open = 0;  /* set by main() to release the current test */

/*
 * Thread entry point: report readiness, wait for main() to open the start
 * gate, run the configured test nr_loops times (forever when nr_loops is 0)
 * and print the average elapsed time.
 *
 * args: pointer to this thread's worker_args; the storage belongs to main().
 * Returns NULL.
 */
void *worker_wrapper(void *args) {
    worker_args *wargs = (worker_args *)args;
    double te_sum = 0;

    /*
     * Waiting on a predicate (instead of a bare pthread_cond_wait) makes the
     * gate immune to spurious wakeups and to a broadcast that is sent before
     * this thread reaches the wait.
     */
    pthread_mutex_lock(&mutex);
    gate_ready++;
    pthread_cond_broadcast(&cond);
    while (!gate_open) {
        pthread_cond_wait(&cond, &mutex);
    }
    pthread_mutex_unlock(&mutex);

    /* The counter only advances for finite runs, so it cannot overflow when looping forever. */
    for (int i = 0; wargs->nr_loops == 0 || i < wargs->nr_loops;) {
        te_sum += worker(wargs->asize, wargs->a, wargs->b, wargs->type, wargs->block_size);
        if (wargs->nr_loops != 0) {
            i++;
        }
    }

    if (wargs->nr_loops > 0) {
        /* The result line is built from several printf calls; keep it in one piece. */
        pthread_mutex_lock(&mutex);
        printf("Thread %d AVG\t", wargs->thread_num);
        printout(te_sum / wargs->nr_loops, wargs->mt, wargs->type);
        pthread_mutex_unlock(&mutex);
    }
    return NULL;
}

/*
 * Parse `text` as an unsigned decimal number no larger than `max`.
 * Returns 0 and stores the value in *out on success, -1 when the text is
 * empty, negative, has trailing garbage or is out of range.
 */
static int parse_number(const char *text, unsigned long long max, unsigned long long *out) {
    char *end = NULL;
    unsigned long long value;

    /* strtoull silently accepts a leading '-', so reject any minus sign up front. */
    if (text == NULL || *text == '\0' || strchr(text, '-') != NULL) {
        return -1;
    }
    errno = 0;
    value = strtoull(text, &end, 10);
    if (errno != 0 || end == text || *end != '\0' || value > max) {
        return -1;
    }
    *out = value;
    return 0;
}

/* Report an unusable option argument on stderr and terminate with status 1. */
static void bad_option(const char *what, const char *text) {
    fprintf(stderr, "Error: invalid %s '%s'!\n", what, text);
    exit(1);
}

/*
 * Parse the command line, then for every selected test start num_threads
 * workers (each with its own pair of arrays, pinned to the CPU matching its
 * index), release them together and wait for them to finish.
 *
 * Returns 0 on success; exits with status 1 on invalid arguments or when
 * memory or threads cannot be obtained.
 */
int main(int argc, char **argv) {
    /* Largest MiB count whose size in bytes still fits in size_t. */
    const unsigned long long max_mib = ((size_t)-1) / (1024 * 1024);
    unsigned long long asize, block_size = DEFAULT_BLOCK_SIZE, value = 0;
    double mt;
    int o, num_threads = 1, nr_loops = DEFAULT_NR_LOOPS, any_test = 0;
    int tests[MAX_TESTS] = {0};
    pthread_t *threads;
    worker_args *args;
    cpu_set_t cpus;

    while ((o = getopt(argc, argv, "haqn:t:b:c:")) != -1) {
        switch (o) {
        case 'h':
            usage();
            exit(1);
        case 'a':
        case 'q':
            /* Accepted for compatibility; no output is currently controlled by these flags. */
            break;
        case 'n':
            if (parse_number(optarg, INT_MAX, &value) != 0) {
                bad_option("number of runs", optarg);
            }
            nr_loops = (int)value;
            break;
        case 't':
            /* The bound keeps the index inside tests[]. */
            if (parse_number(optarg, MAX_TESTS - 1, &value) != 0) {
                bad_option("test number", optarg);
            }
            tests[value] = 1;
            break;
        case 'b':
            /* A zero block size would make the block test unable to progress. */
            if (parse_number(optarg, ULLONG_MAX, &value) != 0 || value == 0) {
                bad_option("block size", optarg);
            }
            block_size = value;
            break;
        case 'c':
            if (parse_number(optarg, INT_MAX, &value) != 0 || value == 0) {
                bad_option("number of threads", optarg);
            }
            num_threads = (int)value;
            break;
        default:
            break;
        }
    }

    if (optind >= argc) {
        printf("Error: no array size given!\n");
        exit(1);
    }
    if (parse_number(argv[optind], max_mib, &value) != 0 || value == 0) {
        printf("Error: array size wrong!\n");
        exit(1);
    }
    mt = (double)value;
    asize = value * (1024 * 1024 / sizeof(long));

    if (asize * sizeof(long) < block_size) {
        printf("Error: array size smaller than block size (%llu bytes)!\n", block_size);
        exit(1);
    }

    /* No -t given: run every test. */
    for (int i = 0; i < MAX_TESTS; i++) {
        any_test |= tests[i];
    }
    if (!any_test) {
        for (int i = 0; i < MAX_TESTS; i++) {
            tests[i] = 1;
        }
    }

    threads = calloc((size_t)num_threads, sizeof *threads);
    args = calloc((size_t)num_threads, sizeof *args);
    if (threads == NULL || args == NULL) {
        perror("Error allocating memory");
        exit(1);
    }

    for (int testno = 0; testno < MAX_TESTS; testno++) {
        if (!tests[testno]) {
            continue;
        }

        /* Close the gate again; all workers of the previous test have been joined. */
        pthread_mutex_lock(&mutex);
        gate_ready = 0;
        gate_open = 0;
        pthread_mutex_unlock(&mutex);

        for (int t = 0; t < num_threads; t++) {
            int rc;

            args[t].asize = asize;
            args[t].a = make_array(asize);
            args[t].b = make_array(asize);
            args[t].block_size = block_size;
            args[t].nr_loops = nr_loops;
            args[t].mt = mt;
            args[t].thread_num = t;
            args[t].type = testno;

            rc = pthread_create(&threads[t], NULL, worker_wrapper, &args[t]);
            if (rc != 0) {
                fprintf(stderr, "Error: cannot create thread %d: %s\n", t, strerror(rc));
                exit(1);
            }

            /*
             * Pin thread t to CPU t. The thread is still parked at the start
             * gate, so the affinity is in place before any copying begins.
             */
            CPU_ZERO(&cpus);
            CPU_SET(t, &cpus);
            rc = pthread_setaffinity_np(threads[t], sizeof(cpu_set_t), &cpus);
            if (rc != 0) {
                fprintf(stderr, "Warning: cannot pin thread %d to CPU %d: %s\n", t, t, strerror(rc));
            }
        }

        /* Wait until every worker is parked, then release them all at once. */
        pthread_mutex_lock(&mutex);
        while (gate_ready < num_threads) {
            pthread_cond_wait(&cond, &mutex);
        }
        gate_open = 1;
        pthread_cond_broadcast(&cond);
        pthread_mutex_unlock(&mutex);

        for (int t = 0; t < num_threads; t++) {
            pthread_join(threads[t], NULL);
            free(args[t].a);
            free(args[t].b);
        }
    }

    free(threads);
    free(args);
    return 0;
}
@will127534
Copy link
Author

gcc -pthread mbw.c -o mbw

./mbw 1000 -c 4

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment