/* Simple benchmark for vector addition. */
#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <time.h>
/*
 * Stopwatch state used by timing_start(), timing_end() and timing_elapsed().
 */
typedef struct {
  struct timeval start; /* timestamp written by timing_start() */
  struct timeval end;   /* timestamp written by timing_end() */
  double elapsed_time;  /* end - start, expressed in milliseconds */
} timing_t;
/*
 * Starts a measurement: clears every byte of *t, then stores the current
 * time of day (as reported by gettimeofday) in t->start.
 */
static void timing_start(timing_t *t) {
  memset(t, 0, sizeof(timing_t));
  gettimeofday(&t->start, NULL);
}
/*
 * Stops a measurement: stores the current time of day in t->end and sets
 * t->elapsed_time to (end - start) in milliseconds.
 */
static void timing_end(timing_t *t) {
  const double usec_per_sec = 1e6;
  const double usec_per_msec = 1e3;

  gettimeofday(&t->end, NULL);

  /* Flatten both timestamps to microseconds before subtracting. */
  const double begin_usec = t->start.tv_sec * usec_per_sec + t->start.tv_usec;
  const double finish_usec = t->end.tv_sec * usec_per_sec + t->end.tv_usec;

  t->elapsed_time = (finish_usec - begin_usec) / usec_per_msec;
}
/* Returns the duration, in milliseconds, stored by the last timing_end(). */
static double timing_elapsed(const timing_t *t) {
  return t->elapsed_time;
}
/*
 * Reference kernel: adds x[i] into y[i] for every i in [0, n).
 *
 * The pragma asks clang not to vectorise or interleave this loop, so it can
 * serve as the baseline for vadd_vec(). x and y are restrict-qualified and
 * therefore must not overlap.
 */
static void vadd_ref(int n, const float *restrict x, float *restrict y) {
#pragma clang loop vectorize(disable) interleave(disable)
  for (int idx = 0; idx < n; ++idx) {
    y[idx] += x[idx];
  }
}
/*
 * Vector kernel: adds x[i] into y[i] for every i in [0, n).
 *
 * Same arithmetic as vadd_ref(), but the pragma asks clang to vectorise and
 * interleave the loop. x and y are restrict-qualified and therefore must not
 * overlap.
 */
static void vadd_vec(int n, const float *restrict x, float *restrict y) {
#pragma clang loop vectorize(enable) interleave(enable)
  for (int idx = 0; idx < n; ++idx) {
    y[idx] += x[idx];
  }
}
/*
 * Allocates an array of n floats and fills element i with i * (k + 1).
 *
 * Returns the new array, which the caller owns and must free(), or NULL when
 * n is negative, the byte size would overflow size_t, or the allocation
 * fails. For n == 0 the result is whatever malloc(0) returns.
 */
static float *init(int n, float k) {
  /* Reject sizes that cannot be expressed as a byte count. */
  if (n < 0 || (size_t)n > SIZE_MAX / sizeof(float)) {
    return NULL;
  }

  float *p = malloc((size_t)n * sizeof(*p));
  if (p == NULL) {
    return NULL;
  }

  /* Keep initialisation scalar, as in the reference kernel. */
#pragma clang loop vectorize(disable) interleave(disable)
  for (int i = 0; i < n; i++) {
    p[i] = i * (k + 1);
  }
  return p;
}
/* Prints the command-line synopsis to stdout, using argv[0] as program name. */
static void help(char *argv[]) {
  const char *program = argv[0];

  printf("usage: %s ntimes scalar|vector|both|bench size noval|val\n", program);
}
/*
 * Reports a fatal error: writes "ERROR: <str>" plus a newline to stderr and
 * terminates the process with EXIT_FAILURE. Does not return.
 */
static void die(const char *str) {
  static const char format[] = "ERROR: %s\n";

  fprintf(stderr, format, str);
  exit(EXIT_FAILURE);
}
/*
 * Bit flags describing what a run does. INVALID is the "nothing selected"
 * value; the remaining constants occupy distinct bits and may be OR-ed.
 */
typedef enum {
  INVALID = 0x0,
  SCALAR = 0x1,             /* bit 0: run the scalar (reference) kernel */
  VECTOR = 0x2,             /* bit 1: run the vector kernel */
  BOTH = (SCALAR | VECTOR), /* run both kernels */
  BENCH = 0x4,              /* bit 2: benchmark-style output */
  NOVAL = 0x8,              /* bit 3: skip result validation */
} mode_op;
int main(int argc, char *argv[]) { | |
if (argc < 5) { | |
help(argv); | |
die("Invalid number of arguments"); | |
} | |
int ntimes = atoi(argv[1]); | |
if (ntimes <= 0) { | |
help(argv); | |
die("number of times must larger than 0"); | |
} | |
mode_op mode = INVALID; | |
const char *mode_str = argv[2]; | |
if (!strcmp(mode_str, "scalar")) | |
mode = SCALAR; | |
else if (!strcmp(mode_str, "vector")) | |
mode = VECTOR; | |
else if (!strcmp(mode_str, "both")) | |
mode = BOTH; | |
else if (!strcmp(mode_str, "bench")) | |
mode = BOTH | BENCH; | |
if (mode == INVALID) { | |
help(argv); | |
die("Invalid mode"); | |
} | |
int n = atoi(argv[3]); | |
if (n <= 0) { | |
die("Invalid size\n"); | |
} | |
const char *noval = argv[4]; | |
if (!strcmp(noval, "noval")) | |
mode |= NOVAL; | |
else if (!strcmp(noval, "val")) { | |
if ((mode & BOTH) != BOTH) { | |
die("Validation is only possible if both benchmarks run"); | |
} | |
} else { | |
help(argv); | |
die("Invalid validation option"); | |
} | |
timing_t t; | |
if ((mode & BENCH) == 0) { | |
fprintf(stdout, "Initialising..."); | |
} | |
timing_start(&t); | |
const float *y = init(n, 0.3); | |
const float *x = init(n, -0.5); | |
timing_end(&t); | |
if ((mode & BENCH) == 0) { | |
fprintf(stdout, "Done: %f ms\n", timing_elapsed(&t)); | |
} | |
// Reference | |
float *ref_y = NULL; | |
double ref_time; | |
if ((mode & SCALAR) == SCALAR) { | |
if ((mode & BENCH) == 0) { | |
fprintf(stdout, "Reference..."); | |
} | |
ref_y = malloc(n * sizeof(*ref_y)); | |
memcpy(ref_y, y, n * sizeof(*ref_y)); | |
timing_start(&t); | |
for (int nt = 0; nt < ntimes; nt++) { | |
vadd_ref(n, x, ref_y); | |
} | |
timing_end(&t); | |
ref_time = timing_elapsed(&t); | |
if ((mode & NOVAL) == NOVAL) { | |
free(ref_y); | |
} | |
if ((mode & BENCH) == 0) { | |
fprintf(stdout, "Done. Total: %f ms. Average: %f ms\n", ref_time, | |
ref_time / ntimes); | |
} | |
} | |
// Vector | |
float *vec_y = NULL; | |
double vec_time; | |
if ((mode & VECTOR) == VECTOR) { | |
if ((mode & BENCH) == 0) { | |
fprintf(stdout, "Vector..."); | |
} | |
vec_y = malloc(n * sizeof(*vec_y)); | |
memcpy(vec_y, y, n * sizeof(*vec_y)); | |
timing_start(&t); | |
for (int nt = 0; nt < ntimes; nt++) { | |
vadd_vec(n, x, vec_y); | |
} | |
timing_end(&t); | |
vec_time = timing_elapsed(&t); | |
if ((mode & NOVAL) == NOVAL) { | |
free(vec_y); | |
} | |
if ((mode & BENCH) == 0) { | |
fprintf(stdout, "Done. Total: %f ms. Average: %f ms\n", vec_time, | |
vec_time / ntimes); | |
} | |
} | |
if ((mode & BENCH) == BENCH) { | |
fprintf(stdout, "%f\n", ref_time / vec_time); | |
} else if ((mode & BOTH) == BOTH) { | |
fprintf(stdout, "Ratio Vector/Scalar: %f\n", ref_time / vec_time); | |
} | |
// Check | |
int errors = 0; | |
if ((mode & BOTH) == BOTH && (mode & NOVAL) == 0) { | |
if ((mode & BENCH) == 0) { | |
fprintf(stdout, "Checking..."); | |
} | |
timing_start(&t); | |
#pragma clang loop vectorize(disable) interleave(disable) | |
for (int i = 0; i < n; i++) { | |
if (ref_y[i] != vec_y[i]) { | |
// printf("[%d] is wrong scalar=%f vs vector=%f\n", i, ref_y[i], | |
// vec_y[i]); | |
errors++; | |
} | |
} | |
timing_end(&t); | |
if (!errors) { | |
if ((mode & BENCH) == 0) { | |
printf("OK! (%f msec)\n", timing_elapsed(&t)); | |
} | |
} else { | |
printf("Errors: #%d (%f msec)\n", errors, timing_elapsed(&t)); | |
} | |
} else { | |
if ((mode & BENCH) == 0) { | |
printf("Done (no validation)\n"); | |
} | |
} | |
return (errors == 0) ? EXIT_SUCCESS : EXIT_FAILURE; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment