Last active
September 27, 2019 10:19
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ---------------------------------- | |
// Time measurements | |
#include <sys/time.h> | |
#include <time.h> | |
// Wall-clock time in seconds since the Epoch, as a double.
// Reads the current time with gettimeofday() and folds the microsecond
// field into the fractional part of the result.
static inline double Wtime_sec(void)
{
    struct timeval now;

    gettimeofday(&now, NULL);

    const double fraction = now.tv_usec / 1e6;  // microseconds -> seconds
    return now.tv_sec + fraction;
}
// Wall-clock time in milliseconds since the Epoch, as a double.
// Reads the current time with gettimeofday(); the seconds field is scaled
// up to milliseconds and the microseconds field is scaled down to them.
static inline double Wtime_msec(void)
{
    struct timeval now;

    gettimeofday(&now, NULL);

    return 1e3 * now.tv_sec      // seconds      -> milliseconds
         + now.tv_usec / 1e3;    // microseconds -> milliseconds
}
// Wall-clock time in microseconds since the Epoch, as a double.
// Reads the current time with gettimeofday(); the seconds field is scaled
// up to microseconds and added to the microseconds field.
static inline double Wtime_usec(void)
{
    struct timeval now;

    gettimeofday(&now, NULL);

    return now.tv_usec + 1e6 * now.tv_sec;
}
// ---------------------------------- | |
#include <limits.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <cblas.h>
// Fills x[0 .. len-1] with pseudo-random floats in [0, 1] drawn from rand().
//
// The values are produced by scaling the integer returned by rand(), so every
// element is a finite, normal-or-zero float. (Reinterpreting the raw bits of
// rand() as a float, as an earlier version did, yields NaN, Inf, subnormals
// and magnitudes up to ~3e38, none of which are useful benchmark input.)
//
// The sequence depends on the current rand() state; call srand() beforehand
// for reproducible data.
//
// Parameters:
//   x   - destination buffer with room for at least len floats
//   len - number of elements to write; values <= 0 write nothing
//
// Returns 0 on success, -1 if x is NULL while len > 0.
int init_rand(float* __restrict x, const int64_t len) {
    if (x == NULL && len > 0) {
        return -1;
    }

    // (float)RAND_MAX may round up, so the largest result can be exactly 1.0f.
    const float scale = 1.0f / ((float)RAND_MAX + 1.0f);

    for (int64_t i = 0; i < len; ++i) {
        x[i] = (float)rand() * scale;
    }
    return 0;
}
// Plain-C single-precision AXPY on unit-stride arrays: y := y + a * x.
//
// Parameters:
//   a - scalar multiplier
//   x - input vector, read only, at least n elements
//   y - vector updated in place, at least n elements
//   n - number of elements to process; nothing is touched when n <= 0
//
// x and y are declared __restrict, i.e. the caller promises they do not
// overlap. Always returns 0.
int custom_contiguous_axpy(const float a, const float* __restrict x, float* __restrict y, const int64_t n)
{
    int64_t k = 0;

    while (k < n) {
        y[k] = y[k] + a * x[k];
        ++k;
    }

    return 0;
}
// Compile with (libraries go after the source file so the linker can resolve
// the CBLAS symbols it references):
//   gcc -O3 -mavx2 -mfma -fopt-info loopvec.c -lcblas
// Benchmark driver: times cblas_saxpy against custom_contiguous_axpy on the
// same input and prints the elapsed wall time of each in milliseconds.
//
// Usage: axpy N
//   N - array length, an integer in [1, INT_MAX]. The upper bound exists
//       because the length is passed to cblas_saxpy as an int here.
//
// Returns EXIT_SUCCESS after a completed run, EXIT_FAILURE on a bad command
// line or an allocation failure (diagnostics go to stderr).
int main(int argc, char *argv[])
{
    if (argc != 2) {
        fprintf(stderr, "usage: \"axpy N\" where N is the arrays' length\n");
        return EXIT_FAILURE;
    }

    // strtoll instead of atoi: rejects non-numeric input and trailing junk.
    // An out-of-range value saturates to LLONG_MIN/LLONG_MAX and is caught
    // by the range check below.
    char *end = NULL;
    const long long parsed = strtoll(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || parsed < 1 || parsed > INT_MAX) {
        fprintf(stderr, "error: N must be an integer in [1, %d], got \"%s\"\n",
                INT_MAX, argv[1]);
        return EXIT_FAILURE;
    }
    const int64_t N = (int64_t)parsed;

    // Guard the byte-count multiplication on platforms with a narrow size_t.
    if ((uint64_t)N > SIZE_MAX / sizeof(float)) {
        fprintf(stderr, "error: N = %lld is too large to allocate\n", parsed);
        return EXIT_FAILURE;
    }
    const size_t bytes = (size_t)N * sizeof(float);

    srand(1234); // random seed
    float a = rand();

    float* x = malloc(bytes);
    float* y1 = malloc(bytes);
    float* y2 = malloc(bytes);
    if (x == NULL || y1 == NULL || y2 == NULL) {
        fprintf(stderr, "error: could not allocate 3 arrays of %lld floats\n", parsed);
        free(x);   // free(NULL) is a no-op, so no per-pointer guards needed
        free(y1);
        free(y2);
        return EXIT_FAILURE;
    }

    init_rand(x, N);
    // Zero both outputs before any timing starts, so the two kernels begin
    // from the same state and the buffers are written once up front.
    memset(y1, 0, bytes);
    memset(y2, 0, bytes);

    double start = Wtime_msec();
    cblas_saxpy((int)N, a, x, 1, y2, 1);
    double stop = Wtime_msec();
    printf("Elapsed wall time - BLAS axpy: %.2lf ms\n", stop-start);

    start = Wtime_msec();
    custom_contiguous_axpy(a, x, y1, N);
    stop = Wtime_msec();
    printf("Elapsed wall time - custom loop: %.2lf ms\n", stop-start);

    // Read both results back: this sanity-checks the custom kernel against
    // BLAS and keeps the computed values observable, so the timed work is not
    // dead code from the optimizer's point of view.
    float max_diff = 0.0f;
    for (int64_t i = 0; i < N; ++i) {
        float d = y1[i] - y2[i];
        if (d < 0.0f) {
            d = -d;
        }
        if (d > max_diff) {
            max_diff = d;
        }
    }
    printf("Max abs difference between results: %g\n", max_diff);

    free(x);
    free(y1);
    free(y2);
    return EXIT_SUCCESS;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment