Skip to content

Instantly share code, notes, and snippets.

@mratsim
Last active September 27, 2019 10:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mratsim/cec39df71606eccab93255217141b274 to your computer and use it in GitHub Desktop.
Save mratsim/cec39df71606eccab93255217141b274 to your computer and use it in GitHub Desktop.
// ----------------------------------
// Time measurements
#include <sys/time.h>
#include <time.h>
// Wall-clock time in seconds since the Epoch, as a double.
// Reads the clock via gettimeofday() and adds the microsecond field as a
// fractional part.
static inline double Wtime_sec(void)
{
    struct timeval now;
    gettimeofday(&now, NULL);

    const double whole_seconds = (double)now.tv_sec;
    const double fraction      = (double)now.tv_usec / 1e6;
    return whole_seconds + fraction;
}
// Wall-clock time in milliseconds since the Epoch, as a double.
// Reads the clock via gettimeofday(); the seconds field is scaled up to
// milliseconds and the microseconds field is scaled down to milliseconds.
static inline double Wtime_msec(void)
{
    struct timeval now;
    gettimeofday(&now, NULL);

    const double from_seconds = (double)now.tv_sec * 1e3;
    const double from_micros  = (double)now.tv_usec / 1e3;
    return from_seconds + from_micros;
}
// Wall-clock time in microseconds since the Epoch, as a double.
// Reads the clock via gettimeofday(); the seconds field is scaled up to
// microseconds and the microseconds field is added unchanged.
static inline double Wtime_usec(void)
{
    struct timeval now;
    gettimeofday(&now, NULL);

    const double from_seconds = (double)now.tv_sec * 1e6;
    const double from_micros  = (double)now.tv_usec;
    return from_seconds + from_micros;
}
// ----------------------------------
#include <limits.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <cblas.h>
// Fills x[0 .. len) with pseudo-random floats in the closed range [0, 1],
// drawn from rand() (seed it with srand() beforehand for reproducibility).
//
// The values are produced by normalising rand() rather than by
// reinterpreting its integer bits as a float: raw bit patterns yield
// subnormals (every value when RAND_MAX is small), as well as Inf and NaN,
// which are poor inputs for an arithmetic benchmark.
//
// Parameters:
//   x   - destination buffer with room for at least `len` floats
//   len - number of elements to fill; values <= 0 fill nothing
// Returns 0 on success, -1 if x is NULL while len > 0.
int init_rand(float* __restrict x, const int64_t len) {
    if (x == NULL && len > 0) {
        return -1;
    }
    for (int64_t i = 0; i < len; ++i) {
        x[i] = (float)rand() / (float)RAND_MAX;
    }
    return 0;
}
// Single-precision AXPY on contiguous arrays: y[k] += a * x[k] for every
// k in [0, n). x and y are declared non-aliasing via __restrict.
// Does nothing when n <= 0. Always returns 0.
int custom_contiguous_axpy(const float a, const float* __restrict x, float* __restrict y, const int64_t n)
{
    int64_t idx = 0;
    while (idx < n) {
        y[idx] += a * x[idx];
        ++idx;
    }
    return 0;
}
// Compile with
// gcc -O3 -mavx2 -mfma -fopt-info loopvec.c -lcblas
// (the library is listed after the source file because the linker resolves
// symbols in command-line order)
//
// Benchmark driver: times cblas_saxpy against custom_contiguous_axpy on
// arrays of N floats, where N is the single command-line argument.
// Returns 0 on success, EXIT_FAILURE on a bad argument or failed allocation.
int main(int argc, char *argv[])
{
    if (argc != 2) {
        fprintf(stderr, "usage: \"axpy N\" where N is the arrays' length\n");
        return EXIT_FAILURE;
    }

    // strtoll (unlike atoi) lets us reject non-numeric and trailing input.
    // An out-of-range value saturates to LLONG_MIN/LLONG_MAX and is caught by
    // the bounds check below. N is capped at INT_MAX because it is passed to
    // cblas_saxpy as an int.
    char *end = NULL;
    const long long parsed = strtoll(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || parsed < 1 || parsed > INT_MAX
        || (unsigned long long)parsed > SIZE_MAX / sizeof(float)) {
        fprintf(stderr, "error: N must be a positive integer no larger than %d, got \"%s\"\n",
                INT_MAX, argv[1]);
        return EXIT_FAILURE;
    }
    const int64_t N = parsed;
    const size_t bytes = (size_t)N * sizeof(float);

    srand(1234); // random seed
    float a = (float)rand();

    float *x = malloc(bytes);
    float *y1 = malloc(bytes);
    float *y2 = malloc(bytes);
    if (x == NULL || y1 == NULL || y2 == NULL) {
        fprintf(stderr, "error: failed to allocate 3 arrays of %zu bytes\n", bytes);
        free(x);
        free(y1);
        free(y2);
        return EXIT_FAILURE;
    }

    init_rand(x, N);
    // malloc + memset rather than calloc, so that every byte of the outputs
    // is written before the timed sections start.
    memset(y1, 0, bytes);
    memset(y2, 0, bytes);

    // NOTE(review): y1 and y2 are never read after the timed calls; an
    // optimizer may be able to drop part of the custom loop - confirm with
    // the -fopt-info output or by consuming the results.
    double start = Wtime_msec();
    cblas_saxpy((int)N, a, x, 1, y2, 1);
    double stop = Wtime_msec();
    printf("Elapsed wall time - BLAS axpy: %.2lf ms\n", stop - start);

    start = Wtime_msec();
    custom_contiguous_axpy(a, x, y1, N);
    stop = Wtime_msec();
    printf("Elapsed wall time - custom loop: %.2lf ms\n", stop - start);

    free(x);
    free(y1);
    free(y2);
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment