Skip to content

Instantly share code, notes, and snippets.

@quantumelixir
Created January 15, 2011 14:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save quantumelixir/780937 to your computer and use it in GitHub Desktop.
Save quantumelixir/780937 to your computer and use it in GitHub Desktop.
Matrix-Vector multiplication optimization and benchmarking
/*
* y += A*x
*
* Matrix-Vector multiplication optimization experiment
* TODO: Benchmark against the following:
* eigen2: ourselves, with the default options (SSE2 vectorization enabled).
* eigen2_novec: ourselves but with Eigen's explicit vectorization disabled. However, gcc's auto-vectorization was enabled.
* INTEL_MKL: The Intel Math Kernel Library, which includes a BLAS/LAPACK (11.0). Closed-source.
* ACML: The AMD's core math library, which includes a BLAS/LAPACK (4.2.0). Closed-source.
* GOTO: The GOTO BLAS library (1.26).
* ATLAS: The math-atlas BLAS library (3.8.3).
*/
#include <stdio.h>
#include <malloc.h>
#include <stdlib.h>
#include <cblas.h>
#include <math.h>
#include <time.h>
void my_gemv(int M, int N, double* As, double* xs, double* ys);
// time stuff
static clock_t tic_timestart;
void tic(void) {
tic_timestart = clock();
}
double toc(void) {
clock_t tic_timestop;
tic_timestop = clock();
printf("%8.2f\n", (double)(tic_timestop - tic_timestart) / CLOCKS_PER_SEC);
return (double)(tic_timestop - tic_timestart) / CLOCKS_PER_SEC;
}
double tocq(void) {
clock_t tic_timestop;
tic_timestop = clock();
return (double)(tic_timestop - tic_timestart) / CLOCKS_PER_SEC;
}
double
error(double* a, double* b, int n)
{
double e = 0.0;
for (int i = 0; i < n; ++i)
e += fabs(a[i] - b[i]);
return e;
}
void
print_vector(double* y, int n)
{
printf("------------------------------------\n");
for (int i = 0; i < n; ++i)
printf("%g ", y[i]);
printf("\n");
printf("------------------------------------\n\n");
}
void
init(double** y, double **ymine, double** A, double** x, int M, int N)
{
*y = (double *) malloc(sizeof(double) * M);
*ymine = (double *) malloc(sizeof(double) * M);
*A = (double *) malloc(sizeof(double) * M * N);
*x = (double *) malloc(sizeof(double) * N);
srand(time(NULL));
for (int i = 0; i < N; ++i)
(*x)[i] = rand() * 1.0/RAND_MAX;
for (int i = 0; i < M; ++i)
for (int j = 0; j < N; ++j)
(*A)[i*N + j] = rand() * 1.0/RAND_MAX;
}
void
destroy(double *y, double *ymine, double* A, double *x)
{
free(y);
free(ymine);
free(A);
free(x);
}
int main(int argc, char **argv)
{
double *ys, *ymine, *As, *xs;
int M, N;
if (argc < 3) {
printf("Run with required arguments M and N\n");
exit(1);
}
else {
M = atoi(argv[1]);
N = atoi(argv[2]);
}
printf("Setting up.. ");
tic();
init(&ys, &ymine, &As, &xs, M, N);
toc();
printf("\n");
printf( "cblas_sgemv: " );
tic();
cblas_dgemv(CblasRowMajor, CblasNoTrans, M, N, 1.0, As, N, xs, 1, 0.0, ys, 1);
toc();
printf( "my_gemv : " );
tic();
my_gemv(M, N, As, xs, ymine);
toc();
printf("Max absolute error: %g\n", error(ys, ymine, M));
destroy(ys, ymine, As, xs);
return 0;
}
void
my_gemv(int M, int N, double* As, double* xs, double* ys)
{
// register blocking?
double *x = xs, *A = As, *y = ys;
double *ye = ys + M, *xe = xs + N;
double *y1 = ys, *y2 = ys + 1;
double *A1 = As, *A2 = As + N;
while (y1 != ye)
{
x = xs;
while (x != xe)
{
*y1 += *A1++ * *x;
*y2 += *A2++ * *x;
x++;
}
y1+=2; y2+=2;
A1+=N; A2+=N;
}
}
// gcc -std=gnu9x -L/opt/local/lib -lcblas -I/usr/include/malloc/ -o gemv gemv.c
/*
* // naive version
* [19:05][~]$ g++ -O3 gemv.cpp && time ./gemv 10000 10000
*
* real 0m3.047s
* user 0m2.413s
* sys 0m0.467s
*
* for (int i = 0; i < M; ++i)
* for (int j = 0; j < N; ++j)
* ys[i] += As[i*M + j] * xs[j];
*/
/*
* // don't use subscripted array access
* [19:05][~]$ g++ -O3 -o gemv gemv.cpp && time ./gemv 10000 10000
*
* real 0m0.980s
* user 0m0.430s
* sys 0m0.480s
*
* float *x = xs, *A = As, *y = ys;
* float *ye = ys + M, *xe = xs + N;
* while (y < ye)
* {
* x = xs;
* while (x < xe)
* *y += *A++ * *x++;
* y++;
* }
*
*/
/*
* // changing < to !=
* float *x = xs, *A = As, *y = ymine;
* float *ye = ymine + M, *xe = xs + N;
* while (y != ye)
* {
* x = xs;
* while (x != xe)
* *y += *A++ * *x++;
* y++;
* }
*
*/
/*
* // register blocking?
* float *x = xs, *A = As, *y = ys;
* float *ye = ys + M, *xe = xs + N;
*
* float *y1 = ys, *y2 = ys + 1;
* float *A1 = As, *A2 = As + N;
*
* while (y1 != ye)
* {
* x = xs;
* while (x != xe)
* {
* *y1 += *A1++ * *x;
* *y2 += *A2++ * *x;
* x++;
* }
* y1+=2; y2+=2;
* A1+=N; A2+=N;
* }
*
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment