Last active
December 19, 2015 18:29
-
-
Save yshl/5999331 to your computer and use it in GitHub Desktop.
test of loop unrolling for matrix vector product in D
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include<stdio.h> | |
#include<stdlib.h> | |
#include<assert.h> | |
void gemv(double** a, double alpha, const double* x, size_t m, | |
double beta, double* y, size_t n) | |
{ | |
/* | |
matrix * vector | |
y[i]=beta*y[i]+alpha*a[i][j]*x[j]; | |
*/ | |
size_t i,j; | |
for(i=0; i<n; i++){ | |
double sum=0.0; | |
for(j=0; j<m; j++){ | |
sum+=a[i][j]*x[j]; | |
} | |
y[i]=beta*y[i]+alpha*sum; | |
} | |
} | |
void* malloc1(size_t n, char* message) | |
{ | |
void *p=malloc(n); | |
if(p==NULL){ | |
perror(message); | |
exit(1); | |
} | |
return p; | |
} | |
int main(int argc, char **argv) | |
{ | |
double a00=1.0; | |
double x0=1.0; | |
long iter=200; | |
double scale=2.0; | |
unsigned long n,m; | |
double **a; | |
double *x, *y; | |
long i,j; | |
if(argc<3){ | |
fputs("gemv1_c #row #col\n",stderr); | |
exit(1); | |
} | |
n=atol(argv[1]); | |
m=atol(argv[2]); | |
a=malloc1(sizeof(double*)*n, "memory allocation a"); | |
for(i=0; i<n; i++){ | |
a[i]=malloc1(sizeof(double)*m, "memory allocation a[i]"); | |
} | |
x=malloc1(sizeof(double)*m, "memory allocation x"); | |
y=malloc1(sizeof(double)*n, "memory allocation y"); | |
for(i=0; i<n; i++){ | |
for(j=0; j<m; j++){ | |
a[i][j]=a00; | |
} | |
y[i]=0.0; | |
} | |
for(j=0; j<m; j++){ | |
x[j]=x0; | |
} | |
for(i=0; i<iter; i++){ | |
gemv(a,scale,x,m,1.0,y,n); | |
} | |
printf("%g\n",y[0]); | |
for(i=0; i<n; i++){ | |
assert(y[i]==scale*x0*a00*m*iter); | |
} | |
free(x); | |
free(y); | |
for(i=0; i<n; i++){ | |
free(a[i]); | |
} | |
free(a); | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import std.stdio; | |
import std.conv; | |
void gemv(const double[][] a, double alpha, const double[] x, | |
double beta, double[] y) | |
{ | |
/* | |
matrix * vector | |
y[i]=beta*y[i]+alpha*a[i][j]*x[j]; | |
*/ | |
foreach(i, ai; a){ | |
double sum=0.0; | |
foreach(j, aij; ai){ | |
sum+=aij*x[j]; | |
} | |
y[i]=beta*y[i]+alpha*sum; | |
} | |
} | |
void main(string[] argv) | |
{ | |
double a00=1.0; | |
double x0=1.0; | |
long iter=200; | |
double scale=2.0; | |
if(argv.length<3){ | |
throw new Exception("gemv1_d #row #col"); | |
} | |
long n=to!(long)(argv[1]); | |
long m=to!(long)(argv[2]); | |
auto a=new double[][](n,m); | |
auto x=new double[m]; | |
auto y=new double[n]; | |
foreach(ref ai; a) ai[]=a00; | |
x[]=x0; | |
y[]=0.0; | |
foreach(i; 0..iter){ | |
gemv(a,scale,x,1.0,y); | |
} | |
writeln(y[0]); | |
foreach(yi; y){ | |
assert(yi==scale*x0*a00*m*iter); | |
} | |
return; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include<stdio.h> | |
#include<stdlib.h> | |
#include<assert.h> | |
#define unrl 4 | |
void gemv(double **a, double alpha, const double *x, size_t m, | |
double beta, double *y, size_t n) | |
{ | |
/* | |
matrix * vector | |
y[i]=beta*y[i]+alpha*a[i][j]*x[j]; | |
*/ | |
size_t i,j,i1; | |
for(i=0; i+unrl<=n; i+=unrl){ | |
double sum[unrl]={0}; | |
for(j=0; j<m; j++){ | |
for(i1=0; i1<unrl; i1++){ | |
sum[i1]+=a[i+i1][j]*x[j]; | |
} | |
} | |
for(i1=0; i1<unrl; i1++){ | |
y[i+i1]=beta*y[i+i1]+alpha*sum[i1]; | |
} | |
} | |
for(; i<n; i++){ | |
double sum=0.0; | |
for(j=0; j<m; j++){ | |
sum+=a[i][j]*x[j]; | |
} | |
y[i]=beta*y[i]+alpha*sum; | |
} | |
} | |
void* malloc1(size_t n, const char* message) | |
{ | |
void *p=malloc(n); | |
if(p==NULL){ | |
perror(message); | |
exit(1); | |
} | |
return p; | |
} | |
int main(int argc, char **argv) | |
{ | |
double a00=1.0; | |
double x0=1.0; | |
long iter=200; | |
double scale=2.0; | |
long n, m; | |
double **a; | |
double *x, *y; | |
long i,j; | |
if(argc<3){ | |
fputs("gemv2_c #row #col\n",stderr); | |
exit(1); | |
} | |
n=atol(argv[1]); | |
m=atol(argv[2]); | |
a=malloc1(sizeof(double*)*n, "memory allocation a"); | |
for(i=0; i<n; i++){ | |
a[i]=malloc1(sizeof(double)*m, "memory allocation a[i]"); | |
} | |
x=malloc1(sizeof(double)*m, "memory allocation x"); | |
y=malloc1(sizeof(double)*n, "memory allocation y"); | |
for(i=0; i<n; i++){ | |
for(j=0; j<m; j++){ | |
a[i][j]=a00; | |
} | |
y[j]=0.0; | |
} | |
for(j=0; j<m; j++){ | |
x[j]=x0; | |
} | |
for(i=0; i<iter; i++){ | |
gemv(a,scale,x,m,1.0,y,n); | |
} | |
printf("%g\n",y[0]); | |
for(i=0; i<n; i++){ | |
assert(y[i]==scale*x0*a00*m*iter); | |
} | |
free(x); | |
free(y); | |
for(i=0; i<n; i++){ | |
free(a[i]); | |
} | |
free(a); | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import std.stdio; | |
import std.conv; | |
void gemv(const double[][] a, double alpha, const double[] x, | |
double beta, double[] y) | |
{ | |
/* | |
matrix * vector | |
y[i]=beta*y[i]+alpha*a[i][j]*x[j]; | |
*/ | |
const int unrl=4; | |
const size_t arow=a.length; | |
for(size_t i=0; i+unrl<=arow; i+=unrl){ | |
double[unrl] sum; | |
sum[]=0.0; | |
foreach(j, xj; x){ | |
foreach(i1; 0..unrl){ | |
sum[i1]+=a[i+i1][j]*xj; | |
} | |
} | |
y[i..i+unrl]=beta*y[i..i+unrl]+alpha*sum[]; | |
} | |
foreach(i; arow-arow%unrl..arow){ | |
double sum=0.0; | |
foreach(j, xj; x){ | |
sum+=a[i][j]*xj; | |
} | |
y[i]=beta*y[i]+alpha*sum; | |
} | |
} | |
void main(string[] argv) | |
{ | |
double a00=1.0; | |
double x0=1.0; | |
int iter=200; | |
double scale=2.0; | |
if(argv.length<3){ | |
throw new Exception("gemv2_d #row #col"); | |
} | |
long n=to!long(argv[1]); | |
long m=to!long(argv[2]); | |
auto a=new double[][](n,m); | |
auto x=new double[m]; | |
auto y=new double[n]; | |
foreach(ref ai; a)ai[]=a00; | |
x[]=x0; | |
y[]=0.0; | |
foreach(i; 0..iter){ | |
gemv(a,scale,x,1.0,y); | |
} | |
writeln(y[0]); | |
foreach(yi; y){ | |
assert(yi==scale*x0*a00*m*iter); | |
} | |
return; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include<stdio.h> | |
#include<stdlib.h> | |
#include<assert.h> | |
#define unrl 4 | |
void gemv(double **a, double alpha, const double *x, size_t m, | |
double beta, double *y, size_t n) | |
{ | |
/* | |
matrix * vector | |
y[i]=beta*y[i]+alpha*a[i][j]*x[j]; | |
*/ | |
size_t i,j,i1; | |
for(i=0; i+unrl<=n; i+=unrl){ | |
double sum[unrl]={0}; | |
for(j=0; j<m; j++){ | |
sum[0]+=a[i+0][j]*x[j]; | |
sum[1]+=a[i+1][j]*x[j]; | |
sum[2]+=a[i+2][j]*x[j]; | |
sum[3]+=a[i+3][j]*x[j]; | |
} | |
for(i1=0; i1<unrl; i1++){ | |
y[i+i1]=beta*y[i+i1]+alpha*sum[i1]; | |
} | |
} | |
for(; i<n; i++){ | |
double sum=0.0; | |
for(j=0; j<m; j++){ | |
sum+=a[i][j]*x[j]; | |
} | |
y[i]=beta*y[i]+alpha*sum; | |
} | |
} | |
void* malloc1(size_t n, const char* message) | |
{ | |
void *p=malloc(n); | |
if(p==NULL){ | |
perror(message); | |
exit(1); | |
} | |
return p; | |
} | |
int main(int argc, char **argv) | |
{ | |
double a00=1.0; | |
double x0=1.0; | |
long iter=200; | |
double scale=2.0; | |
long n, m; | |
double **a; | |
double *x, *y; | |
long i,j; | |
if(argc<3){ | |
fputs("gemv2_c #row #col\n",stderr); | |
exit(1); | |
} | |
n=atol(argv[1]); | |
m=atol(argv[2]); | |
a=malloc1(sizeof(double*)*n, "memory allocation a"); | |
for(i=0; i<n; i++){ | |
a[i]=malloc1(sizeof(double)*m, "memory allocation a[i]"); | |
} | |
x=malloc1(sizeof(double)*m, "memory allocation x"); | |
y=malloc1(sizeof(double)*n, "memory allocation y"); | |
for(i=0; i<n; i++){ | |
for(j=0; j<m; j++){ | |
a[i][j]=a00; | |
} | |
y[j]=0.0; | |
} | |
for(j=0; j<m; j++){ | |
x[j]=x0; | |
} | |
for(i=0; i<iter; i++){ | |
gemv(a,scale,x,m,1.0,y,n); | |
} | |
printf("%g\n",y[0]); | |
for(i=0; i<n; i++){ | |
assert(y[i]==scale*x0*a00*m*iter); | |
} | |
free(x); | |
free(y); | |
for(i=0; i<n; i++){ | |
free(a[i]); | |
} | |
free(a); | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import std.stdio; | |
import std.conv; | |
template unroll(int n) | |
{ | |
static if(n>0){ | |
const string sum_n="sum["~to!string(n-1)~"]"; | |
const string aij="a[i+"~to!string(n-1)~"][j]"; | |
const string unroll=unroll!(n-1) | |
~sum_n~"+="~aij~"*xj;\n"; | |
}else{ | |
const string unroll=""; | |
} | |
} | |
void gemv(const double[][] a, double alpha, const double[] x, | |
double beta, double[] y) | |
{ | |
/* | |
matrix * vector | |
y[i]=beta*y[i]+alpha*a[i][j]*x[j]; | |
*/ | |
const int unrl=4; | |
const size_t arow=a.length; | |
for(size_t i=0; i+unrl<=arow; i+=unrl){ | |
double[unrl] sum; | |
sum[]=0.0; | |
foreach(j, xj; x){ | |
mixin(unroll!(unrl)); | |
} | |
y[i..i+unrl]=beta*y[i..i+unrl]+alpha*sum[]; | |
} | |
foreach(i; arow-arow%unrl..arow){ | |
double sum=0.0; | |
foreach(j, xj; x){ | |
sum+=a[i][j]*xj; | |
} | |
y[i]=beta*y[i]+alpha*sum; | |
} | |
} | |
void main(string[] argv) | |
{ | |
double a00=1.0; | |
double x0=1.0; | |
int iter=200; | |
double scale=2.0; | |
if(argv.length<3){ | |
throw new Exception("gemv2_d #row #col"); | |
} | |
long n=to!long(argv[1]); | |
long m=to!long(argv[2]); | |
auto a=new double[][](n,m); | |
auto x=new double[m]; | |
auto y=new double[n]; | |
foreach(ref ai; a)ai[]=a00; | |
x[]=x0; | |
y[]=0.0; | |
foreach(i; 0..iter){ | |
gemv(a,scale,x,1.0,y); | |
} | |
writeln(y[0]); | |
foreach(yi; y){ | |
assert(yi==scale*x0*a00*m*iter); | |
} | |
return; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include<stdio.h> | |
#include<stdlib.h> | |
#include<assert.h> | |
#define unrl 4 | |
void gemv(double **a, double alpha, const double *x, size_t m, | |
double beta, double *y, size_t n) | |
{ | |
/* | |
matrix * vector | |
y[i]=beta*y[i]+alpha*a[i][j]*x[j]; | |
*/ | |
size_t i,j; | |
for(i=0; i+unrl<=n; i+=unrl){ | |
double sum0=0.0, sum1=0.0, sum2=0.0, sum3=0.0; | |
for(j=0; j<m; j++){ | |
sum0+=a[i+0][j]*x[j]; | |
sum1+=a[i+1][j]*x[j]; | |
sum2+=a[i+2][j]*x[j]; | |
sum3+=a[i+3][j]*x[j]; | |
} | |
y[i+0]=beta*y[i+0]+alpha*sum0; | |
y[i+1]=beta*y[i+1]+alpha*sum1; | |
y[i+2]=beta*y[i+2]+alpha*sum2; | |
y[i+3]=beta*y[i+3]+alpha*sum3; | |
} | |
for(; i<n; i++){ | |
double sum=0.0; | |
for(j=0; j<m; j++){ | |
sum+=a[i][j]*x[j]; | |
} | |
y[i]=beta*y[i]+alpha*sum; | |
} | |
} | |
void* malloc1(size_t n, const char* message) | |
{ | |
void *p=malloc(n); | |
if(p==NULL){ | |
perror(message); | |
exit(1); | |
} | |
return p; | |
} | |
int main(int argc, char **argv) | |
{ | |
double a00=1.0; | |
double x0=1.0; | |
long iter=200; | |
double scale=2.0; | |
long n, m; | |
double **a; | |
double *x, *y; | |
long i,j; | |
if(argc<3){ | |
fputs("gemv2_c #row #col\n",stderr); | |
exit(1); | |
} | |
n=atol(argv[1]); | |
m=atol(argv[2]); | |
a=malloc1(sizeof(double*)*n, "memory allocation a"); | |
for(i=0; i<n; i++){ | |
a[i]=malloc1(sizeof(double)*m, "memory allocation a[i]"); | |
} | |
x=malloc1(sizeof(double)*m, "memory allocation x"); | |
y=malloc1(sizeof(double)*n, "memory allocation y"); | |
for(i=0; i<n; i++){ | |
for(j=0; j<m; j++){ | |
a[i][j]=a00; | |
} | |
y[j]=0.0; | |
} | |
for(j=0; j<m; j++){ | |
x[j]=x0; | |
} | |
for(i=0; i<iter; i++){ | |
gemv(a,scale,x,m,1.0,y,n); | |
} | |
printf("%g\n",y[0]); | |
for(i=0; i<n; i++){ | |
assert(y[i]==scale*x0*a00*m*iter); | |
} | |
free(x); | |
free(y); | |
for(i=0; i<n; i++){ | |
free(a[i]); | |
} | |
free(a); | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import std.stdio; | |
import std.conv; | |
template unroll_variable(int n) | |
{ | |
static if(n>0){ | |
const string sum_n="sum"~to!string(n); | |
const string unroll_variable=unroll_variable!(n-1) | |
~"double "~sum_n~"=0.0;\n"; | |
}else{ | |
const string unroll_variable=""; | |
} | |
} | |
template unroll_mul(int n) | |
{ | |
static if(n>0){ | |
const string sum_n="sum"~to!string(n); | |
const string aij="a[i+"~to!string(n-1)~"][j]"; | |
const string unroll_mul=unroll_mul!(n-1) | |
~sum_n~"+="~aij~"*xj;\n"; | |
}else{ | |
const string unroll_mul=""; | |
} | |
} | |
template unroll_add(int n) | |
{ | |
static if(n>0){ | |
const string sum_n="sum"~to!string(n); | |
const string yi="y[i+"~to!string(n-1)~"]"; | |
const char[] unroll_add=unroll_add!(n-1) | |
~yi~"=beta*"~yi~"+alpha*"~sum_n~";\n"; | |
}else{ | |
const char[] unroll_add=""; | |
} | |
} | |
void gemv(const double[][] a, double alpha, const double[] x, | |
double beta, double[] y) | |
{ | |
/* | |
matrix * vector | |
y[i]=beta*y[i]+alpha*a[i][j]*x[j]; | |
*/ | |
const int unrl=4; | |
const size_t arow=a.length; | |
for(size_t i=0; i+unrl<=arow; i+=unrl){ | |
mixin(unroll_variable!(unrl)); | |
foreach(j, xj; x){ | |
mixin(unroll_mul!(unrl)); | |
} | |
mixin(unroll_add!(unrl)); | |
} | |
foreach(i; arow-arow%unrl..arow){ | |
double sum=0.0; | |
foreach(j, xj; x){ | |
sum+=a[i][j]*xj; | |
} | |
y[i]=beta*y[i]+alpha*sum; | |
} | |
} | |
void main(string[] argv) | |
{ | |
size_t n=1000, m=1000; | |
double a00=1.0; | |
double x0=1.0; | |
int iter=200; | |
double scale=2.0; | |
if(argv.length<3){ | |
throw new Exception("gemv3_d #row #col"); | |
} | |
n=to!long(argv[1]); | |
m=to!long(argv[2]); | |
auto a=new double[][](n,m); | |
auto x=new double[m]; | |
auto y=new double[n]; | |
foreach(ref ai; a)ai[]=a00; | |
x[]=x0; | |
y[]=0.0; | |
foreach(i; 0..iter){ | |
gemv(a,scale,x,1.0,y); | |
} | |
writeln(y[0]); | |
foreach(yi; y){ | |
assert(yi==scale*x0*a00*m*iter); | |
} | |
return; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CC=gcc | |
CFLAGS=-Wall -O3 -march=native | |
DC=dmd | |
DFLAGS=-O -release -inline -m64 | |
PROG=gemv1_c gemv2_c gemv3_c gemv4_c gemv1_d gemv2_d gemv3_d gemv4_d | |
all: $(PROG) | |
gemv1_c: gemv1_c.c | |
$(CC) $(CFLAGS) -o $@ $< | |
gemv2_c: gemv2_c.c | |
$(CC) $(CFLAGS) -o $@ $< | |
gemv3_c: gemv3_c.c | |
$(CC) $(CFLAGS) -o $@ $< | |
gemv4_c: gemv4_c.c | |
$(CC) $(CFLAGS) -o $@ $< | |
gemv1_d: gemv1_d.d | |
$(DC) $(DFLAGS) -of$@ $< | |
gemv2_d: gemv2_d.d | |
$(DC) $(DFLAGS) -of$@ $< | |
gemv3_d: gemv3_d.d | |
$(DC) $(DFLAGS) -of$@ $< | |
gemv4_d: gemv4_d.d | |
$(DC) $(DFLAGS) -of$@ $< | |
test: $(PROG) | |
@for p in $(PROG); do \ | |
echo $$p; \ | |
/usr/bin/time -f 'elapsed time %e' ./$$p 1000 1500; \ | |
/usr/bin/time -f 'elapsed time %e' ./$$p 1000 1500; \ | |
done | |
clean: | |
$(RM) $(PROG) *.o |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment