{{ message }}

Instantly share code, notes, and snippets.

# yshl/Makefile

Last active Dec 19, 2015
test of loop unrolling for matrix vector product in D
 #include #include #include void gemv(double** a, double alpha, const double* x, size_t m, double beta, double* y, size_t n) { /* matrix * vector y[i]=beta*y[i]+alpha*a[i][j]*x[j]; */ size_t i,j; for(i=0; i
 import std.stdio; import std.conv; void gemv(const double[][] a, double alpha, const double[] x, double beta, double[] y) { /* matrix * vector y[i]=beta*y[i]+alpha*a[i][j]*x[j]; */ foreach(i, ai; a){ double sum=0.0; foreach(j, aij; ai){ sum+=aij*x[j]; } y[i]=beta*y[i]+alpha*sum; } } void main(string[] argv) { double a00=1.0; double x0=1.0; long iter=200; double scale=2.0; if(argv.length<3){ throw new Exception("gemv1_d #row #col"); } long n=to!(long)(argv[1]); long m=to!(long)(argv[2]); auto a=new double[][](n,m); auto x=new double[m]; auto y=new double[n]; foreach(ref ai; a) ai[]=a00; x[]=x0; y[]=0.0; foreach(i; 0..iter){ gemv(a,scale,x,1.0,y); } writeln(y[0]); foreach(yi; y){ assert(yi==scale*x0*a00*m*iter); } return; }
 #include #include #include #define unrl 4 void gemv(double **a, double alpha, const double *x, size_t m, double beta, double *y, size_t n) { /* matrix * vector y[i]=beta*y[i]+alpha*a[i][j]*x[j]; */ size_t i,j,i1; for(i=0; i+unrl<=n; i+=unrl){ double sum[unrl]={0}; for(j=0; j
 import std.stdio; import std.conv; void gemv(const double[][] a, double alpha, const double[] x, double beta, double[] y) { /* matrix * vector y[i]=beta*y[i]+alpha*a[i][j]*x[j]; */ const int unrl=4; const size_t arow=a.length; for(size_t i=0; i+unrl<=arow; i+=unrl){ double[unrl] sum; sum[]=0.0; foreach(j, xj; x){ foreach(i1; 0..unrl){ sum[i1]+=a[i+i1][j]*xj; } } y[i..i+unrl]=beta*y[i..i+unrl]+alpha*sum[]; } foreach(i; arow-arow%unrl..arow){ double sum=0.0; foreach(j, xj; x){ sum+=a[i][j]*xj; } y[i]=beta*y[i]+alpha*sum; } } void main(string[] argv) { double a00=1.0; double x0=1.0; int iter=200; double scale=2.0; if(argv.length<3){ throw new Exception("gemv2_d #row #col"); } long n=to!long(argv[1]); long m=to!long(argv[2]); auto a=new double[][](n,m); auto x=new double[m]; auto y=new double[n]; foreach(ref ai; a)ai[]=a00; x[]=x0; y[]=0.0; foreach(i; 0..iter){ gemv(a,scale,x,1.0,y); } writeln(y[0]); foreach(yi; y){ assert(yi==scale*x0*a00*m*iter); } return; }
 #include #include #include #define unrl 4 void gemv(double **a, double alpha, const double *x, size_t m, double beta, double *y, size_t n) { /* matrix * vector y[i]=beta*y[i]+alpha*a[i][j]*x[j]; */ size_t i,j,i1; for(i=0; i+unrl<=n; i+=unrl){ double sum[unrl]={0}; for(j=0; j
 import std.stdio; import std.conv; template unroll(int n) { static if(n>0){ const string sum_n="sum["~to!string(n-1)~"]"; const string aij="a[i+"~to!string(n-1)~"][j]"; const string unroll=unroll!(n-1) ~sum_n~"+="~aij~"*xj;\n"; }else{ const string unroll=""; } } void gemv(const double[][] a, double alpha, const double[] x, double beta, double[] y) { /* matrix * vector y[i]=beta*y[i]+alpha*a[i][j]*x[j]; */ const int unrl=4; const size_t arow=a.length; for(size_t i=0; i+unrl<=arow; i+=unrl){ double[unrl] sum; sum[]=0.0; foreach(j, xj; x){ mixin(unroll!(unrl)); } y[i..i+unrl]=beta*y[i..i+unrl]+alpha*sum[]; } foreach(i; arow-arow%unrl..arow){ double sum=0.0; foreach(j, xj; x){ sum+=a[i][j]*xj; } y[i]=beta*y[i]+alpha*sum; } } void main(string[] argv) { double a00=1.0; double x0=1.0; int iter=200; double scale=2.0; if(argv.length<3){ throw new Exception("gemv2_d #row #col"); } long n=to!long(argv[1]); long m=to!long(argv[2]); auto a=new double[][](n,m); auto x=new double[m]; auto y=new double[n]; foreach(ref ai; a)ai[]=a00; x[]=x0; y[]=0.0; foreach(i; 0..iter){ gemv(a,scale,x,1.0,y); } writeln(y[0]); foreach(yi; y){ assert(yi==scale*x0*a00*m*iter); } return; }
 #include #include #include #define unrl 4 void gemv(double **a, double alpha, const double *x, size_t m, double beta, double *y, size_t n) { /* matrix * vector y[i]=beta*y[i]+alpha*a[i][j]*x[j]; */ size_t i,j; for(i=0; i+unrl<=n; i+=unrl){ double sum0=0.0, sum1=0.0, sum2=0.0, sum3=0.0; for(j=0; j
 import std.stdio; import std.conv; template unroll_variable(int n) { static if(n>0){ const string sum_n="sum"~to!string(n); const string unroll_variable=unroll_variable!(n-1) ~"double "~sum_n~"=0.0;\n"; }else{ const string unroll_variable=""; } } template unroll_mul(int n) { static if(n>0){ const string sum_n="sum"~to!string(n); const string aij="a[i+"~to!string(n-1)~"][j]"; const string unroll_mul=unroll_mul!(n-1) ~sum_n~"+="~aij~"*xj;\n"; }else{ const string unroll_mul=""; } } template unroll_add(int n) { static if(n>0){ const string sum_n="sum"~to!string(n); const string yi="y[i+"~to!string(n-1)~"]"; const char[] unroll_add=unroll_add!(n-1) ~yi~"=beta*"~yi~"+alpha*"~sum_n~";\n"; }else{ const char[] unroll_add=""; } } void gemv(const double[][] a, double alpha, const double[] x, double beta, double[] y) { /* matrix * vector y[i]=beta*y[i]+alpha*a[i][j]*x[j]; */ const int unrl=4; const size_t arow=a.length; for(size_t i=0; i+unrl<=arow; i+=unrl){ mixin(unroll_variable!(unrl)); foreach(j, xj; x){ mixin(unroll_mul!(unrl)); } mixin(unroll_add!(unrl)); } foreach(i; arow-arow%unrl..arow){ double sum=0.0; foreach(j, xj; x){ sum+=a[i][j]*xj; } y[i]=beta*y[i]+alpha*sum; } } void main(string[] argv) { size_t n=1000, m=1000; double a00=1.0; double x0=1.0; int iter=200; double scale=2.0; if(argv.length<3){ throw new Exception("gemv3_d #row #col"); } n=to!long(argv[1]); m=to!long(argv[2]); auto a=new double[][](n,m); auto x=new double[m]; auto y=new double[n]; foreach(ref ai; a)ai[]=a00; x[]=x0; y[]=0.0; foreach(i; 0..iter){ gemv(a,scale,x,1.0,y); } writeln(y[0]); foreach(yi; y){ assert(yi==scale*x0*a00*m*iter); } return; }
 CC=gcc CFLAGS=-Wall -O3 -march=native DC=dmd DFLAGS=-O -release -inline -m64 PROG=gemv1_c gemv2_c gemv3_c gemv4_c gemv1_d gemv2_d gemv3_d gemv4_d all: \$(PROG) gemv1_c: gemv1_c.c \$(CC) \$(CFLAGS) -o \$@ \$< gemv2_c: gemv2_c.c \$(CC) \$(CFLAGS) -o \$@ \$< gemv3_c: gemv3_c.c \$(CC) \$(CFLAGS) -o \$@ \$< gemv4_c: gemv4_c.c \$(CC) \$(CFLAGS) -o \$@ \$< gemv1_d: gemv1_d.d \$(DC) \$(DFLAGS) -of\$@ \$< gemv2_d: gemv2_d.d \$(DC) \$(DFLAGS) -of\$@ \$< gemv3_d: gemv3_d.d \$(DC) \$(DFLAGS) -of\$@ \$< gemv4_d: gemv4_d.d \$(DC) \$(DFLAGS) -of\$@ \$< test: \$(PROG) @for p in \$(PROG); do \ echo \$\$p; \ /usr/bin/time -f 'elapsed time %e' ./\$\$p 1000 1500; \ /usr/bin/time -f 'elapsed time %e' ./\$\$p 1000 1500; \ done clean: \$(RM) \$(PROG) *.o