Skip to content

Instantly share code, notes, and snippets.

@yshl
Last active December 19, 2015 18:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yshl/5999331 to your computer and use it in GitHub Desktop.
Save yshl/5999331 to your computer and use it in GitHub Desktop.
test of loop unrolling for matrix vector product in D
#include<stdio.h>
#include<stdlib.h>
#include<assert.h>
void gemv(double** a, double alpha, const double* x, size_t m,
double beta, double* y, size_t n)
{
/*
matrix * vector
y[i]=beta*y[i]+alpha*a[i][j]*x[j];
*/
size_t i,j;
for(i=0; i<n; i++){
double sum=0.0;
for(j=0; j<m; j++){
sum+=a[i][j]*x[j];
}
y[i]=beta*y[i]+alpha*sum;
}
}
void* malloc1(size_t n, char* message)
{
void *p=malloc(n);
if(p==NULL){
perror(message);
exit(1);
}
return p;
}
int main(int argc, char **argv)
{
double a00=1.0;
double x0=1.0;
long iter=200;
double scale=2.0;
unsigned long n,m;
double **a;
double *x, *y;
long i,j;
if(argc<3){
fputs("gemv1_c #row #col\n",stderr);
exit(1);
}
n=atol(argv[1]);
m=atol(argv[2]);
a=malloc1(sizeof(double*)*n, "memory allocation a");
for(i=0; i<n; i++){
a[i]=malloc1(sizeof(double)*m, "memory allocation a[i]");
}
x=malloc1(sizeof(double)*m, "memory allocation x");
y=malloc1(sizeof(double)*n, "memory allocation y");
for(i=0; i<n; i++){
for(j=0; j<m; j++){
a[i][j]=a00;
}
y[i]=0.0;
}
for(j=0; j<m; j++){
x[j]=x0;
}
for(i=0; i<iter; i++){
gemv(a,scale,x,m,1.0,y,n);
}
printf("%g\n",y[0]);
for(i=0; i<n; i++){
assert(y[i]==scale*x0*a00*m*iter);
}
free(x);
free(y);
for(i=0; i<n; i++){
free(a[i]);
}
free(a);
return 0;
}
import std.stdio;
import std.conv;
void gemv(const double[][] a, double alpha, const double[] x,
double beta, double[] y)
{
/*
matrix * vector
y[i]=beta*y[i]+alpha*a[i][j]*x[j];
*/
foreach(i, ai; a){
double sum=0.0;
foreach(j, aij; ai){
sum+=aij*x[j];
}
y[i]=beta*y[i]+alpha*sum;
}
}
void main(string[] argv)
{
double a00=1.0;
double x0=1.0;
long iter=200;
double scale=2.0;
if(argv.length<3){
throw new Exception("gemv1_d #row #col");
}
long n=to!(long)(argv[1]);
long m=to!(long)(argv[2]);
auto a=new double[][](n,m);
auto x=new double[m];
auto y=new double[n];
foreach(ref ai; a) ai[]=a00;
x[]=x0;
y[]=0.0;
foreach(i; 0..iter){
gemv(a,scale,x,1.0,y);
}
writeln(y[0]);
foreach(yi; y){
assert(yi==scale*x0*a00*m*iter);
}
return;
}
#include<stdio.h>
#include<stdlib.h>
#include<assert.h>
#define unrl 4
void gemv(double **a, double alpha, const double *x, size_t m,
double beta, double *y, size_t n)
{
/*
matrix * vector
y[i]=beta*y[i]+alpha*a[i][j]*x[j];
*/
size_t i,j,i1;
for(i=0; i+unrl<=n; i+=unrl){
double sum[unrl]={0};
for(j=0; j<m; j++){
for(i1=0; i1<unrl; i1++){
sum[i1]+=a[i+i1][j]*x[j];
}
}
for(i1=0; i1<unrl; i1++){
y[i+i1]=beta*y[i+i1]+alpha*sum[i1];
}
}
for(; i<n; i++){
double sum=0.0;
for(j=0; j<m; j++){
sum+=a[i][j]*x[j];
}
y[i]=beta*y[i]+alpha*sum;
}
}
void* malloc1(size_t n, const char* message)
{
void *p=malloc(n);
if(p==NULL){
perror(message);
exit(1);
}
return p;
}
int main(int argc, char **argv)
{
double a00=1.0;
double x0=1.0;
long iter=200;
double scale=2.0;
long n, m;
double **a;
double *x, *y;
long i,j;
if(argc<3){
fputs("gemv2_c #row #col\n",stderr);
exit(1);
}
n=atol(argv[1]);
m=atol(argv[2]);
a=malloc1(sizeof(double*)*n, "memory allocation a");
for(i=0; i<n; i++){
a[i]=malloc1(sizeof(double)*m, "memory allocation a[i]");
}
x=malloc1(sizeof(double)*m, "memory allocation x");
y=malloc1(sizeof(double)*n, "memory allocation y");
for(i=0; i<n; i++){
for(j=0; j<m; j++){
a[i][j]=a00;
}
y[j]=0.0;
}
for(j=0; j<m; j++){
x[j]=x0;
}
for(i=0; i<iter; i++){
gemv(a,scale,x,m,1.0,y,n);
}
printf("%g\n",y[0]);
for(i=0; i<n; i++){
assert(y[i]==scale*x0*a00*m*iter);
}
free(x);
free(y);
for(i=0; i<n; i++){
free(a[i]);
}
free(a);
return 0;
}
import std.stdio;
import std.conv;
void gemv(const double[][] a, double alpha, const double[] x,
double beta, double[] y)
{
/*
matrix * vector
y[i]=beta*y[i]+alpha*a[i][j]*x[j];
*/
const int unrl=4;
const size_t arow=a.length;
for(size_t i=0; i+unrl<=arow; i+=unrl){
double[unrl] sum;
sum[]=0.0;
foreach(j, xj; x){
foreach(i1; 0..unrl){
sum[i1]+=a[i+i1][j]*xj;
}
}
y[i..i+unrl]=beta*y[i..i+unrl]+alpha*sum[];
}
foreach(i; arow-arow%unrl..arow){
double sum=0.0;
foreach(j, xj; x){
sum+=a[i][j]*xj;
}
y[i]=beta*y[i]+alpha*sum;
}
}
void main(string[] argv)
{
double a00=1.0;
double x0=1.0;
int iter=200;
double scale=2.0;
if(argv.length<3){
throw new Exception("gemv2_d #row #col");
}
long n=to!long(argv[1]);
long m=to!long(argv[2]);
auto a=new double[][](n,m);
auto x=new double[m];
auto y=new double[n];
foreach(ref ai; a)ai[]=a00;
x[]=x0;
y[]=0.0;
foreach(i; 0..iter){
gemv(a,scale,x,1.0,y);
}
writeln(y[0]);
foreach(yi; y){
assert(yi==scale*x0*a00*m*iter);
}
return;
}
#include<stdio.h>
#include<stdlib.h>
#include<assert.h>
#define unrl 4
void gemv(double **a, double alpha, const double *x, size_t m,
double beta, double *y, size_t n)
{
/*
matrix * vector
y[i]=beta*y[i]+alpha*a[i][j]*x[j];
*/
size_t i,j,i1;
for(i=0; i+unrl<=n; i+=unrl){
double sum[unrl]={0};
for(j=0; j<m; j++){
sum[0]+=a[i+0][j]*x[j];
sum[1]+=a[i+1][j]*x[j];
sum[2]+=a[i+2][j]*x[j];
sum[3]+=a[i+3][j]*x[j];
}
for(i1=0; i1<unrl; i1++){
y[i+i1]=beta*y[i+i1]+alpha*sum[i1];
}
}
for(; i<n; i++){
double sum=0.0;
for(j=0; j<m; j++){
sum+=a[i][j]*x[j];
}
y[i]=beta*y[i]+alpha*sum;
}
}
void* malloc1(size_t n, const char* message)
{
void *p=malloc(n);
if(p==NULL){
perror(message);
exit(1);
}
return p;
}
int main(int argc, char **argv)
{
double a00=1.0;
double x0=1.0;
long iter=200;
double scale=2.0;
long n, m;
double **a;
double *x, *y;
long i,j;
if(argc<3){
fputs("gemv2_c #row #col\n",stderr);
exit(1);
}
n=atol(argv[1]);
m=atol(argv[2]);
a=malloc1(sizeof(double*)*n, "memory allocation a");
for(i=0; i<n; i++){
a[i]=malloc1(sizeof(double)*m, "memory allocation a[i]");
}
x=malloc1(sizeof(double)*m, "memory allocation x");
y=malloc1(sizeof(double)*n, "memory allocation y");
for(i=0; i<n; i++){
for(j=0; j<m; j++){
a[i][j]=a00;
}
y[j]=0.0;
}
for(j=0; j<m; j++){
x[j]=x0;
}
for(i=0; i<iter; i++){
gemv(a,scale,x,m,1.0,y,n);
}
printf("%g\n",y[0]);
for(i=0; i<n; i++){
assert(y[i]==scale*x0*a00*m*iter);
}
free(x);
free(y);
for(i=0; i<n; i++){
free(a[i]);
}
free(a);
return 0;
}
import std.stdio;
import std.conv;
template unroll(int n)
{
static if(n>0){
const string sum_n="sum["~to!string(n-1)~"]";
const string aij="a[i+"~to!string(n-1)~"][j]";
const string unroll=unroll!(n-1)
~sum_n~"+="~aij~"*xj;\n";
}else{
const string unroll="";
}
}
void gemv(const double[][] a, double alpha, const double[] x,
double beta, double[] y)
{
/*
matrix * vector
y[i]=beta*y[i]+alpha*a[i][j]*x[j];
*/
const int unrl=4;
const size_t arow=a.length;
for(size_t i=0; i+unrl<=arow; i+=unrl){
double[unrl] sum;
sum[]=0.0;
foreach(j, xj; x){
mixin(unroll!(unrl));
}
y[i..i+unrl]=beta*y[i..i+unrl]+alpha*sum[];
}
foreach(i; arow-arow%unrl..arow){
double sum=0.0;
foreach(j, xj; x){
sum+=a[i][j]*xj;
}
y[i]=beta*y[i]+alpha*sum;
}
}
void main(string[] argv)
{
double a00=1.0;
double x0=1.0;
int iter=200;
double scale=2.0;
if(argv.length<3){
throw new Exception("gemv2_d #row #col");
}
long n=to!long(argv[1]);
long m=to!long(argv[2]);
auto a=new double[][](n,m);
auto x=new double[m];
auto y=new double[n];
foreach(ref ai; a)ai[]=a00;
x[]=x0;
y[]=0.0;
foreach(i; 0..iter){
gemv(a,scale,x,1.0,y);
}
writeln(y[0]);
foreach(yi; y){
assert(yi==scale*x0*a00*m*iter);
}
return;
}
#include<stdio.h>
#include<stdlib.h>
#include<assert.h>
#define unrl 4
void gemv(double **a, double alpha, const double *x, size_t m,
double beta, double *y, size_t n)
{
/*
matrix * vector
y[i]=beta*y[i]+alpha*a[i][j]*x[j];
*/
size_t i,j;
for(i=0; i+unrl<=n; i+=unrl){
double sum0=0.0, sum1=0.0, sum2=0.0, sum3=0.0;
for(j=0; j<m; j++){
sum0+=a[i+0][j]*x[j];
sum1+=a[i+1][j]*x[j];
sum2+=a[i+2][j]*x[j];
sum3+=a[i+3][j]*x[j];
}
y[i+0]=beta*y[i+0]+alpha*sum0;
y[i+1]=beta*y[i+1]+alpha*sum1;
y[i+2]=beta*y[i+2]+alpha*sum2;
y[i+3]=beta*y[i+3]+alpha*sum3;
}
for(; i<n; i++){
double sum=0.0;
for(j=0; j<m; j++){
sum+=a[i][j]*x[j];
}
y[i]=beta*y[i]+alpha*sum;
}
}
void* malloc1(size_t n, const char* message)
{
void *p=malloc(n);
if(p==NULL){
perror(message);
exit(1);
}
return p;
}
int main(int argc, char **argv)
{
double a00=1.0;
double x0=1.0;
long iter=200;
double scale=2.0;
long n, m;
double **a;
double *x, *y;
long i,j;
if(argc<3){
fputs("gemv2_c #row #col\n",stderr);
exit(1);
}
n=atol(argv[1]);
m=atol(argv[2]);
a=malloc1(sizeof(double*)*n, "memory allocation a");
for(i=0; i<n; i++){
a[i]=malloc1(sizeof(double)*m, "memory allocation a[i]");
}
x=malloc1(sizeof(double)*m, "memory allocation x");
y=malloc1(sizeof(double)*n, "memory allocation y");
for(i=0; i<n; i++){
for(j=0; j<m; j++){
a[i][j]=a00;
}
y[j]=0.0;
}
for(j=0; j<m; j++){
x[j]=x0;
}
for(i=0; i<iter; i++){
gemv(a,scale,x,m,1.0,y,n);
}
printf("%g\n",y[0]);
for(i=0; i<n; i++){
assert(y[i]==scale*x0*a00*m*iter);
}
free(x);
free(y);
for(i=0; i<n; i++){
free(a[i]);
}
free(a);
return 0;
}
import std.stdio;
import std.conv;
template unroll_variable(int n)
{
static if(n>0){
const string sum_n="sum"~to!string(n);
const string unroll_variable=unroll_variable!(n-1)
~"double "~sum_n~"=0.0;\n";
}else{
const string unroll_variable="";
}
}
template unroll_mul(int n)
{
static if(n>0){
const string sum_n="sum"~to!string(n);
const string aij="a[i+"~to!string(n-1)~"][j]";
const string unroll_mul=unroll_mul!(n-1)
~sum_n~"+="~aij~"*xj;\n";
}else{
const string unroll_mul="";
}
}
template unroll_add(int n)
{
static if(n>0){
const string sum_n="sum"~to!string(n);
const string yi="y[i+"~to!string(n-1)~"]";
const char[] unroll_add=unroll_add!(n-1)
~yi~"=beta*"~yi~"+alpha*"~sum_n~";\n";
}else{
const char[] unroll_add="";
}
}
void gemv(const double[][] a, double alpha, const double[] x,
double beta, double[] y)
{
/*
matrix * vector
y[i]=beta*y[i]+alpha*a[i][j]*x[j];
*/
const int unrl=4;
const size_t arow=a.length;
for(size_t i=0; i+unrl<=arow; i+=unrl){
mixin(unroll_variable!(unrl));
foreach(j, xj; x){
mixin(unroll_mul!(unrl));
}
mixin(unroll_add!(unrl));
}
foreach(i; arow-arow%unrl..arow){
double sum=0.0;
foreach(j, xj; x){
sum+=a[i][j]*xj;
}
y[i]=beta*y[i]+alpha*sum;
}
}
void main(string[] argv)
{
size_t n=1000, m=1000;
double a00=1.0;
double x0=1.0;
int iter=200;
double scale=2.0;
if(argv.length<3){
throw new Exception("gemv3_d #row #col");
}
n=to!long(argv[1]);
m=to!long(argv[2]);
auto a=new double[][](n,m);
auto x=new double[m];
auto y=new double[n];
foreach(ref ai; a)ai[]=a00;
x[]=x0;
y[]=0.0;
foreach(i; 0..iter){
gemv(a,scale,x,1.0,y);
}
writeln(y[0]);
foreach(yi; y){
assert(yi==scale*x0*a00*m*iter);
}
return;
}
CC=gcc
CFLAGS=-Wall -O3 -march=native
DC=dmd
DFLAGS=-O -release -inline -m64
PROG=gemv1_c gemv2_c gemv3_c gemv4_c gemv1_d gemv2_d gemv3_d gemv4_d
all: $(PROG)
gemv1_c: gemv1_c.c
$(CC) $(CFLAGS) -o $@ $<
gemv2_c: gemv2_c.c
$(CC) $(CFLAGS) -o $@ $<
gemv3_c: gemv3_c.c
$(CC) $(CFLAGS) -o $@ $<
gemv4_c: gemv4_c.c
$(CC) $(CFLAGS) -o $@ $<
gemv1_d: gemv1_d.d
$(DC) $(DFLAGS) -of$@ $<
gemv2_d: gemv2_d.d
$(DC) $(DFLAGS) -of$@ $<
gemv3_d: gemv3_d.d
$(DC) $(DFLAGS) -of$@ $<
gemv4_d: gemv4_d.d
$(DC) $(DFLAGS) -of$@ $<
test: $(PROG)
@for p in $(PROG); do \
echo $$p; \
/usr/bin/time -f 'elapsed time %e' ./$$p 1000 1500; \
/usr/bin/time -f 'elapsed time %e' ./$$p 1000 1500; \
done
clean:
$(RM) $(PROG) *.o
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment