Last active
March 1, 2017 14:22
-
-
Save remyd1/7711c3e6e5a12e674f6b6d773fe37472 to your computer and use it in GitHub Desktop.
Modified version of linpackc.new from netlib (with a timeout (120s) and no question (using default array 200*200))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
BootStrap: docker | |
From: ubuntu:16.04 | |
IncludeCmd: yes | |
%post | |
sed -i 's/main/main restricted universe/g' /etc/apt/sources.list | |
apt-get update | |
apt-get install -y bash wget build-essential gcc time libc6-dev libgcc-5-dev | |
mkdir /usr/local/test | |
cd /usr/local/test | |
wget --no-check-certificate https://gist.githubusercontent.com/remyd1/7711c3e6e5a12e674f6b6d773fe37472/raw/1b30a5bf88ec6098bc6a534ac7e4361abe4d3efe/linpack_simple_timeout.c | |
wget --no-check-certificate https://gist.githubusercontent.com/remyd1/7711c3e6e5a12e674f6b6d773fe37472/raw/1b30a5bf88ec6098bc6a534ac7e4361abe4d3efe/get_flops.sh | |
gcc -O3 -march=native -o linpack_simple -lm linpack_simple_timeout.c | |
%runscript | |
/usr/bin/time -o /usr/local/test/time_simple_linpack.o /usr/local/test/linpack_simple > /usr/local/test/results_simple_linpack.o |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
FROM centos:centos7 | |
RUN yum update; yum clean all; | |
#RUN yum -y install epel-release; yum clean all | |
RUN yum -y install wget time gcc.x86_64 glibc-devel; yum clean all; | |
WORKDIR /usr/local/test | |
RUN wget https://gist.githubusercontent.com/remyd1/7711c3e6e5a12e674f6b6d773fe37472/raw/1b30a5bf88ec6098bc6a534ac7e4361abe4d3efe/linpack_simple_timeout.c | |
RUN wget https://gist.githubusercontent.com/remyd1/7711c3e6e5a12e674f6b6d773fe37472/raw/1b30a5bf88ec6098bc6a534ac7e4361abe4d3efe/get_flops.sh | |
RUN gcc -O3 -march=native -o linpack_simple -lm linpack_simple_timeout.c | |
# run the benchs | |
#RUN /usr/bin/time -o time_simple_linpack.o ./linpack_simple > results_simple_linpack.o | |
#CMD bash get_flops.sh results_simple_linpack.o && cat time_simple_linpack.o |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
min=`awk '$1 ~ "[0-9]+" {print $6}' $1 |sort -k6 |head -1` | |
max=`awk '$1 ~ "[0-9]+" {print $6}' $1 |sort -k6 |tail -1` | |
echo "min:"$min | |
echo "max:"$max | |
echo -n "moyenne:" | |
awk '$1 ~ "[0-9]+" {total=total+$6;n=n+1;} END{print total/n;}' $1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
** | |
** LINPACK.C Linpack benchmark, calculates FLOPS. | |
** (FLoating Point Operations Per Second) | |
** | |
** Translated to C by Bonnie Toy 5/88 | |
** | |
** Modified by Will Menninger, 10/93, with these features: | |
** (modified on 2/25/94 to fix a problem with daxpy for | |
** unequal increments or equal increments not equal to 1. | |
** Jack Dongarra) | |
** | |
** - Defaults to double precision. | |
** - Averages ROLLed and UNROLLed performance. | |
** - User selectable array sizes. | |
** - Automatically does enough repetitions to take at least 10 CPU seconds. | |
** - Prints machine precision. | |
** - ANSI prototyping. | |
** | |
** To compile: cc -O -o linpack linpack.c -lm | |
** | |
** | |
*/ | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <math.h> | |
#include <time.h> | |
#include <float.h> | |
#define DP | |
#ifdef SP | |
#define ZERO 0.0 | |
#define ONE 1.0 | |
#define PREC "Single" | |
#define BASE10DIG FLT_DIG | |
typedef float REAL; | |
#endif | |
#ifdef DP | |
#define ZERO 0.0e0 | |
#define ONE 1.0e0 | |
#define PREC "Double" | |
#define BASE10DIG DBL_DIG | |
typedef double REAL; | |
#endif | |
static REAL linpack (long nreps,int arsize); | |
static void matgen (REAL *a,int lda,int n,REAL *b,REAL *norma); | |
static void dgefa (REAL *a,int lda,int n,int *ipvt,int *info,int roll); | |
static void dgesl (REAL *a,int lda,int n,int *ipvt,REAL *b,int job,int roll); | |
static void daxpy_r (int n,REAL da,REAL *dx,int incx,REAL *dy,int incy); | |
static REAL ddot_r (int n,REAL *dx,int incx,REAL *dy,int incy); | |
static void dscal_r (int n,REAL da,REAL *dx,int incx); | |
static void daxpy_ur (int n,REAL da,REAL *dx,int incx,REAL *dy,int incy); | |
static REAL ddot_ur (int n,REAL *dx,int incx,REAL *dy,int incy); | |
static void dscal_ur (int n,REAL da,REAL *dx,int incx); | |
static int idamax (int n,REAL *dx,int incx); | |
static REAL second (void); | |
static void *mempool; | |
void main(void) | |
{ | |
char buf[80]; | |
int arsize; | |
long arsize2d,memreq,nreps; | |
size_t malloc_arg; | |
time_t start, end; | |
double elapsed; // seconds | |
start = time(NULL); | |
int terminate = 1; | |
//while (1) | |
while (terminate) | |
{ | |
end = time(NULL); | |
elapsed = difftime(end, start); | |
if (elapsed >= 120.0 /* seconds */) | |
{ | |
terminate = 0; | |
} | |
else | |
/*printf("Enter array size (q to quit) [200]: "); | |
fgets(buf,79,stdin); | |
if (buf[0]=='q' || buf[0]=='Q') | |
break; | |
if (buf[0]=='\0' || buf[0]=='\n') | |
arsize=200; | |
else | |
arsize=atoi(buf);*/ | |
{ | |
arsize=200; | |
arsize/=2; | |
arsize*=2; | |
if (arsize<10) | |
{ | |
printf("Too small.\n"); | |
continue; | |
} | |
arsize2d = (long)arsize*(long)arsize; | |
memreq=arsize2d*sizeof(REAL)+(long)arsize*sizeof(REAL)+(long)arsize*sizeof(int); | |
printf("Memory required: %ldK.\n",(memreq+512L)>>10); | |
malloc_arg=(size_t)memreq; | |
if (malloc_arg!=memreq || (mempool=malloc(malloc_arg))==NULL) | |
{ | |
printf("Not enough memory available for given array size.\n\n"); | |
continue; | |
} | |
printf("\n\nLINPACK benchmark, %s precision.\n",PREC); | |
printf("Machine precision: %d digits.\n",BASE10DIG); | |
printf("Array size %d X %d.\n",arsize,arsize); | |
printf("Average rolled and unrolled performance:\n\n"); | |
printf(" Reps Time(s) DGEFA DGESL OVERHEAD KFLOPS\n"); | |
printf("----------------------------------------------------\n"); | |
nreps=1; | |
while (linpack(nreps,arsize)<10.) | |
nreps*=2; | |
free(mempool); | |
printf("\n"); | |
} | |
} | |
} | |
static REAL linpack(long nreps,int arsize) | |
{ | |
REAL *a,*b; | |
REAL norma,t1,kflops,tdgesl,tdgefa,totalt,toverhead,ops; | |
int *ipvt,n,info,lda; | |
long i,arsize2d; | |
lda = arsize; | |
n = arsize/2; | |
arsize2d = (long)arsize*(long)arsize; | |
ops=((2.0*n*n*n)/3.0+2.0*n*n); | |
a=(REAL *)mempool; | |
b=a+arsize2d; | |
ipvt=(int *)&b[arsize]; | |
tdgesl=0; | |
tdgefa=0; | |
totalt=second(); | |
for (i=0;i<nreps;i++) | |
{ | |
matgen(a,lda,n,b,&norma); | |
t1 = second(); | |
dgefa(a,lda,n,ipvt,&info,1); | |
tdgefa += second()-t1; | |
t1 = second(); | |
dgesl(a,lda,n,ipvt,b,0,1); | |
tdgesl += second()-t1; | |
} | |
for (i=0;i<nreps;i++) | |
{ | |
matgen(a,lda,n,b,&norma); | |
t1 = second(); | |
dgefa(a,lda,n,ipvt,&info,0); | |
tdgefa += second()-t1; | |
t1 = second(); | |
dgesl(a,lda,n,ipvt,b,0,0); | |
tdgesl += second()-t1; | |
} | |
totalt=second()-totalt; | |
if (totalt<0.5 || tdgefa+tdgesl<0.2) | |
return(0.); | |
kflops=2.*nreps*ops/(1000.*(tdgefa+tdgesl)); | |
toverhead=totalt-tdgefa-tdgesl; | |
if (tdgefa<0.) | |
tdgefa=0.; | |
if (tdgesl<0.) | |
tdgesl=0.; | |
if (toverhead<0.) | |
toverhead=0.; | |
printf("%8ld %6.2f %6.2f%% %6.2f%% %6.2f%% %9.3f\n", | |
nreps,totalt,100.*tdgefa/totalt, | |
100.*tdgesl/totalt,100.*toverhead/totalt, | |
kflops); | |
return(totalt); | |
} | |
/* | |
** For matgen, | |
** We would like to declare a[][lda], but c does not allow it. In this | |
** function, references to a[i][j] are written a[lda*i+j]. | |
*/ | |
static void matgen(REAL *a,int lda,int n,REAL *b,REAL *norma) | |
{ | |
int init,i,j; | |
init = 1325; | |
*norma = 0.0; | |
for (j = 0; j < n; j++) | |
for (i = 0; i < n; i++) | |
{ | |
init = (int)((long)3125*(long)init % 65536L); | |
a[lda*j+i] = (init - 32768.0)/16384.0; | |
*norma = (a[lda*j+i] > *norma) ? a[lda*j+i] : *norma; | |
} | |
for (i = 0; i < n; i++) | |
b[i] = 0.0; | |
for (j = 0; j < n; j++) | |
for (i = 0; i < n; i++) | |
b[i] = b[i] + a[lda*j+i]; | |
} | |
/* | |
** | |
** DGEFA benchmark | |
** | |
** We would like to declare a[][lda], but c does not allow it. In this | |
** function, references to a[i][j] are written a[lda*i+j]. | |
** | |
** dgefa factors a double precision matrix by gaussian elimination. | |
** | |
** dgefa is usually called by dgeco, but it can be called | |
** directly with a saving in time if rcond is not needed. | |
** (time for dgeco) = (1 + 9/n)*(time for dgefa) . | |
** | |
** on entry | |
** | |
** a REAL precision[n][lda] | |
** the matrix to be factored. | |
** | |
** lda integer | |
** the leading dimension of the array a . | |
** | |
** n integer | |
** the order of the matrix a . | |
** | |
** on return | |
** | |
** a an upper triangular matrix and the multipliers | |
** which were used to obtain it. | |
** the factorization can be written a = l*u where | |
** l is a product of permutation and unit lower | |
** triangular matrices and u is upper triangular. | |
** | |
** ipvt integer[n] | |
** an integer vector of pivot indices. | |
** | |
** info integer | |
** = 0 normal value. | |
** = k if u[k][k] .eq. 0.0 . this is not an error | |
** condition for this subroutine, but it does | |
** indicate that dgesl or dgedi will divide by zero | |
** if called. use rcond in dgeco for a reliable | |
** indication of singularity. | |
** | |
** linpack. this version dated 08/14/78 . | |
** cleve moler, university of New Mexico, argonne national lab. | |
** | |
** functions | |
** | |
** blas daxpy,dscal,idamax | |
** | |
*/ | |
static void dgefa(REAL *a,int lda,int n,int *ipvt,int *info,int roll) | |
{ | |
REAL t; | |
int idamax(),j,k,kp1,l,nm1; | |
/* gaussian elimination with partial pivoting */ | |
if (roll) | |
{ | |
*info = 0; | |
nm1 = n - 1; | |
if (nm1 >= 0) | |
for (k = 0; k < nm1; k++) | |
{ | |
kp1 = k + 1; | |
/* find l = pivot index */ | |
l = idamax(n-k,&a[lda*k+k],1) + k; | |
ipvt[k] = l; | |
/* zero pivot implies this column already | |
triangularized */ | |
if (a[lda*k+l] != ZERO) | |
{ | |
/* interchange if necessary */ | |
if (l != k) | |
{ | |
t = a[lda*k+l]; | |
a[lda*k+l] = a[lda*k+k]; | |
a[lda*k+k] = t; | |
} | |
/* compute multipliers */ | |
t = -ONE/a[lda*k+k]; | |
dscal_r(n-(k+1),t,&a[lda*k+k+1],1); | |
/* row elimination with column indexing */ | |
for (j = kp1; j < n; j++) | |
{ | |
t = a[lda*j+l]; | |
if (l != k) | |
{ | |
a[lda*j+l] = a[lda*j+k]; | |
a[lda*j+k] = t; | |
} | |
daxpy_r(n-(k+1),t,&a[lda*k+k+1],1,&a[lda*j+k+1],1); | |
} | |
} | |
else | |
(*info) = k; | |
} | |
ipvt[n-1] = n-1; | |
if (a[lda*(n-1)+(n-1)] == ZERO) | |
(*info) = n-1; | |
} | |
else | |
{ | |
*info = 0; | |
nm1 = n - 1; | |
if (nm1 >= 0) | |
for (k = 0; k < nm1; k++) | |
{ | |
kp1 = k + 1; | |
/* find l = pivot index */ | |
l = idamax(n-k,&a[lda*k+k],1) + k; | |
ipvt[k] = l; | |
/* zero pivot implies this column already | |
triangularized */ | |
if (a[lda*k+l] != ZERO) | |
{ | |
/* interchange if necessary */ | |
if (l != k) | |
{ | |
t = a[lda*k+l]; | |
a[lda*k+l] = a[lda*k+k]; | |
a[lda*k+k] = t; | |
} | |
/* compute multipliers */ | |
t = -ONE/a[lda*k+k]; | |
dscal_ur(n-(k+1),t,&a[lda*k+k+1],1); | |
/* row elimination with column indexing */ | |
for (j = kp1; j < n; j++) | |
{ | |
t = a[lda*j+l]; | |
if (l != k) | |
{ | |
a[lda*j+l] = a[lda*j+k]; | |
a[lda*j+k] = t; | |
} | |
daxpy_ur(n-(k+1),t,&a[lda*k+k+1],1,&a[lda*j+k+1],1); | |
} | |
} | |
else | |
(*info) = k; | |
} | |
ipvt[n-1] = n-1; | |
if (a[lda*(n-1)+(n-1)] == ZERO) | |
(*info) = n-1; | |
} | |
} | |
/* | |
** | |
** DGESL benchmark | |
** | |
** We would like to declare a[][lda], but c does not allow it. In this | |
** function, references to a[i][j] are written a[lda*i+j]. | |
** | |
** dgesl solves the double precision system | |
** a * x = b or trans(a) * x = b | |
** using the factors computed by dgeco or dgefa. | |
** | |
** on entry | |
** | |
** a double precision[n][lda] | |
** the output from dgeco or dgefa. | |
** | |
** lda integer | |
** the leading dimension of the array a . | |
** | |
** n integer | |
** the order of the matrix a . | |
** | |
** ipvt integer[n] | |
** the pivot vector from dgeco or dgefa. | |
** | |
** b double precision[n] | |
** the right hand side vector. | |
** | |
** job integer | |
** = 0 to solve a*x = b , | |
** = nonzero to solve trans(a)*x = b where | |
** trans(a) is the transpose. | |
** | |
** on return | |
** | |
** b the solution vector x . | |
** | |
** error condition | |
** | |
** a division by zero will occur if the input factor contains a | |
** zero on the diagonal. technically this indicates singularity | |
** but it is often caused by improper arguments or improper | |
** setting of lda . it will not occur if the subroutines are | |
** called correctly and if dgeco has set rcond .gt. 0.0 | |
** or dgefa has set info .eq. 0 . | |
** | |
** to compute inverse(a) * c where c is a matrix | |
** with p columns | |
** dgeco(a,lda,n,ipvt,rcond,z) | |
** if (!rcond is too small){ | |
** for (j=0,j<p,j++) | |
** dgesl(a,lda,n,ipvt,c[j][0],0); | |
** } | |
** | |
** linpack. this version dated 08/14/78 . | |
** cleve moler, university of new mexico, argonne national lab. | |
** | |
** functions | |
** | |
** blas daxpy,ddot | |
*/ | |
static void dgesl(REAL *a,int lda,int n,int *ipvt,REAL *b,int job,int roll) | |
{ | |
REAL t; | |
int k,kb,l,nm1; | |
if (roll) | |
{ | |
nm1 = n - 1; | |
if (job == 0) | |
{ | |
/* job = 0 , solve a * x = b */ | |
/* first solve l*y = b */ | |
if (nm1 >= 1) | |
for (k = 0; k < nm1; k++) | |
{ | |
l = ipvt[k]; | |
t = b[l]; | |
if (l != k) | |
{ | |
b[l] = b[k]; | |
b[k] = t; | |
} | |
daxpy_r(n-(k+1),t,&a[lda*k+k+1],1,&b[k+1],1); | |
} | |
/* now solve u*x = y */ | |
for (kb = 0; kb < n; kb++) | |
{ | |
k = n - (kb + 1); | |
b[k] = b[k]/a[lda*k+k]; | |
t = -b[k]; | |
daxpy_r(k,t,&a[lda*k+0],1,&b[0],1); | |
} | |
} | |
else | |
{ | |
/* job = nonzero, solve trans(a) * x = b */ | |
/* first solve trans(u)*y = b */ | |
for (k = 0; k < n; k++) | |
{ | |
t = ddot_r(k,&a[lda*k+0],1,&b[0],1); | |
b[k] = (b[k] - t)/a[lda*k+k]; | |
} | |
/* now solve trans(l)*x = y */ | |
if (nm1 >= 1) | |
for (kb = 1; kb < nm1; kb++) | |
{ | |
k = n - (kb+1); | |
b[k] = b[k] + ddot_r(n-(k+1),&a[lda*k+k+1],1,&b[k+1],1); | |
l = ipvt[k]; | |
if (l != k) | |
{ | |
t = b[l]; | |
b[l] = b[k]; | |
b[k] = t; | |
} | |
} | |
} | |
} | |
else | |
{ | |
nm1 = n - 1; | |
if (job == 0) | |
{ | |
/* job = 0 , solve a * x = b */ | |
/* first solve l*y = b */ | |
if (nm1 >= 1) | |
for (k = 0; k < nm1; k++) | |
{ | |
l = ipvt[k]; | |
t = b[l]; | |
if (l != k) | |
{ | |
b[l] = b[k]; | |
b[k] = t; | |
} | |
daxpy_ur(n-(k+1),t,&a[lda*k+k+1],1,&b[k+1],1); | |
} | |
/* now solve u*x = y */ | |
for (kb = 0; kb < n; kb++) | |
{ | |
k = n - (kb + 1); | |
b[k] = b[k]/a[lda*k+k]; | |
t = -b[k]; | |
daxpy_ur(k,t,&a[lda*k+0],1,&b[0],1); | |
} | |
} | |
else | |
{ | |
/* job = nonzero, solve trans(a) * x = b */ | |
/* first solve trans(u)*y = b */ | |
for (k = 0; k < n; k++) | |
{ | |
t = ddot_ur(k,&a[lda*k+0],1,&b[0],1); | |
b[k] = (b[k] - t)/a[lda*k+k]; | |
} | |
/* now solve trans(l)*x = y */ | |
if (nm1 >= 1) | |
for (kb = 1; kb < nm1; kb++) | |
{ | |
k = n - (kb+1); | |
b[k] = b[k] + ddot_ur(n-(k+1),&a[lda*k+k+1],1,&b[k+1],1); | |
l = ipvt[k]; | |
if (l != k) | |
{ | |
t = b[l]; | |
b[l] = b[k]; | |
b[k] = t; | |
} | |
} | |
} | |
} | |
} | |
/* | |
** Constant times a vector plus a vector. | |
** Jack Dongarra, linpack, 3/11/78. | |
** ROLLED version | |
*/ | |
static void daxpy_r(int n,REAL da,REAL *dx,int incx,REAL *dy,int incy) | |
{ | |
int i,ix,iy; | |
if (n <= 0) | |
return; | |
if (da == ZERO) | |
return; | |
if (incx != 1 || incy != 1) | |
{ | |
/* code for unequal increments or equal increments != 1 */ | |
ix = 1; | |
iy = 1; | |
if(incx < 0) ix = (-n+1)*incx + 1; | |
if(incy < 0)iy = (-n+1)*incy + 1; | |
for (i = 0;i < n; i++) | |
{ | |
dy[iy] = dy[iy] + da*dx[ix]; | |
ix = ix + incx; | |
iy = iy + incy; | |
} | |
return; | |
} | |
/* code for both increments equal to 1 */ | |
for (i = 0;i < n; i++) | |
dy[i] = dy[i] + da*dx[i]; | |
} | |
/* | |
** Forms the dot product of two vectors. | |
** Jack Dongarra, linpack, 3/11/78. | |
** ROLLED version | |
*/ | |
static REAL ddot_r(int n,REAL *dx,int incx,REAL *dy,int incy) | |
{ | |
REAL dtemp; | |
int i,ix,iy; | |
dtemp = ZERO; | |
if (n <= 0) | |
return(ZERO); | |
if (incx != 1 || incy != 1) | |
{ | |
/* code for unequal increments or equal increments != 1 */ | |
ix = 0; | |
iy = 0; | |
if (incx < 0) ix = (-n+1)*incx; | |
if (incy < 0) iy = (-n+1)*incy; | |
for (i = 0;i < n; i++) | |
{ | |
dtemp = dtemp + dx[ix]*dy[iy]; | |
ix = ix + incx; | |
iy = iy + incy; | |
} | |
return(dtemp); | |
} | |
/* code for both increments equal to 1 */ | |
for (i=0;i < n; i++) | |
dtemp = dtemp + dx[i]*dy[i]; | |
return(dtemp); | |
} | |
/* | |
** Scales a vector by a constant. | |
** Jack Dongarra, linpack, 3/11/78. | |
** ROLLED version | |
*/ | |
static void dscal_r(int n,REAL da,REAL *dx,int incx) | |
{ | |
int i,nincx; | |
if (n <= 0) | |
return; | |
if (incx != 1) | |
{ | |
/* code for increment not equal to 1 */ | |
nincx = n*incx; | |
for (i = 0; i < nincx; i = i + incx) | |
dx[i] = da*dx[i]; | |
return; | |
} | |
/* code for increment equal to 1 */ | |
for (i = 0; i < n; i++) | |
dx[i] = da*dx[i]; | |
} | |
/* | |
** constant times a vector plus a vector. | |
** Jack Dongarra, linpack, 3/11/78. | |
** UNROLLED version | |
*/ | |
static void daxpy_ur(int n,REAL da,REAL *dx,int incx,REAL *dy,int incy) | |
{ | |
int i,ix,iy,m; | |
if (n <= 0) | |
return; | |
if (da == ZERO) | |
return; | |
if (incx != 1 || incy != 1) | |
{ | |
/* code for unequal increments or equal increments != 1 */ | |
ix = 1; | |
iy = 1; | |
if(incx < 0) ix = (-n+1)*incx + 1; | |
if(incy < 0)iy = (-n+1)*incy + 1; | |
for (i = 0;i < n; i++) | |
{ | |
dy[iy] = dy[iy] + da*dx[ix]; | |
ix = ix + incx; | |
iy = iy + incy; | |
} | |
return; | |
} | |
/* code for both increments equal to 1 */ | |
m = n % 4; | |
if ( m != 0) | |
{ | |
for (i = 0; i < m; i++) | |
dy[i] = dy[i] + da*dx[i]; | |
if (n < 4) | |
return; | |
} | |
for (i = m; i < n; i = i + 4) | |
{ | |
dy[i] = dy[i] + da*dx[i]; | |
dy[i+1] = dy[i+1] + da*dx[i+1]; | |
dy[i+2] = dy[i+2] + da*dx[i+2]; | |
dy[i+3] = dy[i+3] + da*dx[i+3]; | |
} | |
} | |
/* | |
** Forms the dot product of two vectors. | |
** Jack Dongarra, linpack, 3/11/78. | |
** UNROLLED version | |
*/ | |
static REAL ddot_ur(int n,REAL *dx,int incx,REAL *dy,int incy) | |
{ | |
REAL dtemp; | |
int i,ix,iy,m; | |
dtemp = ZERO; | |
if (n <= 0) | |
return(ZERO); | |
if (incx != 1 || incy != 1) | |
{ | |
/* code for unequal increments or equal increments != 1 */ | |
ix = 0; | |
iy = 0; | |
if (incx < 0) ix = (-n+1)*incx; | |
if (incy < 0) iy = (-n+1)*incy; | |
for (i = 0;i < n; i++) | |
{ | |
dtemp = dtemp + dx[ix]*dy[iy]; | |
ix = ix + incx; | |
iy = iy + incy; | |
} | |
return(dtemp); | |
} | |
/* code for both increments equal to 1 */ | |
m = n % 5; | |
if (m != 0) | |
{ | |
for (i = 0; i < m; i++) | |
dtemp = dtemp + dx[i]*dy[i]; | |
if (n < 5) | |
return(dtemp); | |
} | |
for (i = m; i < n; i = i + 5) | |
{ | |
dtemp = dtemp + dx[i]*dy[i] + | |
dx[i+1]*dy[i+1] + dx[i+2]*dy[i+2] + | |
dx[i+3]*dy[i+3] + dx[i+4]*dy[i+4]; | |
} | |
return(dtemp); | |
} | |
/* | |
** Scales a vector by a constant. | |
** Jack Dongarra, linpack, 3/11/78. | |
** UNROLLED version | |
*/ | |
static void dscal_ur(int n,REAL da,REAL *dx,int incx) | |
{ | |
int i,m,nincx; | |
if (n <= 0) | |
return; | |
if (incx != 1) | |
{ | |
/* code for increment not equal to 1 */ | |
nincx = n*incx; | |
for (i = 0; i < nincx; i = i + incx) | |
dx[i] = da*dx[i]; | |
return; | |
} | |
/* code for increment equal to 1 */ | |
m = n % 5; | |
if (m != 0) | |
{ | |
for (i = 0; i < m; i++) | |
dx[i] = da*dx[i]; | |
if (n < 5) | |
return; | |
} | |
for (i = m; i < n; i = i + 5) | |
{ | |
dx[i] = da*dx[i]; | |
dx[i+1] = da*dx[i+1]; | |
dx[i+2] = da*dx[i+2]; | |
dx[i+3] = da*dx[i+3]; | |
dx[i+4] = da*dx[i+4]; | |
} | |
} | |
/* | |
** Finds the index of element having max. absolute value. | |
** Jack Dongarra, linpack, 3/11/78. | |
*/ | |
static int idamax(int n,REAL *dx,int incx) | |
{ | |
REAL dmax; | |
int i, ix, itemp; | |
if (n < 1) | |
return(-1); | |
if (n ==1 ) | |
return(0); | |
if(incx != 1) | |
{ | |
/* code for increment not equal to 1 */ | |
ix = 1; | |
dmax = fabs((double)dx[0]); | |
ix = ix + incx; | |
for (i = 1; i < n; i++) | |
{ | |
if(fabs((double)dx[ix]) > dmax) | |
{ | |
itemp = i; | |
dmax = fabs((double)dx[ix]); | |
} | |
ix = ix + incx; | |
} | |
} | |
else | |
{ | |
/* code for increment equal to 1 */ | |
itemp = 0; | |
dmax = fabs((double)dx[0]); | |
for (i = 1; i < n; i++) | |
if(fabs((double)dx[i]) > dmax) | |
{ | |
itemp = i; | |
dmax = fabs((double)dx[i]); | |
} | |
} | |
return (itemp); | |
} | |
static REAL second(void) | |
{ | |
return ((REAL)((REAL)clock()/(REAL)CLOCKS_PER_SEC)); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment