remyd1/Dockerfile

## docker_centos_singularity_linpack_simple.def
BootStrap: docker
From: ubuntu:16.04
IncludeCmd: yes

%post
    sed -i 's/main/main restricted universe/g' /etc/apt/sources.list
    apt-get update
    apt-get install -y bash wget build-essential gcc time libc6-dev libgcc-5-dev
    mkdir /usr/local/test
    cd /usr/local/test
    wget --no-check-certificate https://gist.githubusercontent.com/remyd1/7711c3e6e5a12e674f6b6d773fe37472/raw/1b30a5bf88ec6098bc6a534ac7e4361abe4d3efe/linpack_simple_timeout.c
    wget --no-check-certificate https://gist.githubusercontent.com/remyd1/7711c3e6e5a12e674f6b6d773fe37472/raw/1b30a5bf88ec6098bc6a534ac7e4361abe4d3efe/get_flops.sh
    gcc -O3 -march=native -o linpack_simple -lm linpack_simple_timeout.c

%runscript
    /usr/bin/time -o /usr/local/test/time_simple_linpack.o /usr/local/test/linpack_simple > /usr/local/test/results_simple_linpack.o

## Dockerfile
FROM centos:centos7
RUN yum update; yum clean all;
#RUN yum -y install epel-release; yum clean all
RUN yum -y install wget time gcc.x86_64 glibc-devel; yum clean all;
WORKDIR /usr/local/test
RUN wget https://gist.githubusercontent.com/remyd1/7711c3e6e5a12e674f6b6d773fe37472/raw/1b30a5bf88ec6098bc6a534ac7e4361abe4d3efe/linpack_simple_timeout.c
RUN wget https://gist.githubusercontent.com/remyd1/7711c3e6e5a12e674f6b6d773fe37472/raw/1b30a5bf88ec6098bc6a534ac7e4361abe4d3efe/get_flops.sh
RUN gcc -O3 -march=native -o linpack_simple -lm linpack_simple_timeout.c

# run the benchs
#RUN /usr/bin/time -o time_simple_linpack.o ./linpack_simple > results_simple_linpack.o

#CMD bash get_flops.sh results_simple_linpack.o && cat time_simple_linpack.o

## get_flops.sh
#!/bin/bash

min=`awk '$1 ~ "[0-9]+" {print $6}' $1 |sort -k6 |head -1`
max=`awk '$1 ~ "[0-9]+" {print $6}' $1 |sort -k6 |tail -1`

echo "min:"$min
echo "max:"$max
echo -n "moyenne:"
awk '$1 ~ "[0-9]+" {total=total+$6;n=n+1;} END{print total/n;}' $1

## linpack_simple_timeout.c
/*
**
** LINPACK.C        Linpack benchmark, calculates FLOPS.
**                  (FLoating Point Operations Per Second)
**
** Translated to C by Bonnie Toy 5/88
**
** Modified by Will Menninger, 10/93, with these features:
**  (modified on 2/25/94  to fix a problem with daxpy  for
**   unequal increments or equal increments not equal to 1.
**     Jack Dongarra)
**
** - Defaults to double precision.
** - Averages ROLLed and UNROLLed performance.
** - User selectable array sizes.
** - Automatically does enough repetitions to take at least 10 CPU seconds.
** - Prints machine precision.
** - ANSI prototyping.
**
** To compile:  cc -O -o linpack linpack.c -lm
**
**
*/

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <float.h>

#define DP

#ifdef SP
#define ZERO        0.0
#define ONE         1.0
#define PREC        "Single"
#define BASE10DIG   FLT_DIG

typedef float   REAL;
#endif

#ifdef DP
#define ZERO        0.0e0
#define ONE         1.0e0
#define PREC        "Double"
#define BASE10DIG   DBL_DIG

typedef double  REAL;
#endif

static REAL linpack  (long nreps,int arsize);
static void matgen   (REAL *a,int lda,int n,REAL *b,REAL *norma);
static void dgefa    (REAL *a,int lda,int n,int *ipvt,int *info,int roll);
static void dgesl    (REAL *a,int lda,int n,int *ipvt,REAL *b,int job,int roll);
static void daxpy_r  (int n,REAL da,REAL *dx,int incx,REAL *dy,int incy);
static REAL ddot_r   (int n,REAL *dx,int incx,REAL *dy,int incy);
static void dscal_r  (int n,REAL da,REAL *dx,int incx);
static void daxpy_ur (int n,REAL da,REAL *dx,int incx,REAL *dy,int incy);
static REAL ddot_ur  (int n,REAL *dx,int incx,REAL *dy,int incy);
static void dscal_ur (int n,REAL da,REAL *dx,int incx);
static int  idamax   (int n,REAL *dx,int incx);
static REAL second   (void);

static void *mempool;


void main(void)

    {
    char    buf[80];
    int     arsize;
    long    arsize2d,memreq,nreps;
    size_t  malloc_arg;

    time_t start, end;
    double elapsed;  // seconds
    start = time(NULL);
    int terminate = 1;

    //while (1)
    while (terminate)
        {
        end = time(NULL);
        elapsed = difftime(end, start);
        if (elapsed >= 120.0 /* seconds */)
            {
            terminate = 0;
            }
        else
            /*printf("Enter array size (q to quit) [200]:  ");
            fgets(buf,79,stdin);
            if (buf[0]=='q' || buf[0]=='Q')
                break;
            if (buf[0]=='\0' || buf[0]=='\n')
                arsize=200;
            else
                arsize=atoi(buf);*/
            {
            arsize=200;
            arsize/=2;
            arsize*=2;
            if (arsize<10)
                {
                printf("Too small.\n");
                continue;
                }
            arsize2d = (long)arsize*(long)arsize;
            memreq=arsize2d*sizeof(REAL)+(long)arsize*sizeof(REAL)+(long)arsize*sizeof(int);
            printf("Memory required:  %ldK.\n",(memreq+512L)>>10);
            malloc_arg=(size_t)memreq;
            if (malloc_arg!=memreq || (mempool=malloc(malloc_arg))==NULL)
                {
                printf("Not enough memory available for given array size.\n\n");
                continue;
                }
            printf("\n\nLINPACK benchmark, %s precision.\n",PREC);
            printf("Machine precision:  %d digits.\n",BASE10DIG);
            printf("Array size %d X %d.\n",arsize,arsize);
            printf("Average rolled and unrolled performance:\n\n");
            printf("    Reps Time(s) DGEFA   DGESL  OVERHEAD    KFLOPS\n");
            printf("----------------------------------------------------\n");
            nreps=1;
            while (linpack(nreps,arsize)<10.)
                nreps*=2;
            free(mempool);
            printf("\n");
            }
        }
    }


static REAL linpack(long nreps,int arsize)

    {
    REAL  *a,*b;
    REAL   norma,t1,kflops,tdgesl,tdgefa,totalt,toverhead,ops;
    int   *ipvt,n,info,lda;
    long   i,arsize2d;

    lda = arsize;
    n = arsize/2;
    arsize2d = (long)arsize*(long)arsize;
    ops=((2.0*n*n*n)/3.0+2.0*n*n);
    a=(REAL *)mempool;
    b=a+arsize2d;
    ipvt=(int *)&b[arsize];
    tdgesl=0;
    tdgefa=0;
    totalt=second();
    for (i=0;i<nreps;i++)
        {
        matgen(a,lda,n,b,&norma);
        t1 = second();
        dgefa(a,lda,n,ipvt,&info,1);
        tdgefa += second()-t1;
        t1 = second();
        dgesl(a,lda,n,ipvt,b,0,1);
        tdgesl += second()-t1;
        }
    for (i=0;i<nreps;i++)
        {
        matgen(a,lda,n,b,&norma);
        t1 = second();
        dgefa(a,lda,n,ipvt,&info,0);
        tdgefa += second()-t1;
        t1 = second();
        dgesl(a,lda,n,ipvt,b,0,0);
        tdgesl += second()-t1;
        }
    totalt=second()-totalt;
    if (totalt<0.5 || tdgefa+tdgesl<0.2)
        return(0.);
    kflops=2.*nreps*ops/(1000.*(tdgefa+tdgesl));
    toverhead=totalt-tdgefa-tdgesl;
    if (tdgefa<0.)
        tdgefa=0.;
    if (tdgesl<0.)
        tdgesl=0.;
    if (toverhead<0.)
        toverhead=0.;
    printf("%8ld %6.2f %6.2f%% %6.2f%% %6.2f%%  %9.3f\n",
            nreps,totalt,100.*tdgefa/totalt,
            100.*tdgesl/totalt,100.*toverhead/totalt,
            kflops);
    return(totalt);
    }


/*
** For matgen,
** We would like to declare a[][lda], but c does not allow it.  In this
** function, references to a[i][j] are written a[lda*i+j].
*/
static void matgen(REAL *a,int lda,int n,REAL *b,REAL *norma)

    {
    int init,i,j;

    init = 1325;
    *norma = 0.0;
    for (j = 0; j < n; j++)
        for (i = 0; i < n; i++)
            {
            init = (int)((long)3125*(long)init % 65536L);
            a[lda*j+i] = (init - 32768.0)/16384.0;
            *norma = (a[lda*j+i] > *norma) ? a[lda*j+i] : *norma;
            }
    for (i = 0; i < n; i++)
        b[i] = 0.0;
    for (j = 0; j < n; j++)
        for (i = 0; i < n; i++)
            b[i] = b[i] + a[lda*j+i];
    }


/*
**
** DGEFA benchmark
**
** We would like to declare a[][lda], but c does not allow it.  In this
** function, references to a[i][j] are written a[lda*i+j].
**
**   dgefa factors a double precision matrix by gaussian elimination.
**
**   dgefa is usually called by dgeco, but it can be called
**   directly with a saving in time if  rcond  is not needed.
**   (time for dgeco) = (1 + 9/n)*(time for dgefa) .
**
**   on entry
**
**      a       REAL precision[n][lda]
**              the matrix to be factored.
**
**      lda     integer
**              the leading dimension of the array  a .
**
**      n       integer
**              the order of the matrix  a .
**
**   on return
**
**      a       an upper triangular matrix and the multipliers
**              which were used to obtain it.
**              the factorization can be written  a = l*u  where
**              l  is a product of permutation and unit lower
**              triangular matrices and  u  is upper triangular.
**
**      ipvt    integer[n]
**              an integer vector of pivot indices.
**
**      info    integer
**              = 0  normal value.
**              = k  if  u[k][k] .eq. 0.0 .  this is not an error
**                   condition for this subroutine, but it does
**                   indicate that dgesl or dgedi will divide by zero
**                   if called.  use  rcond  in dgeco for a reliable
**                   indication of singularity.
**
**   linpack. this version dated 08/14/78 .
**   cleve moler, university of New Mexico, argonne national lab.
**
**   functions
**
**   blas daxpy,dscal,idamax
**
*/
static void dgefa(REAL *a,int lda,int n,int *ipvt,int *info,int roll)

    {
    REAL t;
    int idamax(),j,k,kp1,l,nm1;

    /* gaussian elimination with partial pivoting */

    if (roll)
        {
        *info = 0;
        nm1 = n - 1;
        if (nm1 >=  0)
            for (k = 0; k < nm1; k++)
                {
                kp1 = k + 1;

                /* find l = pivot index */

                l = idamax(n-k,&a[lda*k+k],1) + k;
                ipvt[k] = l;

                /* zero pivot implies this column already
                   triangularized */

                if (a[lda*k+l] != ZERO)
                    {

                    /* interchange if necessary */

                    if (l != k)
                        {
                        t = a[lda*k+l];
                        a[lda*k+l] = a[lda*k+k];
                        a[lda*k+k] = t;
                        }

                    /* compute multipliers */

                    t = -ONE/a[lda*k+k];
                    dscal_r(n-(k+1),t,&a[lda*k+k+1],1);

                    /* row elimination with column indexing */

                    for (j = kp1; j < n; j++)
                        {
                        t = a[lda*j+l];
                        if (l != k)
                            {
                            a[lda*j+l] = a[lda*j+k];
                            a[lda*j+k] = t;
                            }
                        daxpy_r(n-(k+1),t,&a[lda*k+k+1],1,&a[lda*j+k+1],1);
                        }
                    }
                else
                    (*info) = k;
                }
        ipvt[n-1] = n-1;
        if (a[lda*(n-1)+(n-1)] == ZERO)
            (*info) = n-1;
        }
    else
        {
        *info = 0;
        nm1 = n - 1;
        if (nm1 >=  0)
            for (k = 0; k < nm1; k++)
                {
                kp1 = k + 1;

                /* find l = pivot index */

                l = idamax(n-k,&a[lda*k+k],1) + k;
                ipvt[k] = l;

                /* zero pivot implies this column already
                   triangularized */

                if (a[lda*k+l] != ZERO)
                    {

                    /* interchange if necessary */

                    if (l != k)
                        {
                        t = a[lda*k+l];
                        a[lda*k+l] = a[lda*k+k];
                        a[lda*k+k] = t;
                        }

                    /* compute multipliers */

                    t = -ONE/a[lda*k+k];
                    dscal_ur(n-(k+1),t,&a[lda*k+k+1],1);

                    /* row elimination with column indexing */

                    for (j = kp1; j < n; j++)
                        {
                        t = a[lda*j+l];
                        if (l != k)
                            {
                            a[lda*j+l] = a[lda*j+k];
                            a[lda*j+k] = t;
                            }
                        daxpy_ur(n-(k+1),t,&a[lda*k+k+1],1,&a[lda*j+k+1],1);
                        }
                    }
                else
                    (*info) = k;
                }
        ipvt[n-1] = n-1;
        if (a[lda*(n-1)+(n-1)] == ZERO)
            (*info) = n-1;
        }
    }


/*
**
** DGESL benchmark
**
** We would like to declare a[][lda], but c does not allow it.  In this
** function, references to a[i][j] are written a[lda*i+j].
**
**   dgesl solves the double precision system
**   a * x = b  or  trans(a) * x = b
**   using the factors computed by dgeco or dgefa.
**
**   on entry
**
**      a       double precision[n][lda]
**              the output from dgeco or dgefa.
**
**      lda     integer
**              the leading dimension of the array  a .
**
**      n       integer
**              the order of the matrix  a .
**
**      ipvt    integer[n]
**              the pivot vector from dgeco or dgefa.
**
**      b       double precision[n]
**              the right hand side vector.
**
**      job     integer
**              = 0         to solve  a*x = b ,
**              = nonzero   to solve  trans(a)*x = b  where
**                          trans(a)  is the transpose.
**
**  on return
**
**      b       the solution vector  x .
**
**   error condition
**
**      a division by zero will occur if the input factor contains a
**      zero on the diagonal.  technically this indicates singularity
**      but it is often caused by improper arguments or improper
**      setting of lda .  it will not occur if the subroutines are
**      called correctly and if dgeco has set rcond .gt. 0.0
**      or dgefa has set info .eq. 0 .
**
**   to compute  inverse(a) * c  where  c  is a matrix
**   with  p  columns
**         dgeco(a,lda,n,ipvt,rcond,z)
**         if (!rcond is too small){
**              for (j=0,j<p,j++)
**                      dgesl(a,lda,n,ipvt,c[j][0],0);
**         }
**
**   linpack. this version dated 08/14/78 .
**   cleve moler, university of new mexico, argonne national lab.
**
**   functions
**
**   blas daxpy,ddot
*/
static void dgesl(REAL *a,int lda,int n,int *ipvt,REAL *b,int job,int roll)

    {
    REAL    t;
    int     k,kb,l,nm1;

    if (roll)
        {
        nm1 = n - 1;
        if (job == 0)
            {

            /* job = 0 , solve  a * x = b   */
            /* first solve  l*y = b         */

            if (nm1 >= 1)
                for (k = 0; k < nm1; k++)
                    {
                    l = ipvt[k];
                    t = b[l];
                    if (l != k)
                        {
                        b[l] = b[k];
                        b[k] = t;
                        }
                    daxpy_r(n-(k+1),t,&a[lda*k+k+1],1,&b[k+1],1);
                    }

            /* now solve  u*x = y */

            for (kb = 0; kb < n; kb++)
                {
                k = n - (kb + 1);
                b[k] = b[k]/a[lda*k+k];
                t = -b[k];
                daxpy_r(k,t,&a[lda*k+0],1,&b[0],1);
                }
            }
        else
            {

            /* job = nonzero, solve  trans(a) * x = b  */
            /* first solve  trans(u)*y = b             */

            for (k = 0; k < n; k++)
                {
                t = ddot_r(k,&a[lda*k+0],1,&b[0],1);
                b[k] = (b[k] - t)/a[lda*k+k];
                }

            /* now solve trans(l)*x = y     */

            if (nm1 >= 1)
                for (kb = 1; kb < nm1; kb++)
                    {
                    k = n - (kb+1);
                    b[k] = b[k] + ddot_r(n-(k+1),&a[lda*k+k+1],1,&b[k+1],1);
                    l = ipvt[k];
                    if (l != k)
                        {
                        t = b[l];
                        b[l] = b[k];
                        b[k] = t;
                        }
                    }
            }
        }
    else
        {
        nm1 = n - 1;
        if (job == 0)
            {

            /* job = 0 , solve  a * x = b   */
            /* first solve  l*y = b         */

            if (nm1 >= 1)
                for (k = 0; k < nm1; k++)
                    {
                    l = ipvt[k];
                    t = b[l];
                    if (l != k)
                        {
                        b[l] = b[k];
                        b[k] = t;
                        }
                    daxpy_ur(n-(k+1),t,&a[lda*k+k+1],1,&b[k+1],1);
                    }

            /* now solve  u*x = y */

            for (kb = 0; kb < n; kb++)
                {
                k = n - (kb + 1);
                b[k] = b[k]/a[lda*k+k];
                t = -b[k];
                daxpy_ur(k,t,&a[lda*k+0],1,&b[0],1);
                }
            }
        else
            {

            /* job = nonzero, solve  trans(a) * x = b  */
            /* first solve  trans(u)*y = b             */

            for (k = 0; k < n; k++)
                {
                t = ddot_ur(k,&a[lda*k+0],1,&b[0],1);
                b[k] = (b[k] - t)/a[lda*k+k];
                }

            /* now solve trans(l)*x = y     */

            if (nm1 >= 1)
                for (kb = 1; kb < nm1; kb++)
                    {
                    k = n - (kb+1);
                    b[k] = b[k] + ddot_ur(n-(k+1),&a[lda*k+k+1],1,&b[k+1],1);
                    l = ipvt[k];
                    if (l != k)
                        {
                        t = b[l];
                        b[l] = b[k];
                        b[k] = t;
                        }
                    }
            }
        }
    }


/*
** Constant times a vector plus a vector.
** Jack Dongarra, linpack, 3/11/78.
** ROLLED version
*/
static void daxpy_r(int n,REAL da,REAL *dx,int incx,REAL *dy,int incy)

    {
    int i,ix,iy;

    if (n <= 0)
        return;
    if (da == ZERO)
        return;

    if (incx != 1 || incy != 1)
        {

        /* code for unequal increments or equal increments != 1 */

        ix = 1;
        iy = 1;
        if(incx < 0) ix = (-n+1)*incx + 1;
        if(incy < 0)iy = (-n+1)*incy + 1;
        for (i = 0;i < n; i++)
            {
            dy[iy] = dy[iy] + da*dx[ix];
            ix = ix + incx;
            iy = iy + incy;
            }
        return;
        }

    /* code for both increments equal to 1 */

    for (i = 0;i < n; i++)
        dy[i] = dy[i] + da*dx[i];
    }


/*
** Forms the dot product of two vectors.
** Jack Dongarra, linpack, 3/11/78.
** ROLLED version
*/
static REAL ddot_r(int n,REAL *dx,int incx,REAL *dy,int incy)

    {
    REAL dtemp;
    int i,ix,iy;

    dtemp = ZERO;

    if (n <= 0)
        return(ZERO);

    if (incx != 1 || incy != 1)
        {

        /* code for unequal increments or equal increments != 1 */

        ix = 0;
        iy = 0;
        if (incx < 0) ix = (-n+1)*incx;
        if (incy < 0) iy = (-n+1)*incy;
        for (i = 0;i < n; i++)
            {
            dtemp = dtemp + dx[ix]*dy[iy];
            ix = ix + incx;
            iy = iy + incy;
            }
        return(dtemp);
        }

    /* code for both increments equal to 1 */

    for (i=0;i < n; i++)
        dtemp = dtemp + dx[i]*dy[i];
    return(dtemp);
    }


/*
** Scales a vector by a constant.
** Jack Dongarra, linpack, 3/11/78.
** ROLLED version
*/
static void dscal_r(int n,REAL da,REAL *dx,int incx)

    {
    int i,nincx;

    if (n <= 0)
        return;
    if (incx != 1)
        {

        /* code for increment not equal to 1 */

        nincx = n*incx;
        for (i = 0; i < nincx; i = i + incx)
            dx[i] = da*dx[i];
        return;
        }

    /* code for increment equal to 1 */

    for (i = 0; i < n; i++)
        dx[i] = da*dx[i];
    }


/*
** constant times a vector plus a vector.
** Jack Dongarra, linpack, 3/11/78.
** UNROLLED version
*/
static void daxpy_ur(int n,REAL da,REAL *dx,int incx,REAL *dy,int incy)

    {
    int i,ix,iy,m;

    if (n <= 0)
        return;
    if (da == ZERO)
        return;

    if (incx != 1 || incy != 1)
        {

        /* code for unequal increments or equal increments != 1 */

        ix = 1;
        iy = 1;
        if(incx < 0) ix = (-n+1)*incx + 1;
        if(incy < 0)iy = (-n+1)*incy + 1;
        for (i = 0;i < n; i++)
            {
            dy[iy] = dy[iy] + da*dx[ix];
            ix = ix + incx;
            iy = iy + incy;
            }
        return;
        }

    /* code for both increments equal to 1 */

    m = n % 4;
    if ( m != 0)
        {
        for (i = 0; i < m; i++)
            dy[i] = dy[i] + da*dx[i];
        if (n < 4)
            return;
        }
    for (i = m; i < n; i = i + 4)
        {
        dy[i] = dy[i] + da*dx[i];
        dy[i+1] = dy[i+1] + da*dx[i+1];
        dy[i+2] = dy[i+2] + da*dx[i+2];
        dy[i+3] = dy[i+3] + da*dx[i+3];
        }
    }


/*
** Forms the dot product of two vectors.
** Jack Dongarra, linpack, 3/11/78.
** UNROLLED version
*/
static REAL ddot_ur(int n,REAL *dx,int incx,REAL *dy,int incy)

    {
    REAL dtemp;
    int i,ix,iy,m;

    dtemp = ZERO;

    if (n <= 0)
        return(ZERO);

    if (incx != 1 || incy != 1)
        {

        /* code for unequal increments or equal increments != 1 */

        ix = 0;
        iy = 0;
        if (incx < 0) ix = (-n+1)*incx;
        if (incy < 0) iy = (-n+1)*incy;
        for (i = 0;i < n; i++)
            {
            dtemp = dtemp + dx[ix]*dy[iy];
            ix = ix + incx;
            iy = iy + incy;
            }
        return(dtemp);
        }

    /* code for both increments equal to 1 */

    m = n % 5;
    if (m != 0)
        {
        for (i = 0; i < m; i++)
            dtemp = dtemp + dx[i]*dy[i];
        if (n < 5)
            return(dtemp);
        }
    for (i = m; i < n; i = i + 5)
        {
        dtemp = dtemp + dx[i]*dy[i] +
        dx[i+1]*dy[i+1] + dx[i+2]*dy[i+2] +
        dx[i+3]*dy[i+3] + dx[i+4]*dy[i+4];
        }
    return(dtemp);
    }


/*
** Scales a vector by a constant.
** Jack Dongarra, linpack, 3/11/78.
** UNROLLED version
*/
static void dscal_ur(int n,REAL da,REAL *dx,int incx)

    {
    int i,m,nincx;

    if (n <= 0)
        return;
    if (incx != 1)
        {

        /* code for increment not equal to 1 */

        nincx = n*incx;
        for (i = 0; i < nincx; i = i + incx)
            dx[i] = da*dx[i];
        return;
        }

    /* code for increment equal to 1 */

    m = n % 5;
    if (m != 0)
        {
        for (i = 0; i < m; i++)
            dx[i] = da*dx[i];
        if (n < 5)
            return;
        }
    for (i = m; i < n; i = i + 5)
        {
        dx[i] = da*dx[i];
        dx[i+1] = da*dx[i+1];
        dx[i+2] = da*dx[i+2];
        dx[i+3] = da*dx[i+3];
        dx[i+4] = da*dx[i+4];
        }
    }


/*
** Finds the index of element having max. absolute value.
** Jack Dongarra, linpack, 3/11/78.
*/
static int idamax(int n,REAL *dx,int incx)

    {
    REAL dmax;
    int i, ix, itemp;

    if (n < 1)
        return(-1);
    if (n ==1 )
        return(0);
    if(incx != 1)
        {

        /* code for increment not equal to 1 */

        ix = 1;
        dmax = fabs((double)dx[0]);
        ix = ix + incx;
        for (i = 1; i < n; i++)
            {
            if(fabs((double)dx[ix]) > dmax)
                {
                itemp = i;
                dmax = fabs((double)dx[ix]);
                }
            ix = ix + incx;
            }
        }
    else
        {

        /* code for increment equal to 1 */

        itemp = 0;
        dmax = fabs((double)dx[0]);
        for (i = 1; i < n; i++)
            if(fabs((double)dx[i]) > dmax)
                {
                itemp = i;
                dmax = fabs((double)dx[i]);
                }
        }
    return (itemp);
    }


static REAL second(void)

    {
    return ((REAL)((REAL)clock()/(REAL)CLOCKS_PER_SEC));
    }
	BootStrap: docker
	From: ubuntu:16.04
	IncludeCmd: yes

	%post
	sed -i 's/main/main restricted universe/g' /etc/apt/sources.list
	apt-get update
	apt-get install -y bash wget build-essential gcc time libc6-dev libgcc-5-dev
	mkdir /usr/local/test
	cd /usr/local/test
	wget --no-check-certificate https://gist.githubusercontent.com/remyd1/7711c3e6e5a12e674f6b6d773fe37472/raw/1b30a5bf88ec6098bc6a534ac7e4361abe4d3efe/linpack_simple_timeout.c
	wget --no-check-certificate https://gist.githubusercontent.com/remyd1/7711c3e6e5a12e674f6b6d773fe37472/raw/1b30a5bf88ec6098bc6a534ac7e4361abe4d3efe/get_flops.sh
	gcc -O3 -march=native -o linpack_simple -lm linpack_simple_timeout.c

	%runscript
	/usr/bin/time -o /usr/local/test/time_simple_linpack.o /usr/local/test/linpack_simple > /usr/local/test/results_simple_linpack.o
	FROM centos:centos7
	RUN yum update; yum clean all;
	#RUN yum -y install epel-release; yum clean all
	RUN yum -y install wget time gcc.x86_64 glibc-devel; yum clean all;
	WORKDIR /usr/local/test
	RUN wget https://gist.githubusercontent.com/remyd1/7711c3e6e5a12e674f6b6d773fe37472/raw/1b30a5bf88ec6098bc6a534ac7e4361abe4d3efe/linpack_simple_timeout.c
	RUN wget https://gist.githubusercontent.com/remyd1/7711c3e6e5a12e674f6b6d773fe37472/raw/1b30a5bf88ec6098bc6a534ac7e4361abe4d3efe/get_flops.sh
	RUN gcc -O3 -march=native -o linpack_simple -lm linpack_simple_timeout.c

	# run the benchs
	#RUN /usr/bin/time -o time_simple_linpack.o ./linpack_simple > results_simple_linpack.o

	#CMD bash get_flops.sh results_simple_linpack.o && cat time_simple_linpack.o
	#!/bin/bash

	min=`awk '$1 ~ "[0-9]+" {print $6}' $1 \|sort -k6 \|head -1`
	max=`awk '$1 ~ "[0-9]+" {print $6}' $1 \|sort -k6 \|tail -1`

	echo "min:"$min
	echo "max:"$max
	echo -n "moyenne:"
	awk '$1 ~ "[0-9]+" {total=total+$6;n=n+1;} END{print total/n;}' $1
	/*
	**
	** LINPACK.C Linpack benchmark, calculates FLOPS.
	** (FLoating Point Operations Per Second)
	**
	** Translated to C by Bonnie Toy 5/88
	**
	** Modified by Will Menninger, 10/93, with these features:
	** (modified on 2/25/94 to fix a problem with daxpy for
	** unequal increments or equal increments not equal to 1.
	** Jack Dongarra)
	**
	** - Defaults to double precision.
	** - Averages ROLLed and UNROLLed performance.
	** - User selectable array sizes.
	** - Automatically does enough repetitions to take at least 10 CPU seconds.
	** - Prints machine precision.
	** - ANSI prototyping.
	**
	** To compile: cc -O -o linpack linpack.c -lm
	**
	**
	*/

	#include <stdio.h>
	#include <stdlib.h>
	#include <math.h>
	#include <time.h>
	#include <float.h>

	#define DP

	#ifdef SP
	#define ZERO 0.0
	#define ONE 1.0
	#define PREC "Single"
	#define BASE10DIG FLT_DIG

	typedef float REAL;
	#endif

	#ifdef DP
	#define ZERO 0.0e0
	#define ONE 1.0e0
	#define PREC "Double"
	#define BASE10DIG DBL_DIG

	typedef double REAL;
	#endif

	static REAL linpack (long nreps,int arsize);
	static void matgen (REAL a,int lda,int n,REAL b,REAL *norma);
	static void dgefa (REAL a,int lda,int n,int ipvt,int *info,int roll);
	static void dgesl (REAL a,int lda,int n,int ipvt,REAL *b,int job,int roll);
	static void daxpy_r (int n,REAL da,REAL dx,int incx,REAL dy,int incy);
	static REAL ddot_r (int n,REAL dx,int incx,REAL dy,int incy);
	static void dscal_r (int n,REAL da,REAL *dx,int incx);
	static void daxpy_ur (int n,REAL da,REAL dx,int incx,REAL dy,int incy);
	static REAL ddot_ur (int n,REAL dx,int incx,REAL dy,int incy);
	static void dscal_ur (int n,REAL da,REAL *dx,int incx);
	static int idamax (int n,REAL *dx,int incx);
	static REAL second (void);

	static void *mempool;


	void main(void)

	{
	char buf[80];
	int arsize;
	long arsize2d,memreq,nreps;
	size_t malloc_arg;

	time_t start, end;
	double elapsed; // seconds
	start = time(NULL);
	int terminate = 1;

	//while (1)
	while (terminate)
	{
	end = time(NULL);
	elapsed = difftime(end, start);
	if (elapsed >= 120.0 /* seconds */)
	{
	terminate = 0;
	}
	else
	/*printf("Enter array size (q to quit) [200]: ");
	fgets(buf,79,stdin);
	if (buf[0]=='q' \|\| buf[0]=='Q')
	break;
	if (buf[0]=='\0' \|\| buf[0]=='\n')
	arsize=200;
	else
	arsize=atoi(buf);*/
	{
	arsize=200;
	arsize/=2;
	arsize*=2;
	if (arsize<10)
	{
	printf("Too small.\n");
	continue;
	}
	arsize2d = (long)arsize*(long)arsize;
	memreq=arsize2dsizeof(REAL)+(long)arsizesizeof(REAL)+(long)arsize*sizeof(int);
	printf("Memory required: %ldK.\n",(memreq+512L)>>10);
	malloc_arg=(size_t)memreq;
	if (malloc_arg!=memreq \|\| (mempool=malloc(malloc_arg))==NULL)
	{
	printf("Not enough memory available for given array size.\n\n");
	continue;
	}
	printf("\n\nLINPACK benchmark, %s precision.\n",PREC);
	printf("Machine precision: %d digits.\n",BASE10DIG);
	printf("Array size %d X %d.\n",arsize,arsize);
	printf("Average rolled and unrolled performance:\n\n");
	printf(" Reps Time(s) DGEFA DGESL OVERHEAD KFLOPS\n");
	printf("----------------------------------------------------\n");
	nreps=1;
	while (linpack(nreps,arsize)<10.)
	nreps*=2;
	free(mempool);
	printf("\n");
	}
	}
	}


	static REAL linpack(long nreps,int arsize)

	{
	REAL a,b;
	REAL norma,t1,kflops,tdgesl,tdgefa,totalt,toverhead,ops;
	int *ipvt,n,info,lda;
	long i,arsize2d;

	lda = arsize;
	n = arsize/2;
	arsize2d = (long)arsize*(long)arsize;
	ops=((2.0nnn)/3.0+2.0n*n);
	a=(REAL *)mempool;
	b=a+arsize2d;
	ipvt=(int *)&b[arsize];
	tdgesl=0;
	tdgefa=0;
	totalt=second();
	for (i=0;i<nreps;i++)
	{
	matgen(a,lda,n,b,&norma);
	t1 = second();
	dgefa(a,lda,n,ipvt,&info,1);
	tdgefa += second()-t1;
	t1 = second();
	dgesl(a,lda,n,ipvt,b,0,1);
	tdgesl += second()-t1;
	}
	for (i=0;i<nreps;i++)
	{
	matgen(a,lda,n,b,&norma);
	t1 = second();
	dgefa(a,lda,n,ipvt,&info,0);
	tdgefa += second()-t1;
	t1 = second();
	dgesl(a,lda,n,ipvt,b,0,0);
	tdgesl += second()-t1;
	}
	totalt=second()-totalt;
	if (totalt<0.5 \|\| tdgefa+tdgesl<0.2)
	return(0.);
	kflops=2.nrepsops/(1000.*(tdgefa+tdgesl));
	toverhead=totalt-tdgefa-tdgesl;
	if (tdgefa<0.)
	tdgefa=0.;
	if (tdgesl<0.)
	tdgesl=0.;
	if (toverhead<0.)
	toverhead=0.;
	printf("%8ld %6.2f %6.2f%% %6.2f%% %6.2f%% %9.3f\n",
	nreps,totalt,100.*tdgefa/totalt,
	100.tdgesl/totalt,100.toverhead/totalt,
	kflops);
	return(totalt);
	}


	/*
	** For matgen,
	** We would like to declare a[][lda], but c does not allow it. In this
	** function, references to a[i][j] are written a[lda*i+j].
	*/
	static void matgen(REAL a,int lda,int n,REAL b,REAL *norma)

	{
	int init,i,j;

	init = 1325;
	*norma = 0.0;
	for (j = 0; j < n; j++)
	for (i = 0; i < n; i++)
	{
	init = (int)((long)3125*(long)init % 65536L);
	a[lda*j+i] = (init - 32768.0)/16384.0;
	norma = (a[ldaj+i] > norma) ? a[ldaj+i] : *norma;
	}
	for (i = 0; i < n; i++)
	b[i] = 0.0;
	for (j = 0; j < n; j++)
	for (i = 0; i < n; i++)
	b[i] = b[i] + a[lda*j+i];
	}


	/*
	**
	** DGEFA benchmark
	**
	** We would like to declare a[][lda], but c does not allow it. In this
	** function, references to a[i][j] are written a[lda*i+j].
	**
	** dgefa factors a double precision matrix by gaussian elimination.
	**
	** dgefa is usually called by dgeco, but it can be called
	** directly with a saving in time if rcond is not needed.
	** (time for dgeco) = (1 + 9/n)*(time for dgefa) .
	**
	** on entry
	**
	** a REAL precision[n][lda]
	** the matrix to be factored.
	**
	** lda integer
	** the leading dimension of the array a .
	**
	** n integer
	** the order of the matrix a .
	**
	** on return
	**
	** a an upper triangular matrix and the multipliers
	** which were used to obtain it.
	** the factorization can be written a = l*u where
	** l is a product of permutation and unit lower
	** triangular matrices and u is upper triangular.
	**
	** ipvt integer[n]
	** an integer vector of pivot indices.
	**
	** info integer
	** = 0 normal value.
	** = k if u[k][k] .eq. 0.0 . this is not an error
	** condition for this subroutine, but it does
	** indicate that dgesl or dgedi will divide by zero
	** if called. use rcond in dgeco for a reliable
	** indication of singularity.
	**
	** linpack. this version dated 08/14/78 .
	** cleve moler, university of New Mexico, argonne national lab.
	**
	** functions
	**
	** blas daxpy,dscal,idamax
	**
	*/
	static void dgefa(REAL a,int lda,int n,int ipvt,int *info,int roll)

	{
	REAL t;
	int idamax(),j,k,kp1,l,nm1;

	/* gaussian elimination with partial pivoting */

	if (roll)
	{
	*info = 0;
	nm1 = n - 1;
	if (nm1 >= 0)
	for (k = 0; k < nm1; k++)
	{
	kp1 = k + 1;

	/* find l = pivot index */

	l = idamax(n-k,&a[lda*k+k],1) + k;
	ipvt[k] = l;

	/* zero pivot implies this column already
	triangularized */

	if (a[lda*k+l] != ZERO)
	{

	/* interchange if necessary */

	if (l != k)
	{
	t = a[lda*k+l];
	a[ldak+l] = a[ldak+k];
	a[lda*k+k] = t;
	}

	/* compute multipliers */

	t = -ONE/a[lda*k+k];
	dscal_r(n-(k+1),t,&a[lda*k+k+1],1);

	/* row elimination with column indexing */

	for (j = kp1; j < n; j++)
	{
	t = a[lda*j+l];
	if (l != k)
	{
	a[ldaj+l] = a[ldaj+k];
	a[lda*j+k] = t;
	}
	daxpy_r(n-(k+1),t,&a[ldak+k+1],1,&a[ldaj+k+1],1);
	}
	}
	else
	(*info) = k;
	}
	ipvt[n-1] = n-1;
	if (a[lda*(n-1)+(n-1)] == ZERO)
	(*info) = n-1;
	}
	else
	{
	*info = 0;
	nm1 = n - 1;
	if (nm1 >= 0)
	for (k = 0; k < nm1; k++)
	{
	kp1 = k + 1;

	/* find l = pivot index */

	l = idamax(n-k,&a[lda*k+k],1) + k;
	ipvt[k] = l;

	/* zero pivot implies this column already
	triangularized */

	if (a[lda*k+l] != ZERO)
	{

	/* interchange if necessary */

	if (l != k)
	{
	t = a[lda*k+l];
	a[ldak+l] = a[ldak+k];
	a[lda*k+k] = t;
	}

	/* compute multipliers */

	t = -ONE/a[lda*k+k];
	dscal_ur(n-(k+1),t,&a[lda*k+k+1],1);

	/* row elimination with column indexing */

	for (j = kp1; j < n; j++)
	{
	t = a[lda*j+l];
	if (l != k)
	{
	a[ldaj+l] = a[ldaj+k];
	a[lda*j+k] = t;
	}
	daxpy_ur(n-(k+1),t,&a[ldak+k+1],1,&a[ldaj+k+1],1);
	}
	}
	else
	(*info) = k;
	}
	ipvt[n-1] = n-1;
	if (a[lda*(n-1)+(n-1)] == ZERO)
	(*info) = n-1;
	}
	}


	/*
	**
	** DGESL benchmark
	**
	** We would like to declare a[][lda], but c does not allow it. In this
	** function, references to a[i][j] are written a[lda*i+j].
	**
	** dgesl solves the double precision system
	** a * x = b or trans(a) * x = b
	** using the factors computed by dgeco or dgefa.
	**
	** on entry
	**
	** a double precision[n][lda]
	** the output from dgeco or dgefa.
	**
	** lda integer
	** the leading dimension of the array a .
	**
	** n integer
	** the order of the matrix a .
	**
	** ipvt integer[n]
	** the pivot vector from dgeco or dgefa.
	**
	** b double precision[n]
	** the right hand side vector.
	**
	** job integer
	** = 0 to solve a*x = b ,
	** = nonzero to solve trans(a)*x = b where
	** trans(a) is the transpose.
	**
	** on return
	**
	** b the solution vector x .
	**
	** error condition
	**
	** a division by zero will occur if the input factor contains a
	** zero on the diagonal. technically this indicates singularity
	** but it is often caused by improper arguments or improper
	** setting of lda . it will not occur if the subroutines are
	** called correctly and if dgeco has set rcond .gt. 0.0
	** or dgefa has set info .eq. 0 .
	**
	** to compute inverse(a) * c where c is a matrix
	** with p columns
	** dgeco(a,lda,n,ipvt,rcond,z)
	** if (!rcond is too small){
	** for (j=0,j<p,j++)
	** dgesl(a,lda,n,ipvt,c[j][0],0);
	** }
	**
	** linpack. this version dated 08/14/78 .
	** cleve moler, university of new mexico, argonne national lab.
	**
	** functions
	**
	** blas daxpy,ddot
	*/
	static void dgesl(REAL a,int lda,int n,int ipvt,REAL *b,int job,int roll)

	{
	REAL t;
	int k,kb,l,nm1;

	if (roll)
	{
	nm1 = n - 1;
	if (job == 0)
	{

	/* job = 0 , solve a * x = b */
	/* first solve ly = b /

	if (nm1 >= 1)
	for (k = 0; k < nm1; k++)
	{
	l = ipvt[k];
	t = b[l];
	if (l != k)
	{
	b[l] = b[k];
	b[k] = t;
	}
	daxpy_r(n-(k+1),t,&a[lda*k+k+1],1,&b[k+1],1);
	}

	/* now solve ux = y /

	for (kb = 0; kb < n; kb++)
	{
	k = n - (kb + 1);
	b[k] = b[k]/a[lda*k+k];
	t = -b[k];
	daxpy_r(k,t,&a[lda*k+0],1,&b[0],1);
	}
	}
	else
	{

	/* job = nonzero, solve trans(a) * x = b */
	/* first solve trans(u)y = b /

	for (k = 0; k < n; k++)
	{
	t = ddot_r(k,&a[lda*k+0],1,&b[0],1);
	b[k] = (b[k] - t)/a[lda*k+k];
	}

	/* now solve trans(l)x = y /

	if (nm1 >= 1)
	for (kb = 1; kb < nm1; kb++)
	{
	k = n - (kb+1);
	b[k] = b[k] + ddot_r(n-(k+1),&a[lda*k+k+1],1,&b[k+1],1);
	l = ipvt[k];
	if (l != k)
	{
	t = b[l];
	b[l] = b[k];
	b[k] = t;
	}
	}
	}
	}
	else
	{
	nm1 = n - 1;
	if (job == 0)
	{

	/* job = 0 , solve a * x = b */
	/* first solve ly = b /

	if (nm1 >= 1)
	for (k = 0; k < nm1; k++)
	{
	l = ipvt[k];
	t = b[l];
	if (l != k)
	{
	b[l] = b[k];
	b[k] = t;
	}
	daxpy_ur(n-(k+1),t,&a[lda*k+k+1],1,&b[k+1],1);
	}

	/* now solve ux = y /

	for (kb = 0; kb < n; kb++)
	{
	k = n - (kb + 1);
	b[k] = b[k]/a[lda*k+k];
	t = -b[k];
	daxpy_ur(k,t,&a[lda*k+0],1,&b[0],1);
	}
	}
	else
	{

	/* job = nonzero, solve trans(a) * x = b */
	/* first solve trans(u)y = b /

	for (k = 0; k < n; k++)
	{
	t = ddot_ur(k,&a[lda*k+0],1,&b[0],1);
	b[k] = (b[k] - t)/a[lda*k+k];
	}

	/* now solve trans(l)x = y /

	if (nm1 >= 1)
	for (kb = 1; kb < nm1; kb++)
	{
	k = n - (kb+1);
	b[k] = b[k] + ddot_ur(n-(k+1),&a[lda*k+k+1],1,&b[k+1],1);
	l = ipvt[k];
	if (l != k)
	{
	t = b[l];
	b[l] = b[k];
	b[k] = t;
	}
	}
	}
	}
	}



	/*
	** Constant times a vector plus a vector.
	** Jack Dongarra, linpack, 3/11/78.
	** ROLLED version
	*/
	static void daxpy_r(int n,REAL da,REAL dx,int incx,REAL dy,int incy)

	{
	int i,ix,iy;

	if (n <= 0)
	return;
	if (da == ZERO)
	return;

	if (incx != 1 \|\| incy != 1)
	{

	/* code for unequal increments or equal increments != 1 */

	ix = 1;
	iy = 1;
	if(incx < 0) ix = (-n+1)*incx + 1;
	if(incy < 0)iy = (-n+1)*incy + 1;
	for (i = 0;i < n; i++)
	{
	dy[iy] = dy[iy] + da*dx[ix];
	ix = ix + incx;
	iy = iy + incy;
	}
	return;
	}

	/* code for both increments equal to 1 */

	for (i = 0;i < n; i++)
	dy[i] = dy[i] + da*dx[i];
	}


	/*
	** Forms the dot product of two vectors.
	** Jack Dongarra, linpack, 3/11/78.
	** ROLLED version
	*/
	static REAL ddot_r(int n,REAL dx,int incx,REAL dy,int incy)

	{
	REAL dtemp;
	int i,ix,iy;

	dtemp = ZERO;

	if (n <= 0)
	return(ZERO);

	if (incx != 1 \|\| incy != 1)
	{

	/* code for unequal increments or equal increments != 1 */

	ix = 0;
	iy = 0;
	if (incx < 0) ix = (-n+1)*incx;
	if (incy < 0) iy = (-n+1)*incy;
	for (i = 0;i < n; i++)
	{
	dtemp = dtemp + dx[ix]*dy[iy];
	ix = ix + incx;
	iy = iy + incy;
	}
	return(dtemp);
	}

	/* code for both increments equal to 1 */

	for (i=0;i < n; i++)
	dtemp = dtemp + dx[i]*dy[i];
	return(dtemp);
	}


	/*
	** Scales a vector by a constant.
	** Jack Dongarra, linpack, 3/11/78.
	** ROLLED version
	*/
	static void dscal_r(int n,REAL da,REAL *dx,int incx)

	{
	int i,nincx;

	if (n <= 0)
	return;
	if (incx != 1)
	{

	/* code for increment not equal to 1 */

	nincx = n*incx;
	for (i = 0; i < nincx; i = i + incx)
	dx[i] = da*dx[i];
	return;
	}

	/* code for increment equal to 1 */

	for (i = 0; i < n; i++)
	dx[i] = da*dx[i];
	}


	/*
	** constant times a vector plus a vector.
	** Jack Dongarra, linpack, 3/11/78.
	** UNROLLED version
	*/
	static void daxpy_ur(int n,REAL da,REAL dx,int incx,REAL dy,int incy)

	{
	int i,ix,iy,m;

	if (n <= 0)
	return;
	if (da == ZERO)
	return;

	if (incx != 1 \|\| incy != 1)
	{

	/* code for unequal increments or equal increments != 1 */

	ix = 1;
	iy = 1;
	if(incx < 0) ix = (-n+1)*incx + 1;
	if(incy < 0)iy = (-n+1)*incy + 1;
	for (i = 0;i < n; i++)
	{
	dy[iy] = dy[iy] + da*dx[ix];
	ix = ix + incx;
	iy = iy + incy;
	}
	return;
	}

	/* code for both increments equal to 1 */

	m = n % 4;
	if ( m != 0)
	{
	for (i = 0; i < m; i++)
	dy[i] = dy[i] + da*dx[i];
	if (n < 4)
	return;
	}
	for (i = m; i < n; i = i + 4)
	{
	dy[i] = dy[i] + da*dx[i];
	dy[i+1] = dy[i+1] + da*dx[i+1];
	dy[i+2] = dy[i+2] + da*dx[i+2];
	dy[i+3] = dy[i+3] + da*dx[i+3];
	}
	}


	/*
	** Forms the dot product of two vectors.
	** Jack Dongarra, linpack, 3/11/78.
	** UNROLLED version
	*/
	static REAL ddot_ur(int n,REAL dx,int incx,REAL dy,int incy)

	{
	REAL dtemp;
	int i,ix,iy,m;

	dtemp = ZERO;

	if (n <= 0)
	return(ZERO);

	if (incx != 1 \|\| incy != 1)
	{

	/* code for unequal increments or equal increments != 1 */

	ix = 0;
	iy = 0;
	if (incx < 0) ix = (-n+1)*incx;
	if (incy < 0) iy = (-n+1)*incy;
	for (i = 0;i < n; i++)
	{
	dtemp = dtemp + dx[ix]*dy[iy];
	ix = ix + incx;
	iy = iy + incy;
	}
	return(dtemp);
	}

	/* code for both increments equal to 1 */

	m = n % 5;
	if (m != 0)
	{
	for (i = 0; i < m; i++)
	dtemp = dtemp + dx[i]*dy[i];
	if (n < 5)
	return(dtemp);
	}
	for (i = m; i < n; i = i + 5)
	{
	dtemp = dtemp + dx[i]*dy[i] +
	dx[i+1]dy[i+1] + dx[i+2]dy[i+2] +
	dx[i+3]dy[i+3] + dx[i+4]dy[i+4];
	}
	return(dtemp);
	}


	/*
	** Scales a vector by a constant.
	** Jack Dongarra, linpack, 3/11/78.
	** UNROLLED version
	*/
	static void dscal_ur(int n,REAL da,REAL *dx,int incx)

	{
	int i,m,nincx;

	if (n <= 0)
	return;
	if (incx != 1)
	{

	/* code for increment not equal to 1 */

	nincx = n*incx;
	for (i = 0; i < nincx; i = i + incx)
	dx[i] = da*dx[i];
	return;
	}

	/* code for increment equal to 1 */

	m = n % 5;
	if (m != 0)
	{
	for (i = 0; i < m; i++)
	dx[i] = da*dx[i];
	if (n < 5)
	return;
	}
	for (i = m; i < n; i = i + 5)
	{
	dx[i] = da*dx[i];
	dx[i+1] = da*dx[i+1];
	dx[i+2] = da*dx[i+2];
	dx[i+3] = da*dx[i+3];
	dx[i+4] = da*dx[i+4];
	}
	}


	/*
	** Finds the index of element having max. absolute value.
	** Jack Dongarra, linpack, 3/11/78.
	*/
	static int idamax(int n,REAL *dx,int incx)

	{
	REAL dmax;
	int i, ix, itemp;

	if (n < 1)
	return(-1);
	if (n ==1 )
	return(0);
	if(incx != 1)
	{

	/* code for increment not equal to 1 */

	ix = 1;
	dmax = fabs((double)dx[0]);
	ix = ix + incx;
	for (i = 1; i < n; i++)
	{
	if(fabs((double)dx[ix]) > dmax)
	{
	itemp = i;
	dmax = fabs((double)dx[ix]);
	}
	ix = ix + incx;
	}
	}
	else
	{

	/* code for increment equal to 1 */

	itemp = 0;
	dmax = fabs((double)dx[0]);
	for (i = 1; i < n; i++)
	if(fabs((double)dx[i]) > dmax)
	{
	itemp = i;
	dmax = fabs((double)dx[i]);
	}
	}
	return (itemp);
	}


	static REAL second(void)

	{
	return ((REAL)((REAL)clock()/(REAL)CLOCKS_PER_SEC));
	}