|
#include <stdio.h> |
|
#include <string.h> |
|
#include <stdlib.h> |
|
#include <math.h> |
|
#include <assert.h> |
|
#include <float.h> |
|
#include <stdbool.h> |
|
#include "mkl.h" |
|
#include "mpi.h" |
|
#include "mkl_cluster_sparse_solver.h" |
|
#include "random_matrix.h" |
|
|
|
#define alloc(TYPE, SIZE) (TYPE *)mkl_malloc(sizeof(TYPE) * (SIZE), 64) |
|
|
|
//This is adapted from cl_solver_unsym_dist from the examples with mkl |
|
int solve(csr_sparse_matrix * Q, double * answer, double * right_hand_side, MKL_INT size, int rank, int * comm, MKL_INT start, MKL_INT end) //{{{ |
|
{ |
|
MKL_INT n = size; |
|
MKL_INT mtype = 11; /* Real unsymmetric matrix */ |
|
/* RHS and solution vectors. */ |
|
double *b = NULL; |
|
double *x = NULL; |
|
|
|
int mpi_stat = 0; |
|
MKL_INT nrhs = 1; /* Number of right hand sides. */ |
|
/* Internal solver memory pointer pt, */ |
|
/* 32-bit: int pt[64]; 64-bit: long int pt[64] */ |
|
/* or void *pt[64] should be OK on both architectures */ |
|
void *pt[64] = { 0 }; |
|
/* Cluster Sparse Solver control parameters. */ |
|
MKL_INT iparm[64] = { 0 }; |
|
MKL_INT maxfct, mnum, phase, msglvl, error; |
|
|
|
/* Auxiliary variables. */ |
|
double ddum; /* Double dummy */ |
|
MKL_INT idum; /* Integer dummy. */ |
|
MKL_INT j; |
|
/* -------------------------------------------------------------------- */ |
|
/* .. Setup Cluster Sparse Solver control parameters. */ |
|
/* -------------------------------------------------------------------- */ |
|
iparm[ 0] = 1; /* Solver default parameters overriden with provided by iparm */ |
|
iparm[ 1] = 3; /* Use METIS for fill-in reordering */ //openmp (3) |
|
iparm[ 5] = 0; /* Write solution into x */ |
|
iparm[ 7] = 2; /* Max number of iterative refinement steps */ |
|
iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */ |
|
iparm[10] = 1; /* Use nonsymmetric permutation and scaling MPS */ |
|
iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */ |
|
iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */ |
|
iparm[18] = -1; /* Output: Mflops for LU factorization */ |
|
iparm[26] = 0; /* Check input data for correctness */ |
|
iparm[34] = 1; /* Zero-based indexing */ |
|
iparm[39] = 2; /* Input: matrix/rhs/solution are distributed between MPI processes */ |
|
iparm[40] = start; // Domain start (first row) |
|
iparm[41] = end; // Domain end (last row) |
|
|
|
maxfct = 1; /* Maximum number of numerical factorizations. */ |
|
mnum = 1; /* Which factorization to use. */ |
|
msglvl = 1; /* Print statistical information in file */ |
|
error = 0; /* Initialize error flag */ |
|
x = answer; |
|
b = right_hand_side; |
|
/* -------------------------------------------------------------------- */ |
|
/* .. Reordering and Symbolic Factorization. This step also allocates */ |
|
/* all memory that is necessary for the factorization. */ |
|
/* -------------------------------------------------------------------- */ |
|
phase = 11; |
|
if ( rank == 0 ) printf ("Starting symbolic factorization\n"); |
|
MPI_Barrier(MPI_COMM_WORLD); |
|
cluster_sparse_solver ( pt, &maxfct, &mnum, &mtype, &phase, |
|
&n, Q->data, Q->row_index, Q->cols, &idum, &nrhs, iparm, &msglvl, &ddum, &ddum, comm, &error ); |
|
MPI_Barrier(MPI_COMM_WORLD); |
|
if ( error != 0 ) |
|
{ |
|
if ( rank == 0 ) printf ("\nERROR during symbolic factorization: %lld", (long long int)error); |
|
return phase; |
|
} |
|
|
|
/* -------------------------------------------------------------------- */ |
|
/* .. Numerical factorization. */ |
|
/* -------------------------------------------------------------------- */ |
|
phase = 22; |
|
if ( rank == 0 ) printf ("Starting numeric factorization\n"); |
|
cluster_sparse_solver ( pt, &maxfct, &mnum, &mtype, &phase, |
|
&n, Q->data, Q->row_index, Q->cols, &idum, &nrhs, iparm, &msglvl, &ddum, &ddum, comm, &error ); |
|
MPI_Barrier(MPI_COMM_WORLD); |
|
if ( error != 0 ) |
|
{ |
|
if ( rank == 0 ) printf ("\nERROR during numerical factorization: %lli", (long long int)error); |
|
return 2; |
|
} |
|
|
|
/* -------------------------------------------------------------------- */ |
|
/* .. Back substitution and iterative refinement. */ |
|
/* -------------------------------------------------------------------- */ |
|
phase = 33; |
|
|
|
if ( rank == 0 ) printf ("\nSolving system..."); |
|
cluster_sparse_solver ( pt, &maxfct, &mnum, &mtype, &phase, |
|
&n, Q->data, Q->row_index, Q->cols, &idum, &nrhs, iparm, &msglvl, b, x, comm, &error ); |
|
MPI_Barrier(MPI_COMM_WORLD); |
|
if ( error != 0 ) |
|
{ |
|
if ( rank == 0 ) printf ("\nERROR during solution: %lli", (long long int)error); |
|
return 4; |
|
} |
|
|
|
/* -------------------------------------------------------------------- */ |
|
/* .. Termination and release of memory. */ |
|
/* -------------------------------------------------------------------- */ |
|
phase = -1; /* Release internal memory. */ |
|
cluster_sparse_solver ( pt, &maxfct, &mnum, &mtype, &phase, |
|
&n, &ddum, Q->row_index, Q->cols, &idum, &nrhs, iparm, &msglvl, &ddum, &ddum, comm, &error ); |
|
MPI_Barrier(MPI_COMM_WORLD); |
|
if ( error != 0 ) |
|
{ |
|
if ( rank == 0 ) printf ("\nERROR during release memory: %lli", (long long int)error); |
|
return 5; |
|
} |
|
return 0; |
|
} //}}} |
|
|
|
int main(int argc, char ** argv) |
|
{ |
|
//OMP settings |
|
int num_threads = 16; |
|
omp_set_num_threads(num_threads); |
|
|
|
//Arguments |
|
assert(argc == 2); |
|
MKL_INT matrix_size = atoi(argv[1]); //matrix size |
|
|
|
// RHS and solution vectors |
|
double * y = NULL; |
|
double * x = NULL; |
|
|
|
// Sparse matrix struct |
|
csr_sparse_matrix * Q = NULL; |
|
|
|
//MPI setup |
|
int mpi_stat = 0; |
|
int dummy_argc = 0; |
|
int comm, rank, mpi_size; |
|
char** dummy_argv; |
|
MKL_INT i; |
|
MKL_INT error = 0; |
|
|
|
// Init MPI |
|
mpi_stat = MPI_Init( &dummy_argc, &dummy_argv ); |
|
mpi_stat = MPI_Comm_rank( MPI_COMM_WORLD, &rank ); |
|
mpi_stat = MPI_Comm_size( MPI_COMM_WORLD, &mpi_size ); |
|
comm = MPI_Comm_c2f( MPI_COMM_WORLD ); |
|
|
|
//ranges of distributed matrix chunks |
|
MKL_INT chunk = matrix_size / mpi_size; |
|
MKL_INT rem = matrix_size - (chunk * matrix_size); |
|
MKL_INT start = rank * chunk; |
|
MKL_INT end = (start + chunk) - 1; |
|
if (rank == mpi_size - 1) { |
|
end = matrix_size - 1; |
|
} |
|
MKL_INT chunk_size = end - start + 1; |
|
printf("From: %lld To: %lld, %lld @ %d\n", start, end, end - start + 1, rank); |
|
|
|
//Matrix with a few random numbers |
|
Q = random_matrix(matrix_size, start, end, rank); |
|
|
|
//Lock |
|
MPI_Barrier(MPI_COMM_WORLD); |
|
|
|
//RHS and solution alloc |
|
x = alloc(double, chunk_size); |
|
y = alloc(double, chunk_size); |
|
|
|
// fill with some random numbers |
|
for (i = 0; i < chunk_size; i ++) { |
|
y[i] = rand(); |
|
} |
|
|
|
MPI_Barrier(MPI_COMM_WORLD); |
|
|
|
//call sequence to cluster_sparse_solver |
|
error = solve(Q, x, y, matrix_size, rank, &comm, start, end); |
|
|
|
MPI_Barrier(MPI_COMM_WORLD); |
|
|
|
if (error && rank == 0) { |
|
printf ("[ERROR]: %lld\n", (long long int)error); |
|
mpi_stat = MPI_Finalize(); |
|
exit(1); |
|
} |
|
|
|
//free everything |
|
mkl_free(Q->data); |
|
mkl_free(Q->cols); |
|
mkl_free(Q->row_index); |
|
mkl_free(Q); |
|
mkl_free(x); |
|
mkl_free(y); |
|
|
|
mpi_stat = MPI_Finalize(); |
|
return 0; |
|
} |