Skip to content

Instantly share code, notes, and snippets.

/README.md Secret

Created October 9, 2015 22:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/173e2dff21fef71bea51 to your computer and use it in GitHub Desktop.
Save anonymous/173e2dff21fef71bea51 to your computer and use it in GitHub Desktop.
Example of internal error with cluster_sparse_solver

#Example of unexpected behavious of cluster_sparse_solver with MKL 11.3

##run Make and run with any number of mpi nodes. First argument to the executable is the matrix size

make
mpirun -n 2 cl_solver_unsym_distr 2000

##result

*** Error in PARDISO  (     insufficient_memory) error_num= 1                                                                           
*** Error in PARDISO memory allocation: MATCHING_REORDERING_DATA, allocation of 1 bytes failed                                          
total memory wanted here: 126378 kbyte

Running with a smaller size of the input matrix of 1000 produces a segfault on an internal structure.

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <assert.h>
#include <float.h>
#include <stdbool.h>
#include "mkl.h"
#include "mpi.h"
#include "mkl_cluster_sparse_solver.h"
#include "random_matrix.h"
#define alloc(TYPE, SIZE) (TYPE *)mkl_malloc(sizeof(TYPE) * (SIZE), 64)
//This is adapted from cl_solver_unsym_dist from the examples with mkl
int solve(csr_sparse_matrix * Q, double * answer, double * right_hand_side, MKL_INT size, int rank, int * comm, MKL_INT start, MKL_INT end) //{{{
{
MKL_INT n = size;
MKL_INT mtype = 11; /* Real unsymmetric matrix */
/* RHS and solution vectors. */
double *b = NULL;
double *x = NULL;
int mpi_stat = 0;
MKL_INT nrhs = 1; /* Number of right hand sides. */
/* Internal solver memory pointer pt, */
/* 32-bit: int pt[64]; 64-bit: long int pt[64] */
/* or void *pt[64] should be OK on both architectures */
void *pt[64] = { 0 };
/* Cluster Sparse Solver control parameters. */
MKL_INT iparm[64] = { 0 };
MKL_INT maxfct, mnum, phase, msglvl, error;
/* Auxiliary variables. */
double ddum; /* Double dummy */
MKL_INT idum; /* Integer dummy. */
MKL_INT j;
/* -------------------------------------------------------------------- */
/* .. Setup Cluster Sparse Solver control parameters. */
/* -------------------------------------------------------------------- */
iparm[ 0] = 1; /* Solver default parameters overriden with provided by iparm */
iparm[ 1] = 3; /* Use METIS for fill-in reordering */ //openmp (3)
iparm[ 5] = 0; /* Write solution into x */
iparm[ 7] = 2; /* Max number of iterative refinement steps */
iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */
iparm[10] = 1; /* Use nonsymmetric permutation and scaling MPS */
iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */
iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */
iparm[18] = -1; /* Output: Mflops for LU factorization */
iparm[26] = 0; /* Check input data for correctness */
iparm[34] = 1; /* Zero-based indexing */
iparm[39] = 2; /* Input: matrix/rhs/solution are distributed between MPI processes */
iparm[40] = start; // Domain start (first row)
iparm[41] = end; // Domain end (last row)
maxfct = 1; /* Maximum number of numerical factorizations. */
mnum = 1; /* Which factorization to use. */
msglvl = 1; /* Print statistical information in file */
error = 0; /* Initialize error flag */
x = answer;
b = right_hand_side;
/* -------------------------------------------------------------------- */
/* .. Reordering and Symbolic Factorization. This step also allocates */
/* all memory that is necessary for the factorization. */
/* -------------------------------------------------------------------- */
phase = 11;
if ( rank == 0 ) printf ("Starting symbolic factorization\n");
MPI_Barrier(MPI_COMM_WORLD);
cluster_sparse_solver ( pt, &maxfct, &mnum, &mtype, &phase,
&n, Q->data, Q->row_index, Q->cols, &idum, &nrhs, iparm, &msglvl, &ddum, &ddum, comm, &error );
MPI_Barrier(MPI_COMM_WORLD);
if ( error != 0 )
{
if ( rank == 0 ) printf ("\nERROR during symbolic factorization: %lld", (long long int)error);
return phase;
}
/* -------------------------------------------------------------------- */
/* .. Numerical factorization. */
/* -------------------------------------------------------------------- */
phase = 22;
if ( rank == 0 ) printf ("Starting numeric factorization\n");
cluster_sparse_solver ( pt, &maxfct, &mnum, &mtype, &phase,
&n, Q->data, Q->row_index, Q->cols, &idum, &nrhs, iparm, &msglvl, &ddum, &ddum, comm, &error );
MPI_Barrier(MPI_COMM_WORLD);
if ( error != 0 )
{
if ( rank == 0 ) printf ("\nERROR during numerical factorization: %lli", (long long int)error);
return 2;
}
/* -------------------------------------------------------------------- */
/* .. Back substitution and iterative refinement. */
/* -------------------------------------------------------------------- */
phase = 33;
if ( rank == 0 ) printf ("\nSolving system...");
cluster_sparse_solver ( pt, &maxfct, &mnum, &mtype, &phase,
&n, Q->data, Q->row_index, Q->cols, &idum, &nrhs, iparm, &msglvl, b, x, comm, &error );
MPI_Barrier(MPI_COMM_WORLD);
if ( error != 0 )
{
if ( rank == 0 ) printf ("\nERROR during solution: %lli", (long long int)error);
return 4;
}
/* -------------------------------------------------------------------- */
/* .. Termination and release of memory. */
/* -------------------------------------------------------------------- */
phase = -1; /* Release internal memory. */
cluster_sparse_solver ( pt, &maxfct, &mnum, &mtype, &phase,
&n, &ddum, Q->row_index, Q->cols, &idum, &nrhs, iparm, &msglvl, &ddum, &ddum, comm, &error );
MPI_Barrier(MPI_COMM_WORLD);
if ( error != 0 )
{
if ( rank == 0 ) printf ("\nERROR during release memory: %lli", (long long int)error);
return 5;
}
return 0;
} //}}}
int main(int argc, char ** argv)
{
//OMP settings
int num_threads = 16;
omp_set_num_threads(num_threads);
//Arguments
assert(argc == 2);
MKL_INT matrix_size = atoi(argv[1]); //matrix size
// RHS and solution vectors
double * y = NULL;
double * x = NULL;
// Sparse matrix struct
csr_sparse_matrix * Q = NULL;
//MPI setup
int mpi_stat = 0;
int dummy_argc = 0;
int comm, rank, mpi_size;
char** dummy_argv;
MKL_INT i;
MKL_INT error = 0;
// Init MPI
mpi_stat = MPI_Init( &dummy_argc, &dummy_argv );
mpi_stat = MPI_Comm_rank( MPI_COMM_WORLD, &rank );
mpi_stat = MPI_Comm_size( MPI_COMM_WORLD, &mpi_size );
comm = MPI_Comm_c2f( MPI_COMM_WORLD );
//ranges of distributed matrix chunks
MKL_INT chunk = matrix_size / mpi_size;
MKL_INT rem = matrix_size - (chunk * matrix_size);
MKL_INT start = rank * chunk;
MKL_INT end = (start + chunk) - 1;
if (rank == mpi_size - 1) {
end = matrix_size - 1;
}
MKL_INT chunk_size = end - start + 1;
printf("From: %lld To: %lld, %lld @ %d\n", start, end, end - start + 1, rank);
//Matrix with a few random numbers
Q = random_matrix(matrix_size, start, end, rank);
//Lock
MPI_Barrier(MPI_COMM_WORLD);
//RHS and solution alloc
x = alloc(double, chunk_size);
y = alloc(double, chunk_size);
// fill with some random numbers
for (i = 0; i < chunk_size; i ++) {
y[i] = rand();
}
MPI_Barrier(MPI_COMM_WORLD);
//call sequence to cluster_sparse_solver
error = solve(Q, x, y, matrix_size, rank, &comm, start, end);
MPI_Barrier(MPI_COMM_WORLD);
if (error && rank == 0) {
printf ("[ERROR]: %lld\n", (long long int)error);
mpi_stat = MPI_Finalize();
exit(1);
}
//free everything
mkl_free(Q->data);
mkl_free(Q->cols);
mkl_free(Q->row_index);
mkl_free(Q);
mkl_free(x);
mkl_free(y);
mpi_stat = MPI_Finalize();
return 0;
}
COMPILER=mpiicc
MKL_LIBS=-lmkl_intel_ilp64 -lmkl_core -lmkl_blacs_intelmpi_ilp64 -lmkl_scalapack_ilp64 -DMKL_ILP64 -openmp -lpthread -lmkl_intel_thread
GNU_LIBS=-lm
FLAGS=-Wall -g
LIB_PATH=-L/cm/shared/apps/intel/compilers_and_libraries_2016/linux/mkl/lib/intel64 -L/cm/shared/apps/intel/impi/5.0.3.048/intel64/lib
INC_PATH=-I/cm/shared/apps/intel/impi/5.0.3.048/intel64/include -I/cm/shared/apps/intel/compilers_and_libraries_2016/linux/mkl/include/
cl_solver_unsym_distr_c: cl_solver_unsym_distr_c.c random_matrix.h
$(COMPILER) $< -o $@ $(INC_PATH) $(LIB_PATH) $(MKL_LIBS) $(FLAGS) $(OPTIONS) $(GNU_LIBS)
clean:
rm -f cl_solver_unsym_distr_c
#define alloc(TYPE, SIZE) (TYPE *)mkl_malloc(sizeof(TYPE) * (SIZE), 64)
typedef struct
{
double * data;
MKL_INT * cols;
MKL_INT * row_index;
MKL_INT nnz;
MKL_INT nrows;
MKL_INT ncols;
} csr_sparse_matrix;
csr_sparse_matrix * random_matrix(MKL_INT matrix_size, MKL_INT start, MKL_INT finish, int rank)
{
MKL_INT i, j;
MKL_INT current_size = 0;
//Initializing Q
csr_sparse_matrix * Q = alloc(csr_sparse_matrix, 1);
Q->nrows = finish - start + 1;
Q->ncols = matrix_size;
Q->nnz = 0;
Q->data = alloc(double, 1);
Q->cols = alloc(MKL_INT, 1);
Q->row_index = alloc(MKL_INT, Q->nrows + 1);
// buffer each row before adding to the sparse representation
double * buffer = alloc(double, Q->ncols);
MKL_INT k = 0;
for(i = 0; i < (Q->nrows); i ++) {
for(j = 0; j < (Q->ncols); j ++){
double z = rand();
if (z > 0.5) {
buffer[j] = rand();
} else {
buffer[j] = 0.0;
}
}
//number of non-zeros
MKL_INT nnz = 0;
for (j = 0; j < (Q->ncols); j ++) {
nnz += buffer[j] == 0.0 ? 0L : 1L;
}
current_size += nnz + 1L; //add 1 for a diagonal element
//reallocate to fit the non-zeros and the diagonal
Q->data = (double *)mkl_realloc(Q->data, current_size * sizeof(double));
Q->cols = (MKL_INT *)mkl_realloc(Q->cols, current_size * sizeof(MKL_INT));
assert(Q->data != NULL);
assert(Q->cols != NULL);
//iterate the non-zeros and add to the sparse representation
Q->row_index[i] = k;
for (j = 0; j < (Q->ncols); j ++) {
double z = buffer[j];
if (z != 0 || (i + start) == j) {
Q->data[k] = z;
Q->cols[k] = j;
k++;
}
}
}
//free
mkl_free(buffer);
Q->nnz = k;
//last entry of ja should be nnz
Q->row_index[Q->nrows] = Q->nnz;
return(Q);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment