Last active
July 30, 2020 20:00
-
-
Save shivupa/27e752c2795b0f5c6c0695a9d691e7d2 to your computer and use it in GitHub Desktop.
Slate Geev Example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// slate08_hermitian_eig.cc | |
// Solve Hermitian eigenvalues A = V Lambda V^H | |
#include <slate/slate.hh> | |
#include <mpi.h> | |
#include "util.hh" | |
int mpi_size = 0; | |
int mpi_rank = 0; | |
//------------------------------------------------------------------------------ | |
template <typename T> | |
void test_hermitian_eig() | |
{ | |
using real_t = blas::real_type<T>; | |
print_func( mpi_rank ); | |
// TODO: failing if n not divisible by nb? | |
int64_t n=1000, nb=100, p=2, q=2; | |
assert( mpi_size == p*q ); | |
slate::HermitianMatrix<T> A( slate::Uplo::Lower, n, nb, p, q, MPI_COMM_WORLD ); | |
A.insertLocalTiles(); | |
random_matrix( A ); | |
std::vector<real_t> Lambda( n ); | |
// A = V Lambda V^H, eigenvalues only | |
// TODO: rename slate::eig()? | |
//slate::heev( A, Lambda ); | |
// TODO: revert to above interface. | |
// Empty matrix of eigenvectors. | |
slate::Matrix<T> V; | |
slate::heev( slate::Job::NoVec, A, Lambda, V ); | |
#ifdef SLATE_EIG | |
// TODO: eigenvectors | |
//slate::Matrix<T> V( n, n, nb, p, q, MPI_COMM_WORLD ); | |
V.insertLocalTiles(); | |
slate::heev( slate::Job::Vec, A, Lambda, V ); | |
#endif | |
} | |
//------------------------------------------------------------------------------ | |
int main( int argc, char** argv ) | |
{ | |
int provided = 0; | |
int err = MPI_Init_thread( &argc, &argv, MPI_THREAD_MULTIPLE, &provided ); | |
assert( err == 0 ); | |
assert( provided == MPI_THREAD_MULTIPLE ); | |
err = MPI_Comm_size( MPI_COMM_WORLD, &mpi_size ); | |
assert( err == 0 ); | |
if (mpi_size != 4) { | |
printf( "Usage: mpirun -np 4 %s # 4 ranks hard coded\n", argv[0] ); | |
return -1; | |
} | |
err = MPI_Comm_rank( MPI_COMM_WORLD, &mpi_rank ); | |
assert( err == 0 ); | |
// so random_matrix is different on different ranks. | |
srand( 100 * mpi_rank ); | |
test_hermitian_eig< float >(); | |
test_hermitian_eig< double>(); | |
test_hermitian_eig< std::complex<float> >(); | |
test_hermitian_eig< std::complex<double> >(); | |
err = MPI_Finalize(); | |
assert( err == 0 ); | |
} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Configure, build and run slatetest twice on 4 MPI ranks:
# first with eigenvalues only, then with eigenvectors enabled.

module purge
module load cuda-11.0.2-gcc-10.1.0-jjqofb7 intel-mkl-2020.1.217-gcc-10.1.0-qy37plj openmpi-3.1.6-gcc-10.1.0-gg6lube slate-develop-gcc-10.1.0-y44cca7

# Installation prefix of SLATE; its include/ and lib/ are passed to cmake.
SLATE_PREFIX="/home/shiv/software/spack/opt/spack/linux-archrolling-sandybridge/gcc-10.1.0/slate-develop-y44cca7taruqjl52dlojtmhswmpvxrfj"

# build_and_run <ON|OFF>
#   Rebuilds from a clean build/ directory with SLATE_EIGENVECTORS set to the
#   given value and runs the resulting binary. The binary is only launched
#   when configure and build succeeded, so a stale or missing executable is
#   never run, and cmake is never invoked outside build/.
build_and_run() {
    local eigenvectors="$1"

    rm -rf build
    mkdir build || return 1

    # Subshell keeps the directory change local to the build step.
    (
        cd build || exit 1
        CXX=mpic++ \
        cmake \
            -DSLATE_INC="${SLATE_PREFIX}/include" \
            -DSLATE_LIB="${SLATE_PREFIX}/lib" \
            -DSLATE_EIGENVECTORS="${eigenvectors}" \
            .. \
        && make -j4 VERBOSE=1
    ) || return 1

    mpirun -np 4 ./build/bin/slatetest
}

# Both configurations are attempted independently, as before.
build_and_run OFF
build_and_run ON
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Set a requirement on the cmake version.
# find_package(CUDAToolkit) below needs CMake >= 3.17; add_compile_definitions()
# and CMAKE_CXX_STANDARD 20 already need >= 3.12.
cmake_minimum_required(VERSION 3.17 FATAL_ERROR)
#set(CMAKE_DEBUG_TARGET_PROPERTIES INCLUDE_DIRECTORIES)

# Set up the project and version info
project(SLATETEST CXX)

# set up where to find Find*.cmake files and where to put the things we build
set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/CMake" ${CMAKE_MODULE_PATH})
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY
    ${SLATETEST_BINARY_DIR}/lib
    CACHE PATH "Single output directory for building all libraries.")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY
    ${SLATETEST_BINARY_DIR}/bin
    CACHE PATH "Single output directory for building all executables.")

set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Location of the SLATE installation. The defaults are anchored at the home
# directory (the run script passes the same paths under the home directory);
# a bare relative default would be resolved against the source tree.
# Override with -DSLATE_INC=... -DSLATE_LIB=... as needed.
set(SLATE_INC "$ENV{HOME}/software/spack/opt/spack/linux-archrolling-sandybridge/gcc-10.1.0/slate-develop-y44cca7taruqjl52dlojtmhswmpvxrfj/include" CACHE PATH "Slate include directory")
set(SLATE_LIB "$ENV{HOME}/software/spack/opt/spack/linux-archrolling-sandybridge/gcc-10.1.0/slate-develop-y44cca7taruqjl52dlojtmhswmpvxrfj/lib" CACHE PATH "Slate library directory")

# When ON, a.cpp is compiled with SLATE_EIG defined and also computes eigenvectors.
set(SLATE_EIGENVECTORS OFF CACHE BOOL "Get Eigenvectors?")
if (SLATE_EIGENVECTORS)
    message("Turning on Slate Eigenvectors")
    add_compile_definitions(SLATE_EIG)
endif()

# Link against every shared library found in the SLATE library directory.
file(GLOB SLATE_LIBRARIES ${SLATE_LIB}/*.so)
if (NOT SLATE_LIBRARIES)
    message(WARNING "No shared libraries found in SLATE_LIB='${SLATE_LIB}'; linking slatetest will probably fail.")
endif()

include_directories(${PROJECT_SOURCE_DIR}/src ${PROJECT_BINARY_DIR}/src)

find_package(MPI REQUIRED)
find_package(OpenMP REQUIRED)
find_package(CUDAToolkit REQUIRED)
#find_package(slate REQUIRED)
#find_package(blaspp REQUIRED)
#find_package(lapackpp REQUIRED)

add_executable(slatetest a.cpp)
target_include_directories(slatetest PUBLIC include ${CUDAToolkit_INCLUDE_DIRS} ${SLATE_INC})
target_link_libraries(slatetest CUDA::cudart CUDA::cublas MPI::MPI_CXX OpenMP::OpenMP_CXX ${SLATE_LIBRARIES} blaspp lapackpp)

message("CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES: ${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}")

install(TARGETS slatetest DESTINATION bin)
# if it's a debug build, this will print all the cmake variables
#if(1) | |
# # found this useful macro on SO https://stackoverflow.com/a/31390758 | |
# macro(print_all_variables) | |
# message( | |
# STATUS "print_all_variables------------------------------------------{") | |
# get_cmake_property(_variableNames VARIABLES) | |
# foreach(_variableName ${_variableNames}) | |
# message(STATUS "${_variableName}=${${_variableName}}") | |
# endforeach() | |
# message( | |
# STATUS "print_all_variables------------------------------------------}") | |
# endmacro() | |
# print_all_variables() | |
#endif(1) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- The CXX compiler identification is GNU 10.1.0 | |
-- Detecting CXX compiler ABI info | |
-- Detecting CXX compiler ABI info - done | |
-- Check for working CXX compiler: /home/shiv/software/spack/opt/spack/linux-archrolling-sandybridge/gcc-10.1.0/openmpi-3.1.6-gg6lubep3ae42f32lcfsrtgyoiy7utng/bin/mpic++ - skipped | |
-- Detecting CXX compile features | |
-- Detecting CXX compile features - done | |
-- Found MPI_CXX: /home/shiv/software/spack/opt/spack/linux-archrolling-sandybridge/gcc-10.1.0/openmpi-3.1.6-gg6lubep3ae42f32lcfsrtgyoiy7utng/bin/mpic++ (found version "3.1") | |
-- Found MPI: TRUE (found version "3.1") | |
-- Found OpenMP_CXX: -fopenmp (found version "4.5") | |
-- Found OpenMP: TRUE (found version "4.5") | |
-- Found CUDAToolkit: /home/shiv/software/spack/opt/spack/linux-archrolling-sandybridge/gcc-10.1.0/cuda-11.0.2-jjqofb7vx32nbsx4onhanyc2trh2appc/include (found version "11.0.194") | |
-- Looking for C++ include pthread.h | |
-- Looking for C++ include pthread.h - found | |
-- Performing Test CMAKE_HAVE_LIBC_PTHREAD | |
-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Success | |
-- Found Threads: TRUE | |
-- Configuring done | |
-- Generating done | |
-- Build files have been written to: /home/shiv/mega/lapackpptest/build | |
/usr/bin/cmake -S/home/shiv/mega/lapackpptest -B/home/shiv/mega/lapackpptest/build --check-build-system CMakeFiles/Makefile.cmake 0 | |
/usr/bin/cmake -E cmake_progress_start /home/shiv/mega/lapackpptest/build/CMakeFiles /home/shiv/mega/lapackpptest/build//CMakeFiles/progress.marks | |
make -f CMakeFiles/Makefile2 all | |
make[1]: Entering directory '/home/shiv/mega/lapackpptest/build' | |
make -f CMakeFiles/slatetest.dir/build.make CMakeFiles/slatetest.dir/depend | |
make[2]: Entering directory '/home/shiv/mega/lapackpptest/build' | |
cd /home/shiv/mega/lapackpptest/build && /usr/bin/cmake -E cmake_depends "Unix Makefiles" /home/shiv/mega/lapackpptest /home/shiv/mega/lapackpptest /home/shiv/mega/lapackpptest/build /home/shiv/mega/lapackpptest/build /home/shiv/mega/lapackpptest/build/CMakeFiles/slatetest.dir/DependInfo.cmake --color= | |
Dependee "/home/shiv/mega/lapackpptest/build/CMakeFiles/slatetest.dir/DependInfo.cmake" is newer than depender "/home/shiv/mega/lapackpptest/build/CMakeFiles/slatetest.dir/depend.internal". | |
Dependee "/home/shiv/mega/lapackpptest/build/CMakeFiles/CMakeDirectoryInformation.cmake" is newer than depender "/home/shiv/mega/lapackpptest/build/CMakeFiles/slatetest.dir/depend.internal". | |
Scanning dependencies of target slatetest | |
make[2]: Leaving directory '/home/shiv/mega/lapackpptest/build' | |
make -f CMakeFiles/slatetest.dir/build.make CMakeFiles/slatetest.dir/build | |
make[2]: Entering directory '/home/shiv/mega/lapackpptest/build' | |
[ 50%] Building CXX object CMakeFiles/slatetest.dir/a.cpp.o | |
/home/shiv/software/spack/opt/spack/linux-archrolling-sandybridge/gcc-10.1.0/openmpi-3.1.6-gg6lubep3ae42f32lcfsrtgyoiy7utng/bin/mpic++ -I/home/shiv/mega/lapackpptest/src -I/home/shiv/mega/lapackpptest/build/src -I/home/shiv/mega/lapackpptest/include -fopenmp -std=gnu++2a -o CMakeFiles/slatetest.dir/a.cpp.o -c /home/shiv/mega/lapackpptest/a.cpp | |
[100%] Linking CXX executable bin/slatetest | |
/usr/bin/cmake -E cmake_link_script CMakeFiles/slatetest.dir/link.txt --verbose=1 | |
/home/shiv/software/spack/opt/spack/linux-archrolling-sandybridge/gcc-10.1.0/openmpi-3.1.6-gg6lubep3ae42f32lcfsrtgyoiy7utng/bin/mpic++ CMakeFiles/slatetest.dir/a.cpp.o -o bin/slatetest -lblaspp -llapackpp -lslate -lslate_lapack_api -lslate_scalapack_api -lblaspp -llapackpp /home/shiv/software/spack/opt/spack/linux-archrolling-sandybridge/gcc-10.1.0/cuda-11.0.2-jjqofb7vx32nbsx4onhanyc2trh2appc/lib64/libcudart.so /home/shiv/software/spack/opt/spack/linux-archrolling-sandybridge/gcc-10.1.0/cuda-11.0.2-jjqofb7vx32nbsx4onhanyc2trh2appc/lib64/libcublas.so /usr/lib/libgomp.so | |
make[2]: Leaving directory '/home/shiv/mega/lapackpptest/build' | |
[100%] Built target slatetest | |
make[1]: Leaving directory '/home/shiv/mega/lapackpptest/build' | |
/usr/bin/cmake -E cmake_progress_start /home/shiv/mega/lapackpptest/build/CMakeFiles 0 | |
rank 3: void test_hermitian_eig() [with T = float] | |
rank 1: void test_hermitian_eig() [with T = float] | |
rank 2: void test_hermitian_eig() [with T = float] | |
rank 0: void test_hermitian_eig() [with T = float] | |
rank 3: void test_hermitian_eig() [with T = double] | |
rank 2: void test_hermitian_eig() [with T = double] | |
rank 0: void test_hermitian_eig() [with T = double] | |
rank 1: void test_hermitian_eig() [with T = double] | |
rank 3: void test_hermitian_eig() [with T = std::complex<float>] | |
rank 2: void test_hermitian_eig() [with T = std::complex<float>] | |
rank 0: void test_hermitian_eig() [with T = std::complex<float>] | |
rank 1: void test_hermitian_eig() [with T = std::complex<float>] | |
rank 3: void test_hermitian_eig() [with T = std::complex<double>] | |
rank 0: void test_hermitian_eig() [with T = std::complex<double>] | |
rank 2: void test_hermitian_eig() [with T = std::complex<double>] | |
rank 1: void test_hermitian_eig() [with T = std::complex<double>] | |
-- The CXX compiler identification is GNU 10.1.0 | |
-- Detecting CXX compiler ABI info | |
-- Detecting CXX compiler ABI info - done | |
-- Check for working CXX compiler: /home/shiv/software/spack/opt/spack/linux-archrolling-sandybridge/gcc-10.1.0/openmpi-3.1.6-gg6lubep3ae42f32lcfsrtgyoiy7utng/bin/mpic++ - skipped | |
-- Detecting CXX compile features | |
-- Detecting CXX compile features - done | |
-- Found MPI_CXX: /home/shiv/software/spack/opt/spack/linux-archrolling-sandybridge/gcc-10.1.0/openmpi-3.1.6-gg6lubep3ae42f32lcfsrtgyoiy7utng/bin/mpic++ (found version "3.1") | |
-- Found MPI: TRUE (found version "3.1") | |
-- Found OpenMP_CXX: -fopenmp (found version "4.5") | |
-- Found OpenMP: TRUE (found version "4.5") | |
-- Found CUDAToolkit: /home/shiv/software/spack/opt/spack/linux-archrolling-sandybridge/gcc-10.1.0/cuda-11.0.2-jjqofb7vx32nbsx4onhanyc2trh2appc/include (found version "11.0.194") | |
-- Looking for C++ include pthread.h | |
-- Looking for C++ include pthread.h - found | |
-- Performing Test CMAKE_HAVE_LIBC_PTHREAD | |
-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Success | |
-- Found Threads: TRUE | |
-- Configuring done | |
-- Generating done | |
-- Build files have been written to: /home/shiv/mega/lapackpptest/build | |
/usr/bin/cmake -S/home/shiv/mega/lapackpptest -B/home/shiv/mega/lapackpptest/build --check-build-system CMakeFiles/Makefile.cmake 0 | |
/usr/bin/cmake -E cmake_progress_start /home/shiv/mega/lapackpptest/build/CMakeFiles /home/shiv/mega/lapackpptest/build//CMakeFiles/progress.marks | |
make -f CMakeFiles/Makefile2 all | |
make[1]: Entering directory '/home/shiv/mega/lapackpptest/build' | |
make -f CMakeFiles/slatetest.dir/build.make CMakeFiles/slatetest.dir/depend | |
make[2]: Entering directory '/home/shiv/mega/lapackpptest/build' | |
cd /home/shiv/mega/lapackpptest/build && /usr/bin/cmake -E cmake_depends "Unix Makefiles" /home/shiv/mega/lapackpptest /home/shiv/mega/lapackpptest /home/shiv/mega/lapackpptest/build /home/shiv/mega/lapackpptest/build /home/shiv/mega/lapackpptest/build/CMakeFiles/slatetest.dir/DependInfo.cmake --color= | |
Dependee "/home/shiv/mega/lapackpptest/build/CMakeFiles/slatetest.dir/DependInfo.cmake" is newer than depender "/home/shiv/mega/lapackpptest/build/CMakeFiles/slatetest.dir/depend.internal". | |
Dependee "/home/shiv/mega/lapackpptest/build/CMakeFiles/CMakeDirectoryInformation.cmake" is newer than depender "/home/shiv/mega/lapackpptest/build/CMakeFiles/slatetest.dir/depend.internal". | |
Scanning dependencies of target slatetest | |
make[2]: Leaving directory '/home/shiv/mega/lapackpptest/build' | |
make -f CMakeFiles/slatetest.dir/build.make CMakeFiles/slatetest.dir/build | |
make[2]: Entering directory '/home/shiv/mega/lapackpptest/build' | |
[ 50%] Building CXX object CMakeFiles/slatetest.dir/a.cpp.o | |
/home/shiv/software/spack/opt/spack/linux-archrolling-sandybridge/gcc-10.1.0/openmpi-3.1.6-gg6lubep3ae42f32lcfsrtgyoiy7utng/bin/mpic++ -DSLATE_EIG -I/home/shiv/mega/lapackpptest/src -I/home/shiv/mega/lapackpptest/build/src -I/home/shiv/mega/lapackpptest/include -fopenmp -std=gnu++2a -o CMakeFiles/slatetest.dir/a.cpp.o -c /home/shiv/mega/lapackpptest/a.cpp | |
[100%] Linking CXX executable bin/slatetest | |
/usr/bin/cmake -E cmake_link_script CMakeFiles/slatetest.dir/link.txt --verbose=1 | |
/home/shiv/software/spack/opt/spack/linux-archrolling-sandybridge/gcc-10.1.0/openmpi-3.1.6-gg6lubep3ae42f32lcfsrtgyoiy7utng/bin/mpic++ CMakeFiles/slatetest.dir/a.cpp.o -o bin/slatetest -lblaspp -llapackpp -lslate -lslate_lapack_api -lslate_scalapack_api -lblaspp -llapackpp /home/shiv/software/spack/opt/spack/linux-archrolling-sandybridge/gcc-10.1.0/cuda-11.0.2-jjqofb7vx32nbsx4onhanyc2trh2appc/lib64/libcudart.so /home/shiv/software/spack/opt/spack/linux-archrolling-sandybridge/gcc-10.1.0/cuda-11.0.2-jjqofb7vx32nbsx4onhanyc2trh2appc/lib64/libcublas.so /usr/lib/libgomp.so | |
make[2]: Leaving directory '/home/shiv/mega/lapackpptest/build' | |
[100%] Built target slatetest | |
make[1]: Leaving directory '/home/shiv/mega/lapackpptest/build' | |
/usr/bin/cmake -E cmake_progress_start /home/shiv/mega/lapackpptest/build/CMakeFiles 0 | |
rank 0: void test_hermitian_eig() [with T = float] | |
rank 2: void test_hermitian_eig() [with T = float] | |
rank 3: void test_hermitian_eig() [with T = float] | |
rank 1: void test_hermitian_eig() [with T = float] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef UTIL_H | |
#define UTIL_H | |
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <complex>

#include <blas.hh>
//------------------------------------------------------------------------------ | |
/// Prints "rank <rank>: <func>" followed by a newline to stdout.
///
/// Declared inline because it is defined in a header: without it, including
/// this header from more than one translation unit yields multiple-definition
/// link errors.
///
/// @param rank  Rank number to report.
/// @param func  Name of the function being reported.
inline void print_func_( int rank, const char* func )
{
    printf( "rank %d: %s\n", rank, func );
}

// print_func( rank ) reports the calling function; GNU-compatible compilers
// supply the full signature (including template arguments), others only the
// plain function name.
#ifdef __GNUC__
    #define print_func( rank ) print_func_( rank, __PRETTY_FUNCTION__ )
#else
    #define print_func( rank ) print_func_( rank, __func__ )
#endif
//------------------------------------------------------------------------------ | |
// utility to create real or complex number | |
template <typename scalar_type> | |
scalar_type make( blas::real_type<scalar_type> re, | |
blas::real_type<scalar_type> im ) | |
{ | |
return re; | |
} | |
template <typename T> | |
std::complex<T> make( T re, T im ) | |
{ | |
return std::complex<T>( re, im ); | |
} | |
//------------------------------------------------------------------------------ | |
// generate random matrix A | |
template <typename scalar_type> | |
void random_matrix( int64_t m, int64_t n, scalar_type* A, int64_t lda ) | |
{ | |
for (int64_t j = 0; j < n; ++j) { | |
for (int64_t i = 0; i < m; ++i) { | |
A[ i + j*lda ] = make<scalar_type>( rand() / double(RAND_MAX), | |
rand() / double(RAND_MAX) ); | |
} | |
} | |
} | |
//------------------------------------------------------------------------------ | |
// generate random, diagonally dominant matrix A | |
template <typename scalar_type> | |
void random_matrix_diag_dominant( int64_t m, int64_t n, scalar_type* A, int64_t lda ) | |
{ | |
using blas::real; | |
int64_t max_mn = std::max( m, n ); | |
for (int64_t j = 0; j < n; ++j) { | |
for (int64_t i = 0; i < m; ++i) { | |
A[ i + j*lda ] = make<scalar_type>( rand() / double(RAND_MAX), | |
rand() / double(RAND_MAX) ); | |
} | |
if (j < m) { | |
// make diagonal real & dominant | |
A[ j + j*lda ] = real( A[j + j*lda] ) + max_mn; | |
} | |
} | |
} | |
//------------------------------------------------------------------------------ | |
// generate random matrix A

/// Fills every local tile of the tiled matrix A with random values.
/// Tiles for which A.tileIsLocal() is false are skipped; any exception raised
/// while accessing or filling a tile is swallowed and that tile is left as is.
///
/// @param A  Matrix providing nt(), mt(), tileIsLocal( i, j ) and operator()( i, j ).
template <typename matrix_type>
void random_matrix( matrix_type& A )
{
    for (int64_t col = 0; col < A.nt(); ++col) {
        for (int64_t row = 0; row < A.mt(); ++row) {
            if (! A.tileIsLocal( row, col )) {
                continue;
            }
            try {
                auto tile = A( row, col );
                random_matrix( tile.mb(), tile.nb(), tile.data(), tile.stride() );
            }
            catch (...) {
                // ignore missing tiles
            }
        }
    }
}
//------------------------------------------------------------------------------ | |
// generate random, diagonally dominant matrix A | |
template <typename matrix_type> | |
void random_matrix_diag_dominant( matrix_type& A ) | |
{ | |
using blas::real; | |
int64_t max_mn = std::max( A.m(), A.n() ); | |
for (int64_t j = 0; j < A.nt(); ++j) { | |
for (int64_t i = 0; i < A.mt(); ++i) { | |
if (A.tileIsLocal( i, j )) { | |
try { | |
auto T = A( i, j ); | |
random_matrix( T.mb(), T.nb(), T.data(), T.stride() ); | |
if (i == j) { | |
// assuming tileMb == tileNb, then i == j are diagonal tiles | |
// make diagonal real & dominant | |
int64_t min_mb_nb = std::min( T.mb(), T.nb() ); | |
for (int64_t ii = 0; ii < min_mb_nb; ++ii) { | |
T.at(ii, ii) = real( T.at(ii, ii) ) + max_mn; | |
} | |
} | |
} | |
catch (...) { | |
// ignore missing tiles | |
} | |
} | |
} | |
} | |
} | |
//------------------------------------------------------------------------------ | |
template <typename scalar_type> | |
void print_matrix( const char* label, int m, int n, scalar_type* A, int lda ) | |
{ | |
using blas::real; | |
using blas::imag; | |
printf( "%s = [\n", label ); | |
for (int i = 0; i < n; ++i) { | |
for (int j = 0; j < n; ++j) { | |
if (blas::is_complex<scalar_type>::value) { | |
printf( " %7.4f + %7.4fi", real(A[i + j*lda]), imag(A[i + j*lda]) ); | |
} | |
else { | |
printf( " %7.4f", real(A[i + j*lda]) ); | |
} | |
} | |
printf( "\n" ); | |
} | |
printf( "];\n" ); | |
} | |
//------------------------------------------------------------------------------ | |
// suppress compiler "unused" warning for variable x
// The argument is parenthesized so that an expression such as a + b is cast
// to void as a whole rather than only its first operand.
#define unused( x ) ((void) (x))
#endif // UTIL_H | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment