ASKabalan/CUDA Macros.h

## CUDA Macros.h
#include <cuda_runtime.h>
#include <iostream>

// Wraps a CUDA runtime call: passes its cudaError_t result to check() together
// with the stringified call expression and the call site's file and line.
#define CHECK_CUDA_ERROR(val) check((val), #val, __FILE__, __LINE__)

// Reports a failed CUDA runtime call and terminates the process.
//
//   err  - status code returned by the call being checked
//   func - text of the checked expression (supplied by CHECK_CUDA_ERROR)
//   file - source file of the call site
//   line - source line of the call site
//
// Does nothing when err == cudaSuccess. Otherwise writes the location, the
// runtime's error string and the expression text to std::cerr, then exits
// with EXIT_FAILURE.
//
// Declared inline because this definition lives in a header: without it,
// including the header from more than one translation unit produces
// multiple-definition link errors.
inline void check(cudaError_t err, const char* const func,
                  const char* const file, const int line)
{
    if (err != cudaSuccess)
    {
        std::cerr << "CUDA Runtime Error at: " << file << ":" << line
                  << std::endl;
        std::cerr << cudaGetErrorString(err) << " " << func << std::endl;
        std::exit(EXIT_FAILURE);
    }
}

// Checks the CUDA runtime's last recorded error at the call site.
#define CHECK_LAST_CUDA_ERROR() checkLast(__FILE__, __LINE__)

// Fetches the runtime's last error with cudaGetLastError() and aborts the
// process if it is not cudaSuccess.
//
//   file - source file of the call site
//   line - source line of the call site
//
// On failure writes the location and the runtime's error string to std::cerr,
// then exits with EXIT_FAILURE. cudaGetLastError() reads and clears the
// recorded error, so a second call right after this one reports success.
//
// Declared inline because this definition lives in a header: without it,
// including the header from more than one translation unit produces
// multiple-definition link errors.
inline void checkLast(const char* const file, const int line)
{
    cudaError_t const err{cudaGetLastError()};
    if (err != cudaSuccess)
    {
        std::cerr << "CUDA Runtime Error at: " << file << ":" << line
                  << std::endl;
        std::cerr << cudaGetErrorString(err) << std::endl;
        std::exit(EXIT_FAILURE);
    }
}

## Indexing 2D .h
// Row-major 2D indexing (XY order): flat offset of element (r, c) when
// consecutive columns of one row are adjacent, i.e. the row stride is col_dim.
// row_dim is accepted for a uniform signature but is not used in the expansion.
#define I2D_RM(row_dim, col_dim, r, c) ((c) + (r) * (col_dim))

// Column-major 2D indexing (XY order): flat offset of element (r, c) when
// consecutive rows of one column are adjacent, i.e. the column stride is
// row_dim. col_dim is accepted for a uniform signature but is not used in the
// expansion.
#define I2D_CM(row_dim, col_dim, r, c) ((r) + (c) * (row_dim))

// 3D Accessing
//
// Layout selectors for I3D. Each name lists the three axes from the
// slowest-varying (outermost) to the fastest-varying (contiguous) one, e.g.
// XYZ_ORDER means x has the largest stride and z has stride 1.
#define XYZ_ORDER 0
#define YXZ_ORDER 1
#define ZYX_ORDER 2
#define YZX_ORDER 3
#define XZY_ORDER 4
#define ZXY_ORDER 5

// Flat offset of element (x, y, z) in a dense x_dim * y_dim * z_dim array laid
// out according to `order` (one of the *_ORDER constants above). Expands to 0
// for an unrecognised order value.
//
// `order` is parenthesised so that an expression argument (for example a
// ternary or a bitwise expression) is compared as a whole against each
// constant instead of being re-associated with the `==`.
//
// NOTE(review): the ZYX_ORDER and ZXY_ORDER formulas were previously swapped
// relative to the slowest-to-fastest naming that the other four orders follow;
// they now match their names. Confirm no caller relied on the swapped layout.
#define I3D(x_dim, y_dim, z_dim, x, y, z, order)                               \
  (((order) == XYZ_ORDER)   ? ((x) * (y_dim) + (y)) * (z_dim) + (z)            \
   : ((order) == YXZ_ORDER) ? ((y) * (x_dim) + (x)) * (z_dim) + (z)            \
   : ((order) == ZYX_ORDER) ? ((z) * (y_dim) + (y)) * (x_dim) + (x)            \
   : ((order) == YZX_ORDER) ? ((y) * (z_dim) + (z)) * (x_dim) + (x)            \
   : ((order) == XZY_ORDER) ? ((x) * (z_dim) + (z)) * (y_dim) + (y)            \
   : ((order) == ZXY_ORDER) ? ((z) * (x_dim) + (x)) * (y_dim) + (y)            \
                            : 0)

## Indexing 3D .h
// 3D Accessing
//
// Layout selectors for I3D. Each name lists the three axes from the
// slowest-varying (outermost) to the fastest-varying (contiguous) one, e.g.
// XYZ_ORDER means x has the largest stride and z has stride 1.
#define XYZ_ORDER 0
#define YXZ_ORDER 1
#define ZYX_ORDER 2
#define YZX_ORDER 3
#define XZY_ORDER 4
#define ZXY_ORDER 5

// Flat offset of element (x, y, z) in a dense x_dim * y_dim * z_dim array laid
// out according to `order` (one of the *_ORDER constants above). Expands to 0
// for an unrecognised order value.
//
// `order` is parenthesised so that an expression argument (for example a
// ternary or a bitwise expression) is compared as a whole against each
// constant instead of being re-associated with the `==`.
//
// NOTE(review): the ZYX_ORDER and ZXY_ORDER formulas were previously swapped
// relative to the slowest-to-fastest naming that the other four orders follow;
// they now match their names. Confirm no caller relied on the swapped layout.
#define I3D(x_dim, y_dim, z_dim, x, y, z, order)                               \
  (((order) == XYZ_ORDER)   ? ((x) * (y_dim) + (y)) * (z_dim) + (z)            \
   : ((order) == YXZ_ORDER) ? ((y) * (x_dim) + (x)) * (z_dim) + (z)            \
   : ((order) == ZYX_ORDER) ? ((z) * (y_dim) + (y)) * (x_dim) + (x)            \
   : ((order) == YZX_ORDER) ? ((y) * (z_dim) + (z)) * (x_dim) + (x)            \
   : ((order) == XZY_ORDER) ? ((x) * (z_dim) + (z)) * (y_dim) + (y)            \
   : ((order) == ZXY_ORDER) ? ((z) * (x_dim) + (x)) * (y_dim) + (y)            \
                            : 0)

## query_dev.cu
#include <iostream>

// Enumerates the CUDA devices visible to the runtime and prints a selection of
// each device's properties to std::cout.
//
// Returns 0 when every query succeeded, 1 when the device count or any
// device's properties could not be queried (the failure is reported on
// std::cerr).
int main() {
  // Zero-initialised so the value is well defined even if the query fails.
  int deviceCount = 0;
  const cudaError_t countStatus = cudaGetDeviceCount(&deviceCount);
  if (countStatus != cudaSuccess) {
    std::cerr << "cudaGetDeviceCount failed: "
              << cudaGetErrorString(countStatus) << std::endl;
    return 1;
  }
  std::cout << "Device count: " << deviceCount << std::endl;

  int exitCode = 0;
  for (int i = 0; i < deviceCount; i++) {
    cudaDeviceProp deviceProp;
    const cudaError_t propStatus = cudaGetDeviceProperties(&deviceProp, i);
    if (propStatus != cudaSuccess) {
      // Skip this device rather than printing an unfilled struct, but keep
      // reporting the remaining devices.
      std::cerr << "cudaGetDeviceProperties failed for device " << i << ": "
                << cudaGetErrorString(propStatus) << std::endl;
      exitCode = 1;
      continue;
    }
    std::cout << "Device " << i << " has " << std::endl
              << "\tname: " << deviceProp.name << std::endl
              << "\tcapability: " << deviceProp.major << "." << deviceProp.minor
              << std::endl
              << "\ttotalGlobalMem: " << deviceProp.totalGlobalMem << std::endl
              << "\tsharedMemPerBlock: " << deviceProp.sharedMemPerBlock
              << std::endl
              // Label previously read "\ttregsPerBlock" (stray 't').
              << "\tregsPerBlock: " << deviceProp.regsPerBlock << std::endl
              << "\twarpSize: " << deviceProp.warpSize << std::endl
              << "\tMaxThreadsPerBlock: " << deviceProp.maxThreadsPerBlock
              << std::endl
              << "\tMaxThreadsDim: " << deviceProp.maxThreadsDim[0] << " "
              << deviceProp.maxThreadsDim[1] << " "
              << deviceProp.maxThreadsDim[2] << std::endl
              << "\tMaxGridSize: " << deviceProp.maxGridSize[0] << " "
              << deviceProp.maxGridSize[1] << " " << deviceProp.maxGridSize[2]
              << std::endl
              << "\tMaxStreamingMultiprocessors: "
              << deviceProp.multiProcessorCount << std::endl;
  }
  return exitCode;
}
	#include <cuda_runtime.h>
	#include <iostream>

	// Forwards the result of a CUDA runtime call to check(), along with the
	// stringified call expression and the file/line of the call site.
	#define CHECK_CUDA_ERROR(val) check((val), #val, __FILE__, __LINE__)

	// Terminates the process with EXIT_FAILURE when `err` is not cudaSuccess,
	// after writing the call site (file:line), the runtime's error string and
	// the checked expression text (`func`) to std::cerr. Returns normally on
	// success.
	void check(cudaError_t err, const char* const func, const char* const file,
	           const int line)
	{
	    // Success: nothing to report.
	    if (err == cudaSuccess)
	    {
	        return;
	    }

	    std::cerr << "CUDA Runtime Error at: " << file << ":" << line
	              << std::endl;
	    std::cerr << cudaGetErrorString(err) << " " << func << std::endl;
	    std::exit(EXIT_FAILURE);
	}

	// Checks the CUDA runtime's last recorded error at the call site.
	#define CHECK_LAST_CUDA_ERROR() checkLast(__FILE__, __LINE__)

	// Queries cudaGetLastError(); if it reports anything other than
	// cudaSuccess, writes the call site (file:line) and the runtime's error
	// string to std::cerr and exits with EXIT_FAILURE. Returns normally
	// otherwise.
	void checkLast(const char* const file, const int line)
	{
	    cudaError_t const lastStatus{cudaGetLastError()};

	    // Success: nothing to report.
	    if (lastStatus == cudaSuccess)
	    {
	        return;
	    }

	    std::cerr << "CUDA Runtime Error at: " << file << ":" << line
	              << std::endl;
	    std::cerr << cudaGetErrorString(lastStatus) << std::endl;
	    std::exit(EXIT_FAILURE);
	}
	// Row-major 2D indexing (XY order): flat offset of element (r, c) when
	// consecutive columns of one row are adjacent, i.e. the row stride is
	// col_dim. row_dim is accepted for a uniform signature but is not used in
	// the expansion.
	#define I2D_RM(row_dim, col_dim, r, c) ((c) + (r) * (col_dim))

	// Column-major 2D indexing (XY order): flat offset of element (r, c) when
	// consecutive rows of one column are adjacent, i.e. the column stride is
	// row_dim. col_dim is accepted for a uniform signature but is not used in
	// the expansion.
	#define I2D_CM(row_dim, col_dim, r, c) ((r) + (c) * (row_dim))

	// 3D Accessing
	//
	// Layout selectors for I3D. Each name lists the three axes from the
	// slowest-varying (outermost) to the fastest-varying (contiguous) one,
	// e.g. XYZ_ORDER means x has the largest stride and z has stride 1.
	#define XYZ_ORDER 0
	#define YXZ_ORDER 1
	#define ZYX_ORDER 2
	#define YZX_ORDER 3
	#define XZY_ORDER 4
	#define ZXY_ORDER 5

	// Flat offset of element (x, y, z) in a dense x_dim * y_dim * z_dim array
	// laid out according to `order` (one of the *_ORDER constants above).
	// Expands to 0 for an unrecognised order value.
	//
	// `order` is parenthesised so that an expression argument (for example a
	// ternary or a bitwise expression) is compared as a whole against each
	// constant instead of being re-associated with the `==`.
	//
	// NOTE(review): the ZYX_ORDER and ZXY_ORDER formulas were previously
	// swapped relative to the slowest-to-fastest naming that the other four
	// orders follow; they now match their names. Confirm no caller relied on
	// the swapped layout.
	#define I3D(x_dim, y_dim, z_dim, x, y, z, order) \
	(((order) == XYZ_ORDER) ? ((x) * (y_dim) + (y)) * (z_dim) + (z) \
	: ((order) == YXZ_ORDER) ? ((y) * (x_dim) + (x)) * (z_dim) + (z) \
	: ((order) == ZYX_ORDER) ? ((z) * (y_dim) + (y)) * (x_dim) + (x) \
	: ((order) == YZX_ORDER) ? ((y) * (z_dim) + (z)) * (x_dim) + (x) \
	: ((order) == XZY_ORDER) ? ((x) * (z_dim) + (z)) * (y_dim) + (y) \
	: ((order) == ZXY_ORDER) ? ((z) * (x_dim) + (x)) * (y_dim) + (y) \
	: 0)
	#include <iostream>

	// Enumerates the CUDA devices visible to the runtime and prints a selection
	// of each device's properties to std::cout.
	//
	// Returns 0 when every query succeeded, 1 when the device count or any
	// device's properties could not be queried (the failure is reported on
	// std::cerr).
	int main() {
	  // Zero-initialised so the value is well defined even if the query fails.
	  int deviceCount = 0;
	  const cudaError_t countStatus = cudaGetDeviceCount(&deviceCount);
	  if (countStatus != cudaSuccess) {
	    std::cerr << "cudaGetDeviceCount failed: "
	              << cudaGetErrorString(countStatus) << std::endl;
	    return 1;
	  }
	  std::cout << "Device count: " << deviceCount << std::endl;

	  int exitCode = 0;
	  for (int i = 0; i < deviceCount; i++) {
	    cudaDeviceProp deviceProp;
	    const cudaError_t propStatus = cudaGetDeviceProperties(&deviceProp, i);
	    if (propStatus != cudaSuccess) {
	      // Skip this device rather than printing an unfilled struct, but keep
	      // reporting the remaining devices.
	      std::cerr << "cudaGetDeviceProperties failed for device " << i << ": "
	                << cudaGetErrorString(propStatus) << std::endl;
	      exitCode = 1;
	      continue;
	    }
	    std::cout << "Device " << i << " has " << std::endl
	              << "\tname: " << deviceProp.name << std::endl
	              << "\tcapability: " << deviceProp.major << "." << deviceProp.minor
	              << std::endl
	              << "\ttotalGlobalMem: " << deviceProp.totalGlobalMem << std::endl
	              << "\tsharedMemPerBlock: " << deviceProp.sharedMemPerBlock
	              << std::endl
	              // Label previously read "\ttregsPerBlock" (stray 't').
	              << "\tregsPerBlock: " << deviceProp.regsPerBlock << std::endl
	              << "\twarpSize: " << deviceProp.warpSize << std::endl
	              << "\tMaxThreadsPerBlock: " << deviceProp.maxThreadsPerBlock
	              << std::endl
	              << "\tMaxThreadsDim: " << deviceProp.maxThreadsDim[0] << " "
	              << deviceProp.maxThreadsDim[1] << " "
	              << deviceProp.maxThreadsDim[2] << std::endl
	              << "\tMaxGridSize: " << deviceProp.maxGridSize[0] << " "
	              << deviceProp.maxGridSize[1] << " " << deviceProp.maxGridSize[2]
	              << std::endl
	              << "\tMaxStreamingMultiprocessors: "
	              << deviceProp.multiProcessorCount << std::endl;
	  }
	  return exitCode;
	}