Skip to content

Instantly share code, notes, and snippets.

@Vesnica
Last active March 27, 2023 06:13
Show Gist options
  • Save Vesnica/059f52b8c1f637d3bbd68494ef0b7b7c to your computer and use it in GitHub Desktop.
Save Vesnica/059f52b8c1f637d3bbd68494ef0b7b7c to your computer and use it in GitHub Desktop.
CUDA Device Capabilities
#include <cuda.h>
#include <cuda_runtime.h>
#include <stdio.h>
/*
 * Query a single integer device attribute and report any CUDA error.
 *
 * value  - receives the attribute value; reset to 0 on failure so the
 *          caller never observes an uninitialized int
 * attr   - attribute to query
 * device - device ordinal to query
 * label  - human-readable attribute name used in the error message
 *
 * Returns 1 on success, 0 on failure (after printing the error to stderr).
 */
static int queryAttribute(int *value, enum cudaDeviceAttr attr, int device, const char *label)
{
    cudaError_t status = cudaDeviceGetAttribute(value, attr, device);
    if (status != cudaSuccess) {
        fprintf(stderr, "Failed to query %s on device %d: %s\n",
                label, device, cudaGetErrorString(status));
        *value = 0;
        return 0;
    }
    return 1;
}

/*
 * Print a selection of hardware limits and properties of CUDA device 0.
 *
 * Returns 0 when every attribute was queried and printed, 1 as soon as
 * any CUDA runtime call fails (the reason is written to stderr).
 */
int main(int argc, char **argv)
{
    /* Command-line arguments are not used. */
    (void)argc;
    (void)argv;

    /* Single place that selects which device is inspected. */
    const int device = 0;

    /* Fail early with a clear message when no usable device is present,
     * instead of printing garbage values further down. */
    int deviceCount = 0;
    cudaError_t status = cudaGetDeviceCount(&deviceCount);
    if (status != cudaSuccess) {
        fprintf(stderr, "Failed to get CUDA device count: %s\n", cudaGetErrorString(status));
        return 1;
    }
    if (deviceCount <= device) {
        fprintf(stderr, "CUDA device %d not available (%d device(s) found)\n", device, deviceCount);
        return 1;
    }

    int maxThreadsPerBlock;
    if (!queryAttribute(&maxThreadsPerBlock, cudaDevAttrMaxThreadsPerBlock, device, "max threads per block"))
        return 1;
    int maxBlocksPerSM;
    if (!queryAttribute(&maxBlocksPerSM, cudaDevAttrMaxBlocksPerMultiprocessor, device, "max blocks per SM"))
        return 1;
    int numSMs;
    if (!queryAttribute(&numSMs, cudaDevAttrMultiProcessorCount, device, "SM count"))
        return 1;
    printf("Maximum number of threads per block: %d\n", maxThreadsPerBlock);
    printf("Maximum number of blocks per SM: %d\n", maxBlocksPerSM);
    printf("Number of SMs: %d\n", numSMs);

    /* Grid limits are reported per dimension (x, y, z). */
    int maxGridSize[3];
    if (!queryAttribute(&maxGridSize[0], cudaDevAttrMaxGridDimX, device, "max grid dim X"))
        return 1;
    if (!queryAttribute(&maxGridSize[1], cudaDevAttrMaxGridDimY, device, "max grid dim Y"))
        return 1;
    if (!queryAttribute(&maxGridSize[2], cudaDevAttrMaxGridDimZ, device, "max grid dim Z"))
        return 1;
    printf("Maximum grid size: %d x %d x %d\n", maxGridSize[0], maxGridSize[1], maxGridSize[2]);

    int maxSharedMemory;
    if (!queryAttribute(&maxSharedMemory, cudaDevAttrMaxSharedMemoryPerBlock, device, "max shared memory per block"))
        return 1;
    printf("Maximum shared memory per block: %d bytes\n", maxSharedMemory);

    int warpSize;
    if (!queryAttribute(&warpSize, cudaDevAttrWarpSize, device, "warp size"))
        return 1;
    printf("Warp size: %d\n", warpSize);

    int maxRegistersPerBlock;
    if (!queryAttribute(&maxRegistersPerBlock, cudaDevAttrMaxRegistersPerBlock, device, "max registers per block"))
        return 1;
    printf("Maximum number of registers per block: %d\n", maxRegistersPerBlock);

    int maxSharedMemoryPerMultiprocessor;
    if (!queryAttribute(&maxSharedMemoryPerMultiprocessor, cudaDevAttrMaxSharedMemoryPerMultiprocessor, device,
                        "max shared memory per multiprocessor"))
        return 1;
    printf("Maximum shared memory per multiprocessor: %d bytes\n", maxSharedMemoryPerMultiprocessor);

    int maxRegistersPerMultiprocessor;
    if (!queryAttribute(&maxRegistersPerMultiprocessor, cudaDevAttrMaxRegistersPerMultiprocessor, device,
                        "max registers per multiprocessor"))
        return 1;
    printf("Maximum number of registers per multiprocessor: %d\n", maxRegistersPerMultiprocessor);

    int maxThreadsPerMultiprocessor;
    if (!queryAttribute(&maxThreadsPerMultiprocessor, cudaDevAttrMaxThreadsPerMultiProcessor, device,
                        "max threads per multiprocessor"))
        return 1;
    printf("Maximum number of threads per multiprocessor: %d\n", maxThreadsPerMultiprocessor);

    int memoryClockRate;
    if (!queryAttribute(&memoryClockRate, cudaDevAttrMemoryClockRate, device, "memory clock rate"))
        return 1;
    printf("Memory clock rate: %d kHz\n", memoryClockRate);

    int memoryBusWidth;
    if (!queryAttribute(&memoryBusWidth, cudaDevAttrGlobalMemoryBusWidth, device, "memory bus width"))
        return 1;
    printf("Memory bus width: %d bits\n", memoryBusWidth);

    int L2CacheSize;
    if (!queryAttribute(&L2CacheSize, cudaDevAttrL2CacheSize, device, "L2 cache size"))
        return 1;
    printf("L2 cache size: %d bytes\n", L2CacheSize);

    /* Compute capability is exposed as two separate attributes. */
    int Major;
    if (!queryAttribute(&Major, cudaDevAttrComputeCapabilityMajor, device, "compute capability major"))
        return 1;
    int Minor;
    if (!queryAttribute(&Minor, cudaDevAttrComputeCapabilityMinor, device, "compute capability minor"))
        return 1;
    printf("Compute capability: %d.%d\n", Major, Minor);

    return 0;
}
Maximum number of threads per block: 1024
Maximum number of blocks per SM: 32
Number of SMs: 20
Maximum grid size: 2147483647 x 65535 x 65535
Maximum shared memory per block: 49152 bytes
Warp size: 32
Maximum number of registers per block: 65536
Maximum shared memory per multiprocessor: 98304 bytes
Maximum number of registers per multiprocessor: 65536
Maximum number of threads per multiprocessor: 2048
Memory clock rate: 5005000 kHz
Memory bus width: 256 bits
L2 cache size: 2097152 bytes
Compute capability: 6.1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment