Skip to content

Instantly share code, notes, and snippets.

@f0k
Last active April 8, 2024 07:23
Show Gist options
  • Star 9 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save f0k/0d6431e3faa60bffc788f8b4daa029b1 to your computer and use it in GitHub Desktop.
Save f0k/0d6431e3faa60bffc788f8b4daa029b1 to your computer and use it in GitHub Desktop.
Simple program to test whether nvcc/CUDA work
#include <stdio.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
/* Outputs some information on CUDA-enabled devices on your computer,
* including compute capability and current memory usage.
*
* On Linux, compile with: nvcc -o cuda_check cuda_check.c -lcuda
* On Windows, compile with: nvcc -o cuda_check.exe cuda_check.c -lcuda
*
* Authors: Thomas Unterthiner, Jan Schlüter
*/
/* Returns the number of CUDA cores per multiprocessor for a given
 * Compute Capability version, or 0 if the version is not in the table.
 *
 * There is no way to retrieve that via the API, so it needs to be
 * hard-coded. See _ConvertSMVer2Cores in helper_cuda.h in NVIDIA's
 * CUDA Samples.
 *
 * major: Compute Capability major version (e.g. 8 for 8.6)
 * minor: Compute Capability minor version (e.g. 6 for 8.6)
 */
int ConvertSMVer2Cores(int major, int minor)
{
    /* The version is packed as 0xMm (major in the high nibble, minor in
     * the low nibble), so the case labels read like the version number. */
    switch ((major << 4) + minor) {
        case 0x10: return 8;    // Tesla
        case 0x11: return 8;
        case 0x12: return 8;
        case 0x13: return 8;
        case 0x20: return 32;   // Fermi
        case 0x21: return 48;
        case 0x30: return 192;  // Kepler
        case 0x32: return 192;
        case 0x35: return 192;
        case 0x37: return 192;
        case 0x50: return 128;  // Maxwell
        case 0x52: return 128;
        case 0x53: return 128;
        case 0x60: return 64;   // Pascal
        case 0x61: return 128;
        case 0x62: return 128;
        case 0x70: return 64;   // Volta
        case 0x72: return 64;   // Xavier
        case 0x75: return 64;   // Turing
        case 0x80: return 64;   // Ampere
        case 0x86: return 128;
        case 0x87: return 128;
        case 0x89: return 128;  // Ada
        case 0x90: return 128;  // Hopper
        default:   return 0;    // unknown architecture
    }
}
int main()
{
int nGpus;
int i;
char name[100];
int cc_major, cc_minor, cores, cuda_cores, threads_per_core, clockrate;
size_t freeMem;
size_t totalMem;
CUresult result;
CUdevice device;
CUcontext context;
result = cuInit(0);
if (result != CUDA_SUCCESS) {
printf("cuInit failed with error code %d: %s\n", result, cudaGetErrorString(result));
return 1;
}
result = cuDeviceGetCount(&nGpus);
if (result != CUDA_SUCCESS) {
printf("cuDeviceGetCount failed with error code %d: %s\n", result, cudaGetErrorString(result));
return 1;
}
printf("Found %d device(s).\n", nGpus);
for (i = 0; i < nGpus; i++) {
cuDeviceGet(&device, i);
printf("Device: %d\n", i);
if (cuDeviceGetName(&name[0], sizeof(name), device) == CUDA_SUCCESS) {
printf(" Name: %s\n", &name[0]);
}
if ((cuDeviceGetAttribute(&cc_major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device) == CUDA_SUCCESS) &&
(cuDeviceGetAttribute(&cc_minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device) == CUDA_SUCCESS)) {
printf(" Compute Capability: %d.%d\n", cc_major, cc_minor);
}
else {
cc_major = cc_minor = 0;
}
if (cuDeviceGetAttribute(&cores, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, device) == CUDA_SUCCESS) {
printf(" Multiprocessors: %d\n", cores);
if (cc_major && cc_minor) {
cuda_cores = cores * ConvertSMVer2Cores(cc_major, cc_minor);
if (cuda_cores > 0) {
printf(" CUDA Cores: %d\n", cuda_cores);
}
else {
printf(" CUDA Cores: unknown\n");
}
}
if (cuDeviceGetAttribute(&threads_per_core, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR, device) == CUDA_SUCCESS) {
printf(" Concurrent threads: %d\n", cores*threads_per_core);
}
}
if (cuDeviceGetAttribute(&clockrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, device) == CUDA_SUCCESS) {
printf(" GPU clock: %g MHz\n", clockrate/1000.);
}
if (cuDeviceGetAttribute(&clockrate, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, device) == CUDA_SUCCESS) {
printf(" Memory clock: %g MHz\n", clockrate/1000.);
}
cuCtxCreate(&context, 0, device);
result = cuMemGetInfo(&freeMem, &totalMem);
if (result == CUDA_SUCCESS ) {
printf(" Total Memory: %ld MiB\n Free Memory: %ld MiB\n", totalMem / ( 1024 * 1024 ), freeMem / ( 1024 * 1024 ));
} else {
printf(" cMemGetInfo failed with error code %d: %s\n", result, cudaGetErrorString(result));
}
cuCtxDestroy(context);
}
return 0;
}
@zhmlcg
Copy link

zhmlcg commented Oct 19, 2021

Warnings I encountered when compiling on Linux:

cuda_check.c: In function ‘main’:
cuda_check.c:74:3: warning: ‘cuDeviceComputeCapability’ is deprecated [-Wdeprecated-declarations]
74 | if (cuDeviceComputeCapability(&cc_major, &cc_minor, device) == CUDA_SUCCESS) {
| ^~
In file included from cuda_check.c:2:
/usr/lib/cuda/bin/../targets/x86_64-linux/include/cuda.h:3594:36: note: declared here
3594 | __CUDA_DEPRECATED CUresult CUDAAPI cuDeviceComputeCapability(int *major, int *minor, CUdevice dev);
| ^~~~~~~~~~~~~~~~~~~~~~~~~
cuda_check.c:79:4: warning: ‘cuDeviceComputeCapability’ is deprecated [-Wdeprecated-declarations]
79 | if (cuDeviceComputeCapability(&cc_major, &cc_minor, device) == CUDA_SUCCESS) {
| ^~
In file included from cuda_check.c:2:
/usr/lib/cuda/bin/../targets/x86_64-linux/include/cuda.h:3594:36: note: declared here
3594 | __CUDA_DEPRECATED CUresult CUDAAPI cuDeviceComputeCapability(int *major, int *minor, CUdevice dev);
| ^~~~~~~~~~~~~~~~~~~~~~~~~
cuda_check.c:105:3: warning: ‘cuCtxDetach’ is deprecated [-Wdeprecated-declarations]
105 | cuCtxDetach(context);
| ^~~~~~~~~~~
In file included from cuda_check.c:2:
/usr/lib/cuda/bin/../targets/x86_64-linux/include/cuda.h:4703:36: note: declared here
4703 | __CUDA_DEPRECATED CUresult CUDAAPI cuCtxDetach(CUcontext ctx);
| ^~~~~~~~~~~

I'm using CUDA 11.1. Do you have plans to revise the code to work with CUDA 11?

@prabathbr
Copy link

prabathbr commented Nov 10, 2021

Warnings I encountered when compiling on Linux:

cuda_check.c: In function ‘main’:
cuda_check.c:74:3: warning: ‘cuDeviceComputeCapability’ is deprecated [-Wdeprecated-declarations]
74 | if (cuDeviceComputeCapability(&cc_major, &cc_minor, device) == CUDA_SUCCESS) {
| ^~
In file included from cuda_check.c:2:
/usr/lib/cuda/bin/../targets/x86_64-linux/include/cuda.h:3594:36: note: declared here
3594 | __CUDA_DEPRECATED CUresult CUDAAPI cuDeviceComputeCapability(int *major, int *minor, CUdevice dev);
| ^~~~~~~~~~~~~~~~~~~~~~~~~
cuda_check.c:79:4: warning: ‘cuDeviceComputeCapability’ is deprecated [-Wdeprecated-declarations]
79 | if (cuDeviceComputeCapability(&cc_major, &cc_minor, device) == CUDA_SUCCESS) {
| ^~
In file included from cuda_check.c:2:
/usr/lib/cuda/bin/../targets/x86_64-linux/include/cuda.h:3594:36: note: declared here
3594 | __CUDA_DEPRECATED CUresult CUDAAPI cuDeviceComputeCapability(int *major, int *minor, CUdevice dev);
| ^~~~~~~~~~~~~~~~~~~~~~~~~
cuda_check.c:105:3: warning: ‘cuCtxDetach’ is deprecated [-Wdeprecated-declarations]
105 | cuCtxDetach(context);
| ^~~~~~~~~~~
In file included from cuda_check.c:2:
/usr/lib/cuda/bin/../targets/x86_64-linux/include/cuda.h:4703:36: note: declared here
4703 | __CUDA_DEPRECATED CUresult CUDAAPI cuCtxDetach(CUcontext ctx);
| ^~~~~~~~~~~

I'm using CUDA 11.1. Do you have plans to revise the code to work with CUDA 11?

I was able to compile this code with CUDA 11.5 without any issues on Windows 10

J:\test>nvcc --version
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2021 NVIDIA Corporation
Built on Mon_Sep_13_20:11:50_Pacific_Daylight_Time_2021
Cuda compilation tools, release 11.5, V11.5.50
Build cuda_11.5.r11.5/compiler.30411180_0

(base) J:\test>nvcc -o cuda_check.exe cuda_check.c -lcuda
cuda_check.c
cuda_check.c(101): warning C4477: 'printf' : format string '%ld' requires an argument of type 'long', but variadic argument 1 has type 'size_t'
cuda_check.c(101): note: consider using '%zd' in the format string
cuda_check.c(101): warning C4477: 'printf' : format string '%ld' requires an argument of type 'long', but variadic argument 2 has type 'size_t'
cuda_check.c(101): note: consider using '%zd' in the format string
Creating library cuda_check.lib and object cuda_check.exp

(base) J:\test>cuda_check
Found 1 device(s).
Device: 0
Name: NVIDIA GeForce RTX 3060
Compute Capability: 8.6
Multiprocessors: 28
CUDA Cores: unknown
Concurrent threads: 43008
GPU clock: 1837 MHz
Memory clock: 7501 MHz
Total Memory: 12287 MiB
Free Memory: 11282 MiB

@DABH
Copy link

DABH commented Jan 10, 2022

$ nvcc cuda_check.cu -o cuda_check
cuda_check.cu(59): error: argument of type "CUresult" is incompatible with parameter of type "cudaError_t"

cuda_check.cu(64): error: argument of type "CUresult" is incompatible with parameter of type "cudaError_t"

cuda_check.cu(103): error: argument of type "CUresult" is incompatible with parameter of type "cudaError_t"

3 errors detected in the compilation of "cuda_check.cu".

with CUDA 11.5...

@apivovarov
Copy link

apivovarov commented Jun 7, 2023

@DABH

# Replace
                printf("cuInit failed with error code %d: %s\n", result, cudaGetErrorString(result));
# With
                const char* errStr;
                cuGetErrorString(result, &errStr);
                printf("cuInit failed with error code %d: %s\n", result, errStr);

Fix other two cudaGetErrorString too.
Fixed cuda_check.cu

@JohnTesla
Copy link

jag@Aigen:~$ nvtop
No GPU to monitor.

@f0k
Copy link
Author

f0k commented Jul 10, 2023

@apivovarov: Thanks, nice catch! cuInit is from the driver API and returns a CUresult, while cudaGetErrorString is from the runtime API and expects a cudaError, so the code is mixing the two.

In any case, there are two ways to compile this code:

  1. The one mentioned in the beginning of the file is to store it as cuda_check.c and compile it with nvcc -o cuda_check cuda_check.c -lcuda. This gives some deprecation warning on cuDeviceComputeCapability (as also seen by @zhmlcg), but still works.
  2. The one attempted by @DABH and @apivovarov is to store the file as cuda_check.cu and compile it with nvcc -o cuda_check cuda_check.cu. This one does not work, because nvcc then compiles the file as C++, which rejects passing a CUresult to cudaGetErrorString.

I guess the code should be fixed to consistently use only the driver API or only the runtime API, but it still works (and is backwards-compatible down to CUDA 3 or so).

@igormorgado
Copy link

igormorgado commented Oct 1, 2023

To compile use:

nvcc -o cuda_check cuda_check.c -lcuda 

To fix the deprecated warning just do the following changes:

Where you read

if (cuDeviceComputeCapability(&cc_major, &cc_minor, device) == CUDA_SUCCESS) {

Replace with:

if ((cuDeviceGetAttribute(&cc_major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device) == CUDA_SUCCESS) &&
    (cuDeviceGetAttribute(&cc_minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device) == CUDA_SUCCESS)) {

And where you read

cuCtxDetach(context);

Replace with:

cuCtxDestroy(context);

Here

➤ nvcc --version 
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Jun_13_19:16:58_PDT_2023
Cuda compilation tools, release 12.2, V12.2.91
Build cuda_12.2.r12.2/compiler.32965470_0

@f0k
Copy link
Author

f0k commented Oct 3, 2023

Thanks @igormorgado, I've updated the gist accordingly (and also added some missing architectures to ConvertSMVer2Cores).

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment