Skip to content

Instantly share code, notes, and snippets.

@teju85
Last active September 16, 2020 18:34
Show Gist options
  • Save teju85/9521e2224f0c31f71a93b593ff64e8da to your computer and use it in GitHub Desktop.
Save teju85/9521e2224f0c31f71a93b593ff64e8da to your computer and use it in GitHub Desktop.
Sample program comparing the performance of cudaGetDeviceProperties and cudaDeviceGetAttribute
// Compiling and running this program:
// nvcc -std=c++11 device-prop-test.cu && ./a.out
#include <chrono>
#include <cstdio>
#include <iostream>
using namespace std;
// CUDA_CHECK(call): evaluates `call` (an expression yielding a cudaError_t)
// exactly once. If the result is not cudaSuccess, the stringified expression
// and the text from cudaGetErrorString() are written to stderr and the
// *enclosing function* returns -1. The macro can therefore only be used inside
// functions whose return type accepts -1 (such as main).
//
// Diagnostics go to stderr so they never mix with the timing results that the
// program prints on stdout. The local is given a deliberately unusual name so
// that it cannot capture a caller variable referenced inside `call`.
#define CUDA_CHECK(call)                                            \
  do {                                                              \
    const cudaError_t cudaCheckStatus_ = (call);                    \
    if (cudaCheckStatus_ != cudaSuccess) {                          \
      std::fprintf(stderr, "FAIL: call='%s'. Reason:%s\n", #call,   \
                   cudaGetErrorString(cudaCheckStatus_));           \
      return -1;                                                    \
    }                                                               \
  } while (0)
// Micro-benchmark comparing two ways of querying device information for the
// current CUDA device:
//   1. cudaGetDeviceProperties  - fills a whole cudaDeviceProp struct.
//   2. cudaDeviceGetAttribute   - fetches individual attributes (here two per
//                                 iteration: max shared memory per block and
//                                 multiprocessor count).
// For each approach the average wall-clock time per loop iteration is printed
// to stdout in microseconds. Note that the second figure covers BOTH attribute
// queries of one iteration, not a single call.
//
// Returns 0 on success, or -1 (via CUDA_CHECK) as soon as any CUDA call fails.
// Command-line arguments are not used.
int main(int /*argc*/, char** /*argv*/) {
  // steady_clock is monotonic, which is what interval measurements need;
  // high_resolution_clock may alias a clock that can be adjusted.
  using Clock = std::chrono::steady_clock;
  // Floating-point microseconds: avoids truncating the total to a whole
  // number of microseconds before the average is computed.
  using MicrosecondsF = std::chrono::duration<double, std::micro>;
  // Number of timed repetitions of each query variant.
  constexpr int kIterations = 25;

  int devId = 0;
  CUDA_CHECK(cudaGetDevice(&devId));

  cudaDeviceProp prop;
  int smemSize = 0;
  int numProcs = 0;

  // Untimed warm-up of every API under test. NOTE(review): presumably the
  // first call into the runtime pays one-time initialization costs; running
  // each query once up front keeps any such cost out of both timed regions so
  // the comparison is not biased against whichever loop happens to run first.
  CUDA_CHECK(cudaGetDeviceProperties(&prop, devId));
  CUDA_CHECK(cudaDeviceGetAttribute(&smemSize,
                                    cudaDevAttrMaxSharedMemoryPerBlock,
                                    devId));
  CUDA_CHECK(cudaDeviceGetAttribute(&numProcs,
                                    cudaDevAttrMultiProcessorCount,
                                    devId));

  // --- Variant 1: full property struct ------------------------------------
  auto start = Clock::now();
  for (int i = 0; i < kIterations; ++i) {
    CUDA_CHECK(cudaGetDeviceProperties(&prop, devId));
  }
  auto end = Clock::now();
  std::cout << "cudaGetDeviceProperties -> "
            << MicrosecondsF(end - start).count() / kIterations
            << "us" << '\n';

  // --- Variant 2: two individual attribute queries per iteration ----------
  start = Clock::now();
  for (int i = 0; i < kIterations; ++i) {
    CUDA_CHECK(cudaDeviceGetAttribute(&smemSize,
                                      cudaDevAttrMaxSharedMemoryPerBlock,
                                      devId));
    CUDA_CHECK(cudaDeviceGetAttribute(&numProcs,
                                      cudaDevAttrMultiProcessorCount,
                                      devId));
  }
  end = Clock::now();
  std::cout << "cudaDeviceGetAttribute -> "
            << MicrosecondsF(end - start).count() / kIterations
            << "us" << '\n';

  return 0;
}
@adarsharao
Copy link

I have stolen the CUDA_CHECK macro from you! Thanks.

@teju85
Copy link
Author

teju85 commented Nov 25, 2019

Sure. Enjoy!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment