Skip to content

Instantly share code, notes, and snippets.

@sandeepkumar-skb
Forked from teju85/device-prop-test.cu
Created September 16, 2020 18:34
Show Gist options
  • Save sandeepkumar-skb/3f827b5474a15c88ddd5e9d72fea7ad8 to your computer and use it in GitHub Desktop.
Save sandeepkumar-skb/3f827b5474a15c88ddd5e9d72fea7ad8 to your computer and use it in GitHub Desktop.
Sample program comparing the performance of cudaGetDeviceProperties and cudaDeviceGetAttribute
// Compiling and running this program:
// nvcc -std=c++11 device-prop-test.cu && ./a.out
#include <chrono>
#include <cstdio>
#include <iostream>
using namespace std;
// Evaluates a CUDA runtime call exactly once. If the result is not
// cudaSuccess, prints the stringified call and its error description to
// stderr and makes the ENCLOSING FUNCTION return -1.
//
// Usage notes:
//  - Because of the embedded `return -1`, this macro may only be used inside
//    functions whose return type can hold -1 (e.g. `int main`).
//  - The argument is parenthesised and the temporary uses a name that is
//    unlikely to appear in caller code, so an invocation such as
//    CUDA_CHECK(status) does not end up initialising the temporary from
//    itself.
//  - Diagnostics go to stderr so they do not interleave with results that the
//    program writes to stdout.
#define CUDA_CHECK(call)                                                \
  do {                                                                  \
    cudaError_t cuda_check_status_ = (call);                            \
    if (cuda_check_status_ != cudaSuccess) {                            \
      std::fprintf(stderr, "FAIL: call='%s'. Reason:%s\n", #call,       \
                   cudaGetErrorString(cuda_check_status_));             \
      return -1;                                                        \
    }                                                                   \
  } while (0)
// Measures and prints the average host-side wall-clock cost of two ways of
// querying information about the current CUDA device:
//   1. cudaGetDeviceProperties, which fills a whole cudaDeviceProp struct;
//   2. cudaDeviceGetAttribute, called twice per iteration (max shared memory
//      per block and multiprocessor count).
//
// Each figure is the total elapsed time of the loop divided by the iteration
// count, in microseconds. Note that the second figure therefore covers TWO
// attribute queries per iteration, not one.
//
// Returns 0 on success, or -1 (via CUDA_CHECK) as soon as any CUDA call fails.
// The command-line arguments are not used.
int main(int argc, char** argv) {
  (void)argc;
  (void)argv;

  // steady_clock is monotonic, which is what interval measurements need.
  using Clock = chrono::steady_clock;
  // Floating-point microseconds: keeps the sub-microsecond part of the total
  // instead of truncating it before the average is computed.
  using MicrosecondsF = chrono::duration<double, std::micro>;

  // Number of timed repetitions for each API.
  const int kIterations = 25;

  int devId;
  CUDA_CHECK(cudaGetDevice(&devId));

  // --- Full property struct query ---
  cudaDeviceProp prop;
  auto start = Clock::now();
  for (int i = 0; i < kIterations; ++i) {
    CUDA_CHECK(cudaGetDeviceProperties(&prop, devId));
  }
  auto end = Clock::now();
  cout
      << "cudaGetDeviceProperties -> "
      << MicrosecondsF(end - start).count() / kIterations
      << "us" << endl;

  // --- Individual attribute queries (two per iteration) ---
  int smemSize, numProcs;
  start = Clock::now();
  for (int i = 0; i < kIterations; ++i) {
    CUDA_CHECK(cudaDeviceGetAttribute(&smemSize,
                                      cudaDevAttrMaxSharedMemoryPerBlock,
                                      devId));
    CUDA_CHECK(cudaDeviceGetAttribute(&numProcs,
                                      cudaDevAttrMultiProcessorCount,
                                      devId));
  }
  end = Clock::now();
  cout
      << "cudaDeviceGetAttribute -> "
      << MicrosecondsF(end - start).count() / kIterations
      << "us" << endl;

  return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment