Skip to content

Instantly share code, notes, and snippets.

@nouiz
Forked from mrocklin/time_cudaMemcpyAsync.cu
Created October 1, 2012 20:26
Show Gist options
  • Save nouiz/3814208 to your computer and use it in GitHub Desktop.
Save nouiz/3814208 to your computer and use it in GitHub Desktop.
A quick CUDA program to time the effectiveness of using asynchronous CPU-GPU memory transfers.
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
// Number of float elements in the buffer that main() copies from host to device.
const int n = 160 * 1000 * 1000;
// Print the number of milliseconds elapsed between two timevals.
//   a       - start timestamp
//   b       - end timestamp
//   message - label printed in front of the duration; declared const char*
//             so string literals can be passed without a deprecated
//             literal-to-char* conversion
void printDuration(timeval a, timeval b, const char* message)
{
    // Seconds and microseconds are converted to milliseconds separately; the
    // microsecond difference may be negative, in which case it corrects the
    // whole-second term downwards.
    double elapsedTime = (b.tv_sec - a.tv_sec) * 1000.0;
    elapsedTime += (b.tv_usec - a.tv_usec) / 1000.0;
    printf("%s: %lfms\n", message, elapsedTime);
}
// Report a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char* what)
{
    if (status == cudaSuccess)
        return true;
    fprintf(stderr, "Error! %s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Times, in order: one gettimeofday() call, the host allocation, the device
// allocation, ten host-to-device copies of n floats issued with cudaMemcpy,
// and the same ten copies issued with cudaMemcpyAsync on a dedicated stream.
// Each duration is printed in milliseconds, followed by the async engine
// count and name of device 0.
// Returns EXIT_SUCCESS, or EXIT_FAILURE if an allocation or CUDA call failed.
int main()
{
    int exitCode = EXIT_FAILURE;
    bool allOk = true;
    timeval a, b, c;
    // size_t rather than int: the byte count comes from sizeof and must not
    // be narrowed or overflow if n is raised.
    const size_t bytes = (size_t)n * sizeof(float);
    float* hdata = NULL;
    float* ddata = NULL;
    cudaStream_t s;
    bool streamCreated = false;
    cudaError_t err;
    cudaDeviceProp dev;

    // Cost of a single gettimeofday() call: the call that fills c is the one
    // bracketed by the two timestamps.
    gettimeofday(&a, NULL);
    gettimeofday(&c, NULL);
    gettimeofday(&b, NULL);
    printDuration(a, b, "gettimeofday time");

    gettimeofday(&a, NULL);
    hdata = (float*)malloc(bytes);
    gettimeofday(&b, NULL);
    printDuration(a, b, "Host Malloc time");
    if (hdata == NULL) {
        fprintf(stderr, "Error! host malloc of %lu bytes failed\n",
                (unsigned long)bytes);
        goto cleanup;
    }

    // Here is a stream if you want to play with intra-GPU concurrency
    if (!cudaOk(cudaStreamCreate(&s), "cudaStreamCreate"))
        goto cleanup;
    streamCreated = true;

    // Allocate array on GPU memory
    gettimeofday(&a, NULL);
    err = cudaMalloc((void**)&ddata, bytes);
    gettimeofday(&b, NULL);
    printDuration(a, b, "Device Malloc time");
    if (!cudaOk(err, "cudaMalloc")) {
        ddata = NULL;
        goto cleanup;
    }

    // Transfer CPU - GPU synchronously
    if (!cudaOk(cudaDeviceSynchronize(), "cudaDeviceSynchronize"))
        goto cleanup;
    gettimeofday(&a, NULL);
    for (int i = 0; i < 10; i++) {
        err = cudaMemcpy(ddata, hdata, bytes, cudaMemcpyHostToDevice);
        if (!cudaOk(err, "cudaMemcpy")) {
            // Remaining iterations would only repeat the failure.
            allOk = false;
            break;
        }
    }
    gettimeofday(&b, NULL);
    // This synchronize also serves as the barrier before the async section.
    if (!cudaOk(cudaDeviceSynchronize(), "cudaDeviceSynchronize"))
        allOk = false;
    printDuration(a, b, "cudaMemCpy Time");

    // Transfer CPU - GPU asynchronously
    gettimeofday(&a, NULL);
    for (int i = 0; i < 10; i++) {
        err = cudaMemcpyAsync(ddata, hdata, bytes, cudaMemcpyHostToDevice, s);
        if (!cudaOk(err, "cudaMemcpyAsync")) {
            allOk = false;
            break;
        }
    }
    // The clock is stopped before the device is synchronized, so this reports
    // how long the host spent issuing the ten copies, not necessarily how
    // long the transfers took to complete.
    gettimeofday(&b, NULL);
    if (!cudaOk(cudaDeviceSynchronize(), "cudaDeviceSynchronize"))
        allOk = false;
    printDuration(a, b, "cudaMemCpyAsync Time");

    if (cudaOk(cudaGetDeviceProperties(&dev, 0), "cudaGetDeviceProperties")) {
        printf("Engine count %d\n", dev.asyncEngineCount);
        printf("Gpu name %s\n", dev.name);
    } else {
        allOk = false;
    }

    if (allOk)
        exitCode = EXIT_SUCCESS;

cleanup:
    // Release everything that was acquired, on both success and error paths.
    if (ddata != NULL)
        cudaOk(cudaFree(ddata), "cudaFree");
    if (streamCreated)
        cudaOk(cudaStreamDestroy(s), "cudaStreamDestroy");
    free(hdata);
    return exitCode;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment