Skip to content

Instantly share code, notes, and snippets.

@jrprice
Last active August 8, 2017 16:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jrprice/11c76611b9ed93caba9bff51272e5af5 to your computer and use it in GitHub Desktop.
Save jrprice/11c76611b9ed93caba9bff51272e5af5 to your computer and use it in GitHub Desktop.
CUDA driver API blocking cuMemAlloc issue
#include <limits.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include <cuda.h>
// Terminates the process with a diagnostic if err is not CUDA_SUCCESS.
void check(CUresult err);
// Size in bytes of the host buffer and of each device buffer.
int size;
// Device, context and stream set up in main(); the background thread
// uses the context and the stream.
CUdevice device;
CUcontext context;
CUstream stream;
// dev_A is the destination of the background copy; dev_B is the second
// allocation made by main() after the copy has been enqueued.
CUdeviceptr dev_A, dev_B;
// Source buffer for the background copy, allocated with malloc() in main().
void *host;
// Flag allocated by the background thread with cuMemHostAlloc(); the stream
// waits on it and main() sets it to 1 to let the copy proceed.
int *host_flag;
/*
 * Thread entry point: enqueues a host-to-device copy of `size` bytes from
 * `host` to `dev_A` on `stream`, behind a stream wait on *host_flag, then
 * blocks until the stream has drained.
 *
 * The flag is allocated here and initialised to 0; the wait is satisfied
 * once its value is >= 1, which main() arranges by storing 1.
 *
 * data: unused.
 * Returns NULL. Any CUDA failure terminates the process via check().
 */
void* run_background_memcpy(void *data)
{
    CUresult err;
    CUdeviceptr dev_flag;

    (void)data;

    // Make the context created in main() current on this thread.
    err = cuCtxSetCurrent(context);
    check(err);

    // Make stream wait for a flag
    err = cuMemHostAlloc((void**)&host_flag, sizeof(*host_flag), CU_MEMHOSTALLOC_DEVICEMAP);
    check(err);
    *host_flag = 0;
    err = cuMemHostGetDevicePointer(&dev_flag, host_flag, 0);
    check(err);
    err = cuStreamWaitValue32(stream, dev_flag, 1, CU_STREAM_WAIT_VALUE_GEQ);
    check(err);

    // Enqueue a memcpy
    printf("copy started\n");
    err = cuMemcpyHtoDAsync(dev_A, host, size, stream);
    check(err);
    printf("copy returned\n");

    // Wait for memcpy to complete
    err = cuStreamSynchronize(stream);
    check(err);
    printf("copy finished\n");

    return NULL;
}
/*
 * Reproducer driver: allocates a host buffer and a device buffer of the
 * requested size, starts a thread that enqueues a flag-gated async copy,
 * then times (via the printed messages) a second cuMemAlloc issued while
 * that copy is still pending. Finally releases the flag and joins the thread.
 *
 * argv[1]: buffer size in units of 10^6 bytes; must be a positive decimal
 *          integer whose byte count fits in an int.
 * Returns 0 on success; exits with status 1 on bad arguments or any failure.
 */
int main(int argc, char *argv[])
{
    CUresult err;
    pthread_t thread;
    char *end;
    long megabytes;
    int rc;

    if (argc != 2)
    {
        printf("Usage: ./blocking-memalloc SIZE_IN_MBs\n");
        exit(1);
    }

    // Parse with strtol so garbage and out-of-range values are rejected
    // instead of silently overflowing the int byte count.
    megabytes = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' ||
        megabytes <= 0 || megabytes > INT_MAX / 1000000)
    {
        printf("SIZE_IN_MBs must be an integer between 1 and %d\n",
               INT_MAX / 1000000);
        exit(1);
    }
    size = (int)(megabytes * 1000000);

    host = malloc(size);
    if (host == NULL)
    {
        printf("failed to allocate %d bytes of host memory\n", size);
        exit(1);
    }

    err = cuInit(0);
    check(err);
    err = cuDeviceGet(&device, 0);
    check(err);
    err = cuCtxCreate(&context, CU_CTX_MAP_HOST, device);
    check(err);
    err = cuStreamCreate(&stream, CU_STREAM_NON_BLOCKING);
    check(err);
    err = cuMemAlloc(&dev_A, size);
    check(err);

    // Launch thread to run memcpy in the background
    rc = pthread_create(&thread, NULL, run_background_memcpy, NULL);
    if (rc != 0)
    {
        printf("pthread_create failed: %d\n", rc);
        exit(1);
    }

    // Give thread time to start the memcpy
    sleep(2);

    // Allocate a second buffer
    printf("alloc started\n");
    err = cuMemAlloc(&dev_B, size);
    check(err);
    printf("alloc finished\n");

    // Set flag to allow background memcpy to finish. This relies on the
    // sleep above having given the thread time to allocate host_flag.
    // The store is volatile because the stream wait observes this memory.
    *(volatile int *)host_flag = 1;

    rc = pthread_join(thread, NULL);
    if (rc != 0)
    {
        printf("pthread_join failed: %d\n", rc);
        exit(1);
    }
    return 0;
}
/*
 * Aborts the program if a CUDA driver call failed.
 *
 * err: result code returned by a driver API call.
 *
 * On CUDA_SUCCESS this returns normally. Otherwise it prints
 * "<name>: <description>" to stdout and exits with status 1.
 */
void check(CUresult err)
{
    // The lookups leave the pointer NULL for codes they do not recognise,
    // so start from NULL and substitute a fallback before printing.
    const char *err_name = NULL;
    const char *err_string = NULL;

    if (err == CUDA_SUCCESS)
    {
        return;
    }

    cuGetErrorName(err, &err_name);
    cuGetErrorString(err, &err_string);
    printf("%s: %s\n",
           err_name != NULL ? err_name : "unrecognized CUDA error",
           err_string != NULL ? err_string : "no description available");
    exit(1);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment