Skip to content

Instantly share code, notes, and snippets.

@Imxset21
Last active August 29, 2015 14:00
Show Gist options
  • Save Imxset21/11264592 to your computer and use it in GitHub Desktop.
Save Imxset21/11264592 to your computer and use it in GitHub Desktop.
CUDA C Device Symbol Malloc
/*
Dynamically allocates memory for a device-side variable-length array.
Primary purpose is to be able to use the device variable to access the
allocated memory rather than having to keep track through function params.
@author Pedro Rittner
@email pedro.rittner@outlook.com
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cuda_runtime.h>
#include <cuda_runtime_api.h>
// Utility macro to check for and print CUDA errors.
//
// Evaluates `value` (an expression yielding a cudaError_t) exactly once. If the
// result is not cudaSuccess, prints the error string together with the call
// site's line and file to stderr and terminates with EXIT_FAILURE.
//
// The body is wrapped in do { ... } while (0) so the macro expands to a single
// statement: `CUDA_CHECK_RETURN(x);` is then safe inside an un-braced if/else.
// The argument is parenthesized so any expression can be passed.
#define CUDA_CHECK_RETURN(value) do { \
    cudaError_t _m_cudaStat = (value); \
    if (_m_cudaStat != cudaSuccess) { \
        fprintf(stderr, "Error %s at line %d in file %s\n", \
                cudaGetErrorString(_m_cudaStat), __LINE__, __FILE__); \
        exit(EXIT_FAILURE); \
    } \
} while (0)
//////////////////////////
// Device Symbol Malloc //
//////////////////////////
// Device-side pointer symbol that holds the address of the data buffer.
// It starts out NULL; main() stores the address obtained from cudaMalloc into
// it with cudaMemcpyToSymbol, so kernels can reach the buffer through this
// symbol instead of receiving the pointer as a kernel parameter.
__device__ float* my_float_ptr = NULL;
// Dummy test kernel: prints the first float reachable through my_float_ptr.
//
// With the setup performed in main() (the value 5.0 copied into the buffer)
// the output is "my_float_ptr: 5.000000" -- %f prints six decimal places.
// Every launched thread executes the printf; main() launches one thread.
//
// If the symbol has not been pointed at a buffer yet (it is initialized to
// NULL), a diagnostic is printed instead of dereferencing a null pointer,
// which would raise an illegal-address error on the device.
__global__ void cuda_dev_malloc_test()
{
    // Read the symbol once so the check and the access use the same value.
    const float* const data = my_float_ptr;
    if (data == NULL) {
        printf("my_float_ptr: (null)\n");
        return;
    }
    printf("my_float_ptr: %f\n", data[0]);
}
// Demo entry point.
//
// Allocates one float on the device, fills it with 5.0, publishes the buffer's
// address through the __device__ symbol my_float_ptr, and launches a
// single-thread kernel that prints the value via that symbol.
//
// Returns EXIT_SUCCESS on success; any failing CUDA call terminates the
// process with EXIT_FAILURE through CUDA_CHECK_RETURN.
int main(int argc, char const *argv[])
{
    // Command-line arguments are not used by this demo.
    (void)argc;
    (void)argv;

    // Holds a *device* address in a host variable; it must never be
    // dereferenced on the host.
    float* host_float_ptr = NULL;
    // The 'f' suffix keeps the literal single precision.
    float host_float_val = 5.0f;

    // First we allocate the space we need on the device,
    // and hold that pointer's value in host memory.
    CUDA_CHECK_RETURN(cudaMalloc((void**)&host_float_ptr, sizeof(float)));

    // Copy the value (5.0) we want in the array to the just-allocated
    // device buffer.
    CUDA_CHECK_RETURN(cudaMemcpy(host_float_ptr, &host_float_val,
                                 sizeof(float), cudaMemcpyHostToDevice));

    // Finally, copy the pointer *value* (hence sizeof(float*), and the address
    // of the host variable as source) into the on-device symbol, making the
    // symbol an alias for the allocated buffer.
    CUDA_CHECK_RETURN(cudaMemcpyToSymbol(my_float_ptr, &host_float_ptr,
                                         sizeof(float*), size_t(0),
                                         cudaMemcpyHostToDevice));

    // CUDA boilerplate to run this in a single thread
    const int nThreads = 1;
    const int nBlocks = 1;
    dim3 dimGrid(nBlocks);
    dim3 dimBlock(nThreads);

    // Run the test function
    cuda_dev_malloc_test<<<dimGrid, dimBlock>>>();
    // Launch-time errors (e.g. an invalid configuration) are reported by
    // cudaGetLastError, so check it directly after the launch ...
    CUDA_CHECK_RETURN(cudaGetLastError());
    // ... and errors raised while the kernel runs surface at the sync, which
    // also flushes the device-side printf output.
    CUDA_CHECK_RETURN(cudaDeviceSynchronize());

    // Release the buffer explicitly rather than relying on the device reset,
    // then tear the context down and check that this succeeded too.
    CUDA_CHECK_RETURN(cudaFree(host_float_ptr));
    CUDA_CHECK_RETURN(cudaDeviceReset());
    return EXIT_SUCCESS;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment