Skip to content

Instantly share code, notes, and snippets.

@FantasyVR
Last active August 15, 2022 05:12
Show Gist options
  • Save FantasyVR/1c3f19c743d598f0c859b92447fb4eff to your computer and use it in GitHub Desktop.
Save FantasyVR/1c3f19c743d598f0c859b92447fb4eff to your computer and use it in GitHub Desktop.
/*
gcc cu_driver.cpp -fpermissive -ldl -g -fno-omit-frame-pointer
*/
#include <dlfcn.h>
#include <stdio.h>
/*
 * Local stand-ins for the CUDA driver API types, so that <cuda.h> is not
 * needed to build this file. They mirror the declarations in cuda.h.
 */
typedef int cu_result;                /* CUresult; 0 is CUDA_SUCCESS */
typedef int cu_device;                /* CUdevice is a plain int handle */
typedef void *cu_context;             /* CUcontext is an opaque pointer */
typedef unsigned long long cu_devptr; /* CUdeviceptr on 64-bit platforms */

/* Signatures of the driver entry points that are resolved at run time. */
typedef cu_result (*cu_init_fn)(unsigned int flags);
typedef cu_result (*cu_get_version_fn)(int *version);
typedef cu_result (*cu_get_device_fn)(cu_device *device, int ordinal);
typedef cu_result (*cu_create_context_fn)(cu_context *pctx, unsigned int flags,
                                          cu_device dev);
typedef cu_result (*cu_destroy_context_fn)(cu_context ctx);
typedef cu_result (*cu_mem_alloc_fn)(cu_devptr *dptr, size_t size);
typedef cu_result (*cu_mem_free_fn)(cu_devptr dptr);
typedef cu_result (*cu_copy_h2d_fn)(cu_devptr dst_d, const void *src_h,
                                    size_t count);
typedef cu_result (*cu_copy_d2h_fn)(void *dst_h, cu_devptr src_d,
                                    size_t count);
typedef cu_result (*cu_get_error_name_fn)(cu_result error, const char **str);
typedef cu_result (*cu_get_error_string_fn)(cu_result error, const char **str);

/* Handle of the loaded driver library plus every entry point this demo uses. */
struct cu_api {
    void *lib;
    cu_init_fn init;
    cu_get_version_fn get_version;
    cu_get_device_fn get_device;
    cu_create_context_fn create_context;
    cu_destroy_context_fn destroy_context;
    cu_mem_alloc_fn mem_alloc;
    cu_mem_free_fn mem_free;
    cu_copy_h2d_fn copy_h2d;
    cu_copy_d2h_fn copy_d2h;
    cu_get_error_name_fn get_error_name;
    cu_get_error_string_fn get_error_string;
};

/* Resolve one symbol from lib; report on stderr and return NULL if missing. */
static void *load_symbol(void *lib, const char *name)
{
    void *sym = dlsym(lib, name);
    if (sym == NULL) {
        const char *why = dlerror();
        fprintf(stderr, "dlsym(%s) failed: %s\n", name,
                why != NULL ? why : "symbol resolved to NULL");
    }
    return sym;
}

/*
 * Open libcuda and resolve every entry point in *api.
 * Returns 0 on success. On failure returns -1 with the library closed and
 * api->lib set to NULL.
 */
static int load_cu_api(struct cu_api *api)
{
    api->lib = dlopen("/usr/lib/x86_64-linux-gnu/libcuda.so", RTLD_NOW);
    if (api->lib == NULL) {
        const char *why = dlerror();
        fprintf(stderr, "dlopen failed: %s\n",
                why != NULL ? why : "unknown error");
        return -1;
    }

    api->init = (cu_init_fn)load_symbol(api->lib, "cuInit");
    api->get_version =
        (cu_get_version_fn)load_symbol(api->lib, "cuDriverGetVersion");
    api->get_device = (cu_get_device_fn)load_symbol(api->lib, "cuDeviceGet");
    /*
     * The context calls must use the _v2 symbols, like the memory calls
     * below. cuda.h maps cuCtxCreate/cuCtxDestroy to these names; the
     * unsuffixed exports are legacy entry points, and a context created
     * through them made cuMemAlloc_v2 fail with error 201
     * (CUDA_ERROR_INVALID_CONTEXT).
     */
    api->create_context =
        (cu_create_context_fn)load_symbol(api->lib, "cuCtxCreate_v2");
    api->destroy_context =
        (cu_destroy_context_fn)load_symbol(api->lib, "cuCtxDestroy_v2");
    api->mem_alloc = (cu_mem_alloc_fn)load_symbol(api->lib, "cuMemAlloc_v2");
    api->mem_free = (cu_mem_free_fn)load_symbol(api->lib, "cuMemFree_v2");
    api->copy_h2d = (cu_copy_h2d_fn)load_symbol(api->lib, "cuMemcpyHtoD_v2");
    api->copy_d2h = (cu_copy_d2h_fn)load_symbol(api->lib, "cuMemcpyDtoH_v2");
    api->get_error_name =
        (cu_get_error_name_fn)load_symbol(api->lib, "cuGetErrorName");
    api->get_error_string =
        (cu_get_error_string_fn)load_symbol(api->lib, "cuGetErrorString");

    if (api->init == NULL || api->get_version == NULL ||
        api->get_device == NULL || api->create_context == NULL ||
        api->destroy_context == NULL || api->mem_alloc == NULL ||
        api->mem_free == NULL || api->copy_h2d == NULL ||
        api->copy_d2h == NULL || api->get_error_name == NULL ||
        api->get_error_string == NULL) {
        dlclose(api->lib);
        api->lib = NULL;
        return -1;
    }
    return 0;
}

/* Print the driver's name and description for a failed call made at `line`. */
static void report_cu_error(const struct cu_api *api, cu_result status,
                            int line)
{
    const char *err_name = NULL;
    const char *err_string = NULL;

    /* The lookups themselves can fail for unknown codes; never print NULL. */
    if (api->get_error_name(status, &err_name) != 0 || err_name == NULL) {
        err_name = "unknown error";
    }
    if (api->get_error_string(status, &err_string) != 0 ||
        err_string == NULL) {
        err_string = "no description available";
    }
    printf("CUDA API failed at line %d with error %d: %s (%s)\n",
           line, status, err_name, err_string);
}

/*
 * Evaluate a driver call once; on failure report it and jump to the
 * enclosing function's `cleanup` label. Expects a `struct cu_api api` and a
 * `cleanup:` label in the calling scope.
 */
#define CHECK_ERROR(func)                                   \
    do {                                                    \
        cu_result check_status_ = (func);                   \
        if (check_status_ != 0) {                           \
            report_cu_error(&api, check_status_, __LINE__); \
            goto cleanup;                                   \
        }                                                   \
    } while (0)

/*
 * Round-trip four floats through device memory using only dynamically
 * loaded driver entry points. Returns 0 on success, 1 on any failure.
 */
int main(void)
{
    /* All locals are declared up front so no `goto cleanup` skips an
     * initialisation (required when this file is built as C++). */
    enum { NUM_VALS = 4 };
    const float hX[NUM_VALS] = { 1.0f, 2.0f, 3.0f, 4.0f };
    float h_tmp[NUM_VALS] = { 0.0f, 0.0f, 0.0f, 0.0f };
    struct cu_api api;
    cu_device device = 0;
    cu_context context = NULL;
    cu_devptr dX = 0;
    cu_result err = 0;
    int version = 0;
    int exit_code = 1;

    if (load_cu_api(&api) != 0) {
        return 1;
    }

    CHECK_ERROR(api.init(0));
    CHECK_ERROR(api.get_version(&version));
    printf("cuda driver version: %d\n", version);

    CHECK_ERROR(api.get_device(&device, 0));

    err = api.create_context(&context, 0, device);
    printf("cu_create_context error: %d\n", err);
    CHECK_ERROR(err);

    /* Device memory management */
    err = api.mem_alloc(&dX, sizeof hX);
    printf("malloc error: %d\n", err);
    CHECK_ERROR(err);

    CHECK_ERROR(api.copy_h2d(dX, hX, sizeof hX));       /* copy hX to dX */
    CHECK_ERROR(api.copy_d2h(h_tmp, dX, sizeof h_tmp)); /* copy dX to h_tmp */
    /* h_tmp should now be equal to hX */
    printf("dX values: %f %f %f %f\n", h_tmp[0], h_tmp[1], h_tmp[2], h_tmp[3]);
    exit_code = 0;

cleanup:
    /* Release in reverse order of acquisition; skip what was never acquired. */
    if (dX != 0) {
        api.mem_free(dX);
    }
    if (context != NULL) {
        api.destroy_context(context);
    }
    dlclose(api.lib);
    return exit_code;
}
/* output (as originally posted; error 201 is CUDA_ERROR_INVALID_CONTEXT):
cuda driver version: 11040
cu_create_context error: 0
malloc error: 201
dX values: 0.000000 0.000000 0.000000 0.000000
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment