Skip to content

Instantly share code, notes, and snippets.

@ruofeidu
Last active August 22, 2018 17:37
Show Gist options
  • Save ruofeidu/df95ba27dfc6b77121b27fd4a6483426 to your computer and use it in GitHub Desktop.
Save ruofeidu/df95ba27dfc6b77121b27fd4a6483426 to your computer and use it in GitHub Desktop.
CUDA Helper for Visual Studio INTELLISENSE
#pragma once
// Auto-link the CUDA runtime library (MSVC `#pragma comment(lib, ...)`),
// so projects including this header need no extra linker input for it.
#pragma comment(lib, "cudart.lib")
// Auto-link the OpenCV "world" library that matches the build flavor: the
// `d`-suffixed library when _DEBUG is defined, the plain one otherwise.
// `#ifdef` is used instead of `#if` because _DEBUG is either defined or
// absent; testing its value would evaluate an undefined identifier in release
// builds and would not preprocess at all if _DEBUG were defined empty.
#ifdef _DEBUG
#pragma comment(lib, "opencv_world330d.lib")
#else
#pragma comment(lib, "opencv_world330.lib")
#endif
// Kernel launch-configuration helpers.
//
// Write a launch as `myKernel KERNEL_ARG2(grid, block)(args...)` instead of
// spelling out the triple-chevron syntax directly:
//   KERNEL_ARG2 - grid, block
//   KERNEL_ARG3 - grid, block, sh_mem
//   KERNEL_ARG4 - grid, block, sh_mem, stream
#if !defined(__CUDACC__)
// __CUDACC__ is not defined (e.g. a non-nvcc parser is reading the file):
// the launch configuration expands to nothing, leaving an ordinary call.
#define KERNEL_ARG2(grid, block)
#define KERNEL_ARG3(grid, block, sh_mem)
#define KERNEL_ARG4(grid, block, sh_mem, stream)
#else
// __CUDACC__ is defined: emit the real <<<...>>> launch configuration.
#define KERNEL_ARG2(grid, block) <<<grid, block>>>
#define KERNEL_ARG3(grid, block, sh_mem) <<<grid, block, sh_mem>>>
#define KERNEL_ARG4(grid, block, sh_mem, stream) <<<grid, block, sh_mem, stream>>>
#endif
#ifdef __INTELLISENSE__
// Everything in this region is visible only when __INTELLISENSE__ is defined.
// The declarations have no definitions here: they exist so that an editor
// parser can resolve CUDA device built-ins instead of flagging them as
// undeclared identifiers.

// --- Bit reinterpretation / conversion intrinsics ---
int __float_as_int(float in);
float __int_as_float(int in);
// NOTE(review): declared as returning `short` here; recent CUDA toolkits
// return `__half` from this intrinsic - confirm against the toolkit in use.
short __float2half_rn(float in);

// --- Atomics ---
// Atomic increment.
unsigned int atomicInc(unsigned int* address, unsigned int val);
// Compare-and-Swap operation.
int atomicCAS(int* address, int compare, int val);
unsigned int atomicCAS(unsigned int* address, unsigned int compare, unsigned int val);
unsigned long long int atomicCAS(unsigned long long int* address, unsigned long long int compare, unsigned long long int val);
// Atomic add.
int atomicAdd(int* address, int val);
unsigned int atomicAdd(unsigned int* address, unsigned int val);
unsigned long long int atomicAdd(unsigned long long int* address, unsigned long long int val);
float atomicAdd(float* address, float val);
// The double overload requires SM60+ when actually compiled for the device.
double atomicAdd(double* address, double val);
// Atomic subtract.
int atomicSub(int* address, int val);
unsigned int atomicSub(unsigned int* address, unsigned int val);

// --- Layered 2D texture fetches ---
// T cannot be deduced from the arguments; callers name it explicitly,
// e.g. tex2DLayered<float>(texObj, x, y, layer).
template<class T> T tex2DLayered(cudaTextureObject_t texObj, float x, float y, int layer);
template<class T> T tex2DLayered(cudaTextureObject_t texObj, int x, int y, int layer);
template<class T> T tex2DLayered(texture<unsigned short, cudaTextureType2DLayered, cudaReadModeElementType> texObj, int x, int y, int layer);

// --- Surface reads / writes ---
// The trailing parameter is given its enum type, cudaSurfaceBoundaryMode, so
// the declarations are well-formed C++; it still defaults to
// cudaBoundaryModeTrap.
template<class T> T surf2DLayeredread(cudaSurfaceObject_t surfObj, int x, int y, int layer, cudaSurfaceBoundaryMode boundaryMode = cudaBoundaryModeTrap);
// NOTE(review): `data` is taken by value in this stub; the CUDA runtime's
// out-parameter overload presumably takes `T*` - confirm before relying on it.
template<class T> void surf2DLayeredread(T data, cudaSurfaceObject_t surfObj, int x, int y, int layer, cudaSurfaceBoundaryMode boundaryMode = cudaBoundaryModeTrap);
template<class T> void surf3Dwrite(T data, cudaSurfaceObject_t surfObj, int x, int y, int z, cudaSurfaceBoundaryMode boundaryMode = cudaBoundaryModeTrap);

// --- Block-level synchronization ---
// The barrier expands to nothing for the editor parser.
#define __syncthreads()
// Expands to its predicate. The parentheses keep the argument a single
// operand, so `!__syncthreads_or(x || y)` parses as `!(x || y)`.
#define __syncthreads_or(a) (a)

// --- 3D texture fetches ---
template<class T> T tex3D(cudaTextureObject_t texObj, float x, float y, float z);
template<class T> T tex3DLod(cudaTextureObject_t texObj, float x, float y, float z, float level);
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment