Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
Minimal CUDA support header for parsing with Clang
/*
 * Minimal declarations for CUDA support. Testing purposes only.
 *
 * Each CUDA keyword below is spelled as a GNU-style __attribute__ so the
 * keywords can be parsed without pulling in the real CUDA headers.
 */

/* Execution-space qualifiers. */
#define __host__   __attribute__((host))
#define __device__ __attribute__((device))
/* Kernel entry points additionally get C linkage through extern "C". */
#define __global__ extern "C" __attribute__((global))

/* Memory-space qualifiers. */
#define __shared__   __attribute__((shared))
#define __constant__ __attribute__((constant))

/* Code-generation hints. The launch_bounds arguments are forwarded verbatim. */
#define __forceinline__ __attribute__((always_inline))
#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))
/* Plain aggregate of three unsigned components, in x, y, z order. */
typedef struct {
  unsigned int x;
  unsigned int y;
  unsigned int z;
} uint3;
/*
 * Three-component unsigned extent with a constructor, usable from both host
 * and device code.
 */
struct dim3 {
  unsigned int x, y, z;

  /*
   * Builds an extent from up to three components. y and z default to 1, so
   * dim3(n) yields (n, 1, 1). The constructor is not explicit, which lets a
   * bare unsigned value convert to dim3 implicitly.
   */
  __host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1)
      : x(x), y(y), z(z) {}
};
/*
 * Built-in kernel variables. These are declarations only (extern, no
 * initializer); nothing in this header defines them.
 */

/* Index variables. */
extern __device__ const uint3 threadIdx;
extern __device__ const uint3 blockIdx;

/* Extent variables. */
extern __device__ const dim3 blockDim;
extern __device__ const dim3 gridDim;

/* Warp width as an int; its value is not fixed by this header. */
extern __device__ const int warpSize;
// The following is some bits of the CUDA runtime, currently required for Clang
// to parse kernel invocation expressions correctly.

// Stream handle: a pointer to struct cudaStream, which is only forward-declared
// by this typedef in the visible part of the header.
typedef struct cudaStream* cudaStream_t;

// Declaration only; no definition is provided here. The parameters follow the
// order of a <<<grid, block, shared, stream>>> launch configuration, and the
// last two may be omitted, in which case they default to 0.
int cudaConfigureCall(dim3 grid_size,
                      dim3 block_size,
                      unsigned shared_size = 0,
                      cudaStream_t stream = 0);

This comment has been minimized.

Copy link

@hughperkins hughperkins commented May 24, 2017

Love your `__launch_bounds__` declaration :-). And making `__global__` into extern "C" is an awesome idea.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment