-
-
Save rcombs/f714d2531e00275b4c9aaf05801f9bdf to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef __global__ | |
// Kernel qualifier, spelled as the compiler attribute it expands to.
#define __global__ __attribute__((global))

// Texture object handle (an unsigned long long); this is the type tex2D()
// takes as its first argument.
typedef __attribute__((device_builtin)) unsigned long long cudaTextureObject_t;
// Two unsigned char components (x, y), aligned to 2 bytes.
typedef struct __attribute__((device_builtin)) __attribute__((aligned(2))) uchar2
{
    unsigned char x;
    unsigned char y;
} uchar2;
// Two unsigned short components (x, y), aligned to 4 bytes.
typedef struct __attribute__((device_builtin)) __attribute__((aligned(4))) ushort2
{
    unsigned short x;
    unsigned short y;
} ushort2;
// Three unsigned int components (x, y, z). This is the return type of the
// functions generated by the GET() macro.
typedef struct __attribute__((device_builtin)) uint3
{
    unsigned int x;
    unsigned int y;
    unsigned int z;
} uint3;

// dim3 is an alias for the same struct as uint3.
typedef struct uint3 dim3;
// Two int components (x, y), aligned to 8 bytes.
typedef struct __attribute__((device_builtin)) __attribute__((aligned(8))) int2
{
    int x;
    int y;
} int2;
// Four unsigned char components (x, y, z, w), aligned to 4 bytes.
typedef struct __attribute__((device_builtin)) __attribute__((aligned(4))) uchar4
{
    unsigned char x;
    unsigned char y;
    unsigned char z;
    unsigned char w;
} uchar4;
// Four unsigned short components (x, y, z, w), aligned to 8 bytes.
// The members must be unsigned short: this type is filled with 16-bit
// values (masked with 0xFFFF), which unsigned char members would truncate.
typedef struct __attribute__((device_builtin)) __attribute__((aligned(8))) ushort4
{
    unsigned short x;
    unsigned short y;
    unsigned short z;
    unsigned short w;
} ushort4;
// Four int components (x, y, z, w), aligned to 16 bytes.
typedef struct __attribute__((device_builtin)) __attribute__((aligned(16))) int4
{
    int x;
    int y;
    int z;
    int w;
} int4;
// GETCOMP(reg, comp): moves component `comp` of the PTX special register
// %reg into `tmp` with a 32-bit mov, then stores it in `ret.comp`.
// The expansion site must provide an `unsigned tmp` and a `ret` object that
// has a `comp` member.
// "%%" produces the literal '%' that prefixes special-register names in PTX
// (a single '%' would be parsed as an operand reference), and the PTX
// statement is terminated with ';'.
#define GETCOMP(reg, comp) \
    __asm__("mov.u32 %0, %%" #reg "." #comp ";" : "=r"(tmp)); \
    ret.comp = tmp;
// GET(name, reg): defines a static inline device function `name()` that
// reads the x, y and z components of register `reg` through GETCOMP and
// returns them packed in a uint3.
// The locals must be called `ret` and `tmp`, because GETCOMP refers to them
// by those names.
#define GET(name, reg) \
    static inline __attribute__((device)) uint3 name() \
    { \
        uint3 ret; \
        unsigned tmp; \
        GETCOMP(reg, x) \
        GETCOMP(reg, y) \
        GETCOMP(reg, z) \
        return ret; \
    }

// Accessors generated from the ctaid, ntid and tid registers respectively.
GET(getBlockIdx, ctaid)
GET(getBlockDim, ntid)
GET(getThreadIdx, tid)
// Map the conventional built-in names onto the accessor functions, so that
// expressions such as `blockIdx.x` expand to `(getBlockIdx()).x`.
#define blockIdx  (getBlockIdx())
#define blockDim  (getBlockDim())
#define threadIdx (getThreadIdx())
#define max(a, b) ((a) > (b) ? (a) : (b)) | |
#define min(a, b) ((a) < (b) ? (a) : (b)) | |
// Constructors for the small vector types, built as compound literals with
// designated initializers.
// Every argument is parenthesized and explicitly cast to the component type.
// The cast makes the conversion from wider integer expressions explicit, so
// the braced initializer no longer relies on implicit narrowing.
#define make_uchar2(a, b) \
    ((uchar2){.x = (unsigned char)(a), .y = (unsigned char)(b)})
#define make_ushort2(a, b) \
    ((ushort2){.x = (unsigned short)(a), .y = (unsigned short)(b)})
#define make_uchar4(a, b, c, d) \
    ((uchar4){.x = (unsigned char)(a), .y = (unsigned char)(b), \
              .z = (unsigned char)(c), .w = (unsigned char)(d)})
#define make_ushort4(a, b, c, d) \
    ((ushort4){.x = (unsigned short)(a), .y = (unsigned short)(b), \
               .z = (unsigned short)(c), .w = (unsigned short)(d)})
#define TEX2D(type, ret) static inline __attribute__((device)) void conv(type* out, unsigned a, unsigned b, unsigned c, unsigned d) {*out = (ret);} | |
TEX2D(unsigned char, a & 0xFF) | |
TEX2D(unsigned short, a & 0xFFFF) | |
TEX2D(uchar2, make_uchar2(a & 0xFF, b & 0xFF)) | |
TEX2D(ushort2, make_ushort2(a & 0xFFFF, b & 0xFFFF)) | |
TEX2D(uchar4, make_uchar4(a & 0xFF, b & 0xFF, c & 0xFF, d & 0xFF)) | |
TEX2D(ushort4, make_ushort4(a & 0xFFFF, b & 0xFFFF, c & 0xFFFF, d & 0xFFFF)) | |
template <class T> | |
static inline __attribute__((device)) T tex2D(cudaTextureObject_t texObject, float x, float y) | |
{ | |
T ret; | |
unsigned ret1, ret2, ret3, ret4; | |
asm("tex.2d.v4.u32.f32 {%0, %1, %2, %3}, [%4, {%5, %6}]" : | |
"=r"(ret1), "=r"(ret2), "=r"(ret3), "=r"(ret4) : | |
"l"(texObject), "f"(x), "f"(y)); | |
conv(&ret, ret1, ret2, ret3, ret4); | |
return ret; | |
} | |
#endif |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment