Skip to content

Instantly share code, notes, and snippets.

@rrika
Created February 15, 2021 19:41
Show Gist options
  • Save rrika/f5f89c2a8c9932c2630ef1c73bac08de to your computer and use it in GitHub Desktop.
Save rrika/f5f89c2a8c9932c2630ef1c73bac08de to your computer and use it in GitHub Desktop.
HIP without HIP
// clang++
// -x hip
// test.cpp
// -O3
// --cuda-gpu-arch=gfx1010
// --hip-device-lib=dummy.bc
// --hip-device-lib-path=path_to_dummy_bc
// -nogpuinc
// -fuse-ld=lld
// -fgpu-rdc
// -o test
#include <cstdio>
#include <vector>
#define __device__ __attribute__((device))
#define __global__ __attribute__((global))
#define __host__ __attribute__((host))
struct dim3 {
unsigned x, y, z;
__host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), y(y), z(z) {}
};
typedef struct hipStream *hipStream_t;
typedef enum hipError { hipSuccess = 0 } hipError_t;
typedef struct ihipModule_t* hipModule_t;
extern "C" hipError_t __hipPushCallConfiguration(
dim3 gridSize,
dim3 blockSize,
size_t sharedSize = 0,
hipStream_t stream = 0)
{
return hipSuccess; // important, else stuff gets dead-code eliminated
}
extern "C" hipError_t __hipPopCallConfiguration(
dim3 *gridDim,
dim3 *blockDim,
size_t *sharedMem,
hipStream_t *stream)
{
return hipSuccess;
}
extern "C" std::vector<hipModule_t>* __hipRegisterFatBinary(const void* data) {
struct ClangOffloadBlob {
char magic[24];
unsigned long long num_entries;
};
struct BlobEntry {
unsigned long long offset, size, label_size;
char label[];
};
struct W {
int dummy1, dummy2;
ClangOffloadBlob *blob;
} *wrapper = (W*)data;
printf("\nregistering clang offload bundle: %s\n", (char*)wrapper->blob);
auto num_entries = wrapper->blob->num_entries;
auto entry = (BlobEntry*)&wrapper->blob[1];
for (int i=0; i<num_entries; i++) {
auto ls = entry->label_size;
printf(" entry %d: %.*s\n", i, (int)ls, entry->label);
// entry->offset leads you do an embedded ELF file where you can find your actual GPU code
entry++;
entry = (BlobEntry*)(ls+(char*)entry);
}
puts("");
return (std::vector<hipModule_t>*) 0; // aaa
}
extern "C" void __hipUnregisterFatBinary(std::vector<hipModule_t>* modules) {}
extern "C" void __hipRegisterFunction(
std::vector<hipModule_t>* modules,
const void* hostFunction,
char* deviceFunction,
const char* deviceName,
unsigned int threadLimit,
unsigned* tid,
unsigned* bid,
dim3* blockDim,
dim3* gridDim,
int* wSize)
{
void *aaa = (void*)modules; // see above
printf("__hipRegisterFunction hostFunction=%p deviceFunction=%s deviceName=%s\n",
hostFunction, deviceFunction, deviceName);
}
extern "C" hipError_t hipLaunchKernel(const void *func, dim3 gridDim,
dim3 blockDim, void **args,
size_t sharedMem,
hipStream_t stream)
{
printf("launch funcptr=%p\n", func);
return hipSuccess; // important, else endless loop
}
__global__ void offload_me(int *z) {
z[0] = -z[0];
}
int main(int argc, char **argv) {
int q = 99;
offload_me<<<1, 1>>>(&q);
puts("");
return q;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment