Skip to content

Instantly share code, notes, and snippets.

@jrprice
Last active Sep 14, 2016
Embed
What would you like to do?
Simple OpenCL backend for StreamExecutor for testing purposes
#ifdef __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
#include <streamexecutor/StreamExecutor.h>
#include <vector>
#define CHECK_OCL(Op, Err) \
if (Err != CL_SUCCESS) { \
return streamexecutor::make_error("OpenCL error " + llvm::Twine(Err) + \
" while " Op); \
}
class OCLPlatformDevice : public streamexecutor::PlatformDevice {
public:
OCLPlatformDevice(cl_device_id Dev) : Device(Dev) {
// TODO: Check for errors here
Context = clCreateContext(NULL, 1, &Device, NULL, NULL, NULL);
SyncQueue = clCreateCommandQueue(Context, Device, 0, NULL);
}
~OCLPlatformDevice() {
clReleaseCommandQueue(SyncQueue);
clReleaseContext(Context);
}
private:
cl_context Context;
cl_device_id Device;
cl_command_queue SyncQueue;
std::string getName() const override {
char Name[256];
clGetDeviceInfo(Device, CL_DEVICE_NAME, sizeof(Name), Name, NULL);
std::string Output = Name;
Output += " (OpenCL)";
return Output;
}
std::string getPlatformName() const override { return "OpenCL"; }
streamexecutor::Expected<const void *> createStream() override {
cl_int Err;
cl_command_queue Queue = clCreateCommandQueue(Context, Device, 0, &Err);
CHECK_OCL("creating command-queue", Err);
return Queue;
}
streamexecutor::Error destroyStream(const void *Handle) override {
CHECK_OCL("destroying command-queue",
clReleaseCommandQueue((cl_command_queue)Handle));
return streamexecutor::Error::success();
}
streamexecutor::Expected<const void *>
createKernel(const streamexecutor::MultiKernelLoaderSpec &Spec) override {
cl_int Err;
const char *Source = Spec.getOpenCLTextInMemory().getText();
// Create program
cl_program Program =
clCreateProgramWithSource(Context, 1, &Source, NULL, &Err);
CHECK_OCL("creating program", Err);
// Build program
Err = clBuildProgram(Program, 1, &Device, "", NULL, NULL);
if (Err == CL_BUILD_PROGRAM_FAILURE) {
char buildlog[4096];
clGetProgramBuildInfo(Program, Device, CL_PROGRAM_BUILD_LOG,
sizeof(buildlog), buildlog, NULL);
return streamexecutor::make_error("OpenCL build failed:\n" +
llvm::Twine(buildlog));
}
CHECK_OCL("building program", Err);
// Create kernel
cl_kernel Kernel =
clCreateKernel(Program, Spec.getKernelName().c_str(), &Err);
CHECK_OCL("creating kernel", Err);
return Kernel;
}
streamexecutor::Error destroyKernel(const void *Handle) override {
CHECK_OCL("destroying kernel", clReleaseKernel((cl_kernel)Handle));
return streamexecutor::Error::success();
}
streamexecutor::Error blockHostUntilDone(const void *S) override {
cl_int Err = clFinish((cl_command_queue)S);
CHECK_OCL("waiting for queue", Err);
return streamexecutor::Error::success();
}
streamexecutor::Error
launch(const void *S, streamexecutor::BlockDimensions BlockSize,
streamexecutor::GridDimensions GridSize, const void *K,
const streamexecutor::PackedKernelArgumentArrayBase &ArgumentArray)
override {
cl_int Err;
cl_kernel Kernel = (cl_kernel)K;
// Set kernel arguments
for (size_t i = 0; i < ArgumentArray.getArgumentCount(); i++) {
const void *Arg = ArgumentArray.getAddress(i);
Err = clSetKernelArg(Kernel, i, ArgumentArray.getSize(i), Arg);
CHECK_OCL("setting kernel argument", Err);
}
// Calculate global work size
size_t Global[3] = {BlockSize.X * GridSize.X, BlockSize.Y * GridSize.Y,
BlockSize.Z * GridSize.Z};
size_t Local[3] = {BlockSize.X, BlockSize.Y, BlockSize.Z};
// Enqueue kernel
cl_command_queue Queue = (cl_command_queue)S;
Err = clEnqueueNDRangeKernel(Queue, Kernel, 3, NULL, Global, Local, 0, NULL,
NULL);
CHECK_OCL("enqueuing kernel", Err);
return streamexecutor::Error::success();
}
streamexecutor::Expected<void *>
allocateDeviceMemory(size_t ByteCount) override {
cl_int Err;
cl_mem Memory = clCreateBuffer(Context, 0, ByteCount, NULL, &Err);
CHECK_OCL("creating buffer", Err);
return Memory;
}
streamexecutor::Error freeDeviceMemory(const void *Handle) override {
clReleaseMemObject((cl_mem)Handle);
return streamexecutor::Error::success();
}
streamexecutor::Error registerHostMemory(void *Memory,
size_t ByteCount) override {
return streamexecutor::Error::success();
}
streamexecutor::Error unregisterHostMemory(const void *Memory) override {
return streamexecutor::Error::success();
}
streamexecutor::Error copyD2H(const void *S, const void *DeviceHandleSrc,
size_t SrcByteOffset, void *HostDst,
size_t DstByteOffset,
size_t ByteCount) override {
cl_command_queue Queue = (cl_command_queue)S;
cl_int Err = clEnqueueReadBuffer(
Queue, (cl_mem)DeviceHandleSrc, CL_FALSE, SrcByteOffset, ByteCount,
(char *)HostDst + DstByteOffset, 0, NULL, NULL);
CHECK_OCL("reading buffer", Err);
return streamexecutor::Error::success();
}
streamexecutor::Error copyH2D(const void *S, const void *HostSrc,
size_t SrcByteOffset,
const void *DeviceHandleDst,
size_t DstByteOffset,
size_t ByteCount) override {
cl_command_queue Queue = (cl_command_queue)S;
cl_int Err = clEnqueueWriteBuffer(
Queue, (cl_mem)DeviceHandleDst, CL_FALSE, DstByteOffset, ByteCount,
(const char *)HostSrc + SrcByteOffset, 0, NULL, NULL);
CHECK_OCL("writing buffer", Err);
return streamexecutor::Error::success();
}
streamexecutor::Error copyD2D(const void *S, const void *DeviceHandleSrc,
size_t SrcByteOffset,
const void *DeviceHandleDst,
size_t DstByteOffset,
size_t ByteCount) override {
cl_command_queue Queue = (cl_command_queue)S;
cl_int Err = clEnqueueCopyBuffer(Queue, (cl_mem)DeviceHandleSrc,
(cl_mem)DeviceHandleDst, SrcByteOffset,
DstByteOffset, ByteCount, 0, NULL, NULL);
CHECK_OCL("enqueuing copy buffer", Err);
return streamexecutor::Error::success();
}
streamexecutor::Error synchronousCopyD2H(const void *DeviceHandleSrc,
size_t SrcByteOffset, void *HostDst,
size_t DstByteOffset,
size_t ByteCount) override {
cl_int Err = clEnqueueReadBuffer(
SyncQueue, (cl_mem)DeviceHandleSrc, CL_TRUE, SrcByteOffset, ByteCount,
(char *)HostDst + DstByteOffset, 0, NULL, NULL);
CHECK_OCL("reading buffer", Err);
return streamexecutor::Error::success();
}
streamexecutor::Error synchronousCopyH2D(const void *HostSrc,
size_t SrcByteOffset,
const void *DeviceHandleDst,
size_t DstByteOffset,
size_t ByteCount) override {
cl_int Err = clEnqueueWriteBuffer(
SyncQueue, (cl_mem)DeviceHandleDst, CL_TRUE, DstByteOffset, ByteCount,
(const char *)HostSrc + SrcByteOffset, 0, NULL, NULL);
CHECK_OCL("writing buffer", Err);
return streamexecutor::Error::success();
}
streamexecutor::Error synchronousCopyD2D(const void *DeviceHandleSrc,
size_t SrcByteOffset,
const void *DeviceHandleDst,
size_t DstByteOffset,
size_t ByteCount) override {
cl_int Err = clEnqueueCopyBuffer(SyncQueue, (cl_mem)DeviceHandleSrc,
(cl_mem)DeviceHandleDst, SrcByteOffset,
DstByteOffset, ByteCount, 0, NULL, NULL);
CHECK_OCL("enqueuing copy buffer", Err);
CHECK_OCL("waiting for queue", clFinish(SyncQueue));
return streamexecutor::Error::success();
}
};
class OCLPlatform : public streamexecutor::Platform {
public:
OCLPlatform() {
cl_uint NumPlatforms = 0;
cl_platform_id Platforms[8];
clGetPlatformIDs(8, Platforms, &NumPlatforms);
for (cl_uint p = 0; p < NumPlatforms; p++) {
cl_uint NumDevices = 0;
cl_device_id DeviceIDs[8];
clGetDeviceIDs(Platforms[p], CL_DEVICE_TYPE_ALL, 8, DeviceIDs,
&NumDevices);
for (cl_uint i = 0; i < NumDevices; i++) {
streamexecutor::PlatformDevice *PDevice =
new OCLPlatformDevice(DeviceIDs[i]);
Devices.push_back(streamexecutor::Device(PDevice));
}
}
}
size_t getDeviceCount() const { return Devices.size(); }
streamexecutor::Expected<streamexecutor::Device>
getDevice(size_t DeviceIndex) {
if (DeviceIndex >= Devices.size())
return streamexecutor::make_error("DeviceIndex out of range");
return Devices[DeviceIndex];
}
private:
std::vector<streamexecutor::Device> Devices;
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment