Skip to content

Instantly share code, notes, and snippets.

@mcleary
Created April 23, 2019 14:13
Show Gist options
  • Save mcleary/5915b184ada922d6739710d1ad54e575 to your computer and use it in GitHub Desktop.
Save mcleary/5915b184ada922d6739710d1ad54e575 to your computer and use it in GitHub Desktop.
Very simple OpenCL application.
#include <iostream>
#include <vector>
#define CL_HPP_TARGET_OPENCL_VERSION 200
#define CL_HPP_ENABLE_EXCEPTIONS
#include <CL/cl2.hpp>
using namespace std;
int main() {
std::string kernelsSrc{ R"CLC(
kernel void add(
global float* restrict a,
global float* restrict b,
global float* restrict c,
size_t N,
float A,
float B,
float C
)
{
const size_t i = get_global_id(0);
if (i < N)
{
// Do some stupid calculations
for (int t = 0; t < 50; ++t)
c[i] = A * sin(a[i]) + B * cos(b[i]) + sqrt(A * cos(a[i]) * B * sin(b[i]));
c[i] /= C * tan(c[i]) + 1;
}
}
)CLC" };
vector<cl::Platform> platforms;
cl::Platform::get(&platforms);
const size_t platformIndexToUse = 0;
const size_t deviceIndexToUse = 0;
cl::Platform platform;
cl::Device device;
for (size_t platIndex = 0; platIndex < platforms.size(); ++platIndex)
{
const cl::Platform& plat = platforms.at(platIndex);
cout << "[" << platIndex << "]: " << plat.getInfo<CL_PLATFORM_NAME>() << endl;
vector<cl::Device> devices;
plat.getDevices(CL_DEVICE_TYPE_ALL, &devices);
for (size_t devIndex = 0; devIndex < devices.size(); ++devIndex)
{
const cl::Device& dev = devices.at(devIndex);
cout << "\t[" << devIndex << "]: " << dev.getInfo<CL_DEVICE_NAME>() << endl;
}
if (platformIndexToUse == platIndex)
{
platform = plat;
device = devices.at(deviceIndexToUse);
}
}
cout << endl;
cout << "Running on " << device.getInfo<CL_DEVICE_NAME>() << endl;
cl_command_queue_properties props = CL_QUEUE_PROFILING_ENABLE;
cl::Context context{ device };
cl::CommandQueue queue{ context, props };
cl::Program program{ context, kernelsSrc };
try
{
program.build();
}
catch (const cl::Error& err)
{
cout << err.what() << endl;
for (auto p : program.getBuildInfo<CL_PROGRAM_BUILD_LOG>())
{
cout << p.second << endl;
}
exit(1);
}
float cA = 1234;
float cB = 4321;
float cC = 5678;
auto addKernel = cl::KernelFunctor<cl::Buffer, cl::Buffer, cl::Buffer, size_t, float, float, float>(program, "add");
// size_t N = 100'000'000;
size_t N = 10'000;
cout << "Initializing ... " << flush;
vector<float> A(N, 123);
vector<float> B(N, 111);
vector<float> C(N);
cout << "done" << endl;
const size_t bufSize = A.size() * sizeof(float);
cl::Buffer bufA{ context, CL_MEM_READ_ONLY, bufSize };
cl::Buffer bufB{ context, CL_MEM_READ_ONLY, bufSize };
cl::Buffer bufC{ context, CL_MEM_WRITE_ONLY, bufSize };
cl::copy(queue, begin(A), end(A), bufA);
cl::copy(queue, begin(B), end(B), bufB);
cout << "Copying ... ";
queue.finish();
cout << "done" << endl;
for (int i = 0; i < 5; ++i)
{
cl::Event e = addKernel(cl::EnqueueArgs{ queue, cl::NDRange{A.size()} }, bufA, bufB, bufC, A.size(), cA, cB, cC);
e.wait();
cout << "Queued : " << e.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>() << endl;
cout << "Submit : " << e.getProfilingInfo<CL_PROFILING_COMMAND_SUBMIT>() << endl;
cout << "Start : " << e.getProfilingInfo<CL_PROFILING_COMMAND_START>() << endl;
cout << "End : " << e.getProfilingInfo<CL_PROFILING_COMMAND_END>() << endl;
cout << endl;
}
cout << "Copying back ..." << flush;
cl::copy(queue, bufC, begin(C), end(C));
cout << "done" << endl;
cout << endl;
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment