Last active
August 29, 2015 14:07
-
-
Save flisboac/16f87a86e4f559f89ee3 to your computer and use it in GitHub Desktop.
A simple, yet complex, yet complete, yet big, example of vector multiplication, implemented in both CPU and GPU (using OpenCL).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
A simple, yet complex, yet complete, yet big, example of vector multiplication, implemented in both CPU and GPU (using OpenCL). |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
*.a | |
*.o | |
*.so | |
opencl-example-vecmul | |
.settings |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
RM=rm -f | |
RMDIR=rm -rf | |
CC=gcc -c -o | |
CPP=g++ -c -o | |
SLINK=ar rcu | |
ELINK=g++ -o | |
DLINK=g++ -shared -o | |
SOBJS=opencl-helper.o | |
DOBJS= | |
EOBJS=opencl-example-vecmul.o | |
SLIB=libopencl-helper.a | |
DLIB= | |
EXEC=opencl-example-vecmul | |
SDEPS= | |
DDEPS= | |
EDEPS=$(SLIB) | |
CFLAGS=-Wall -g --std=c99 | |
CPPFLAGS=-Wall -g --std=c++11 | |
LIBS=-lOpenCL | |
LINKFLAGS= | |
SFLAGS=$(LINKFLAGS) | |
EFLAGS=$(LINKFLAGS) | |
DFLAGS=$(LINKFLAGS) | |
TARGETS=$(EXEC) $(SLIB) | |
all: $(TARGETS) | |
clean: | |
$(RM) $(SLIB) $(DLIB) $(EXEC) $(SOBJS) $(DOBJS) $(EOBJS) | |
$(SLIB): $(SOBJS) | |
$(SLINK) $@ $? $(SFLAGS) | |
$(DLIB): $(DOBJS) $(DDEPS) | |
$(DLINK) $@ $? $(DFLAGS) $(LIBS) $(DDEPS) | |
$(EXEC): $(EOBJS) $(EDEPS) | |
$(ELINK) $@ $? $(EFLAGS) $(LIBS) $(EDEPS) | |
.cpp.o: | |
$(CPP) $@ $? $(CPPFLAGS) | |
.c.o: | |
$(CC) $@ $? $(CFLAGS) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Compile with: g++ --std=c++11 -Wall -lOpenCL -o opencl-example-vecmul opencl-example-vecmul.cpp | |
#include <limits> | |
#include <iostream> | |
#include <sstream> | |
#include <iomanip> | |
#include <cstdlib> | |
#include "opencl-helper.hpp" | |
#include "opencl-example-vecmul.hpp" | |
int main(int argc, const char* argv[]) { | |
Example<cl_float> example; | |
int ret = example.parse(argc, argv); | |
if (ret == 0) | |
ret = example.initialize(); | |
example.print_options(std::cerr); | |
if (ret == 0) | |
ret = example.execute(); | |
return ret; | |
} | |
template <typename T> Example<T>::Example() | |
// Options | |
: _program_name("example") | |
, _commands("") | |
, _platform_idx(-1) | |
, _vector_size(100000) | |
, _min_value(0) | |
, _max_value(100) | |
, _margin( | |
std::numeric_limits<T>::is_specialized | |
? std::numeric_limits<T>::is_integer | |
? 0 | |
: std::numeric_limits<T>::epsilon() | |
: 0 | |
) | |
, _verbose(false) | |
// State | |
, _program_source() | |
, _global_size(0) | |
, _local_size(0) | |
, _Va(), _Vb(), _Vo() | |
, _context() | |
, _device() | |
, _program() | |
{} | |
template <typename T> int Example<T>::parse(int argc, const char** argv) { | |
_program_name = argv[0]; | |
if (argc > 1) _commands = argv[1]; | |
if (argc > 2)_platform_idx = std::atoi(argv[2]); | |
if (argc > 3) _vector_size = std::atoi(argv[3]); | |
if (argc > 4) _min_value = std::atof(argv[4]); | |
if (argc > 5) _max_value = std::atof(argv[5]); | |
if (argc > 6) _margin = std::atof(argv[6]); | |
return 0; | |
} | |
template <typename T> void Example<T>::split(std::vector<Example<T>> per_command_examples) { | |
per_command_examples.resize(_commands.size()); | |
for (size_t i = 0; i < _commands.size(); ++i) { | |
per_command_examples[i] = Example(const_cast<Example<T>&>(*this)); | |
per_command_examples[i]._commands = _commands[i]; | |
} | |
} | |
template <typename T> int Example<T>::initialize() { | |
int ret = 0; | |
try { | |
_context = create_context(1, CL_DEVICE_TYPE_ALL, _platform_idx); | |
_device = first_device(_context); | |
_queue = cl::CommandQueue(_context, _device, CL_QUEUE_PROFILING_ENABLE); | |
auto sizes = optimal_sizes(); | |
_global_size = sizes.first; | |
_local_size = sizes.second; | |
if (_global_size == 0 || _local_size == 0) { | |
std::cerr << "* ERROR: Could not find suitable sizes. Global: " << _global_size << ", Local: " << _local_size << std::endl; | |
ret = 1; | |
} else { | |
_Va.resize(_global_size); | |
_Vb.resize(_global_size); | |
_Vo.resize(_global_size); | |
fill(); | |
size_t pos; | |
_program_source = ::program_source; | |
while ((pos = _program_source.find("TYPE")) != std::string::npos) { | |
_program_source.replace(pos, strlen("TYPE"), TypeInfo<T>::type_name()); | |
} | |
std::cout << _program_source << std::endl; | |
_program = compile_program(_context, _program_source.c_str(), _program_source.size(), "-cl-std=CL1.1 -cl-kernel-arg-info -w -g"); | |
} | |
} catch (cl::Error& ex) { | |
std::cerr << "* OPENCL EXCEPTION " << ex.err() << ": " << ex.what() << std::endl; | |
ret = 1; | |
} catch (Error& ex) { | |
std::cerr << "* EXCEPTION " << ex.code() << ": " << ex.what() << std::endl; | |
ret = 1; | |
} | |
return ret; | |
} | |
template <typename T> void Example<T>::print_options(std::ostream& os) { | |
os << "* Minimum value: " << _min_value << std::endl; | |
os << "* Maximum value: " << _max_value << std::endl; | |
os << "* Margin value: " << _margin << std::endl; | |
os << "* Vector size: " << _vector_size << std::endl; | |
os << "* Commands: " << _commands << std::endl; | |
if (_global_size) os << "* Global Size: " << _global_size << std::endl; | |
if (_local_size) os << "* Local Size: " << _local_size << std::endl; | |
} | |
template <typename T> int Example<T>::execute() { | |
int ret = 0; | |
if (_commands.empty()) { | |
std::cerr << "USAGE: " << _program_name << " <COMMANDS> [PLATFORM_IDX] [VECTOR_SIZE] [MIN_VALUE] [MAX_VALUE] [MARGIN]" << std::endl; | |
} else { | |
for (size_t i = 0; i < _commands.size(); ++i) { | |
switch(_commands[i]) { | |
case 'p': | |
std::cout << _program_source << std::endl; | |
break; | |
case 'c': | |
{ | |
copy(_Va, _Vo); | |
CpuTiming timing; | |
timing.start(); | |
vecmul(&_Vo[0], &_Vb[0], _Vo.size()); | |
timing.end(); | |
std::cout << "Local-OpenCL statistics:" << std::endl | |
<< "\tValid: " << verify(_Vo) << std::endl | |
<< "\tTotal CPU time: " << timing.sec() << "s" << std::endl; | |
} | |
break; | |
case 'g': | |
try { | |
copy(_Va, _Vo); | |
execute_global(); | |
} catch (cl::Error& ex) { | |
std::cerr << "* [GLOBAL] OPENCL ERROR " << ex.err() << ": " << ex.what() << std::endl; | |
} catch (Error& ex) { | |
std::cerr << "* [GLOBAL] ERROR " << ex.code() << ": " << ex.what() << std::endl; | |
} | |
break; | |
case 'l': | |
{ | |
try { | |
copy(_Va, _Vo); | |
execute_local(); | |
} catch (cl::Error& ex) { | |
std::cerr << "* [LOCAL] OPENCL ERROR " << ex.err() << ": " << ex.what() << std::endl; | |
} catch (Error& ex) { | |
std::cerr << "* [LOCAL] ERROR " << ex.code() << ": " << ex.what() << std::endl; | |
} | |
} | |
break; | |
default: | |
std::cerr << "* WARNING: Unknown command '" << _commands[i] << "'." << std::endl; | |
} | |
} | |
} | |
return ret; | |
} | |
template <typename T> void Example<T>::fill() { | |
for (size_t i = 0; i < _vector_size; ++i) { | |
_Va[i] = _min_value + (static_cast<float>(rand()) / RAND_MAX) * (_max_value - _min_value); | |
_Vb[i] = _min_value + (static_cast<float>(rand()) / RAND_MAX) * (_max_value - _min_value); | |
} | |
} | |
template <typename T> void Example<T>::copy(const std::vector<T>& from, std::vector<T>& to) { | |
std::copy(from.begin(), from.begin() + _vector_size, to.begin()); | |
} | |
template <typename T> bool Example<T>::verify(const std::vector<T>& Vo) { | |
for (size_t i = 0; i < _vector_size; ++i) { | |
T correct_value = _Va[i] * _Vb[i]; | |
T value = Vo[i]; | |
if (!within_margin(correct_value, value)) { | |
return false; | |
} | |
} | |
return true; | |
} | |
template <typename T> bool Example<T>::within_margin(T correct_value, T value) { | |
T diff = correct_value - value; | |
if (diff < 0) diff = -diff; | |
if (diff > _margin) { | |
return false; | |
} | |
return true; | |
} | |
template <typename T> std::pair<size_t, size_t> Example<T>::optimal_sizes() { | |
// Utterly fails if architecture/platform doesn't give or optimize workgroup sizes | |
// in powers of 2. | |
std::vector<size_t> maxWorkItemSizes = _device.getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>(); | |
size_t maxLocalMemSize = _device.getInfo<CL_DEVICE_LOCAL_MEM_SIZE>(); | |
// Limit by maximum work-item size. | |
size_t size_pow = highest_pow2(_vector_size); | |
size_t local_size = std::min(size_pow, maxWorkItemSizes[0]); | |
// Further limit by maximum local memory. | |
while (local_size * sizeof(T) * 2 > maxLocalMemSize) { | |
local_size /= 2; | |
} | |
size_t global_size = next_multiple_ge_size(_vector_size, local_size); | |
return std::make_pair(global_size, local_size); | |
} | |
template <typename T> void Example<T>::execute_global() { | |
CpuTiming timing; | |
timing.start(); | |
cl::Buffer Ma(_context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, _vector_size * sizeof(T), &_Vo[0]); | |
cl::Buffer Mb(_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, _vector_size * sizeof(T), &_Vb[0]); | |
std::vector<cl::Event> kernel_events(1); | |
kernel_events[0] = enqueue_global(Ma, Mb); | |
cl::Event copy_event = enqueue_read(Ma, _Vo, true, &kernel_events, _vector_size); | |
timing.end(); | |
bool valid = verify(_Vo); | |
std::cout << "Global-OpenCL statistics:" << std::endl | |
<< "\tValid: " << valid << std::endl | |
<< "\tTotal CPU Time: " << timing.sec() << "s" << std::endl; | |
} | |
template <typename T> void Example<T>::execute_local() { | |
CpuTiming timing; | |
timing.start(); | |
cl::Buffer Ma(_context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, _vector_size * sizeof(T), &_Vo[0]); | |
cl::Buffer Mb(_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, _vector_size * sizeof(T), &_Vb[0]); | |
std::vector<cl::Event> kernel_events(1); | |
kernel_events[0] = enqueue_local(Ma, Mb); | |
cl::Event copy_event = enqueue_read(Ma, _Vo, true, &kernel_events, _vector_size); | |
timing.end(); | |
bool valid = verify(_Vo); | |
std::cout << "Local-OpenCL statistics:" << std::endl | |
<< "\tValid: " << valid << std::endl | |
<< "\tTotal CPU Time: " << timing.sec() << "s" << std::endl; | |
} | |
template <typename T> cl::Event Example<T>::enqueue_read(cl::Buffer& M, std::vector<T>& to, bool sync, std::vector<cl::Event>* dependencies, size_t size) { | |
cl::Event event; | |
if (size == 0) size = to.size(); | |
_queue.enqueueReadBuffer(M, sync, 0, size * sizeof(T), &to[0], dependencies, &event); | |
return event; | |
} | |
template <typename T> cl::Event Example<T>::enqueue_write(cl::Buffer& M, std::vector<T>& from, bool sync, std::vector<cl::Event>* dependencies, size_t size) { | |
cl::Event event; | |
if (size == 0) size = from.size(); | |
_queue.enqueueWriteBuffer(M, sync, 0, size * sizeof(T), &from[0], dependencies, &event); | |
return event; | |
} | |
template <typename T> cl::Event Example<T>::enqueue_global(cl::Buffer& Ma, cl::Buffer& Mb, std::vector<cl::Event>* dependencies) { | |
cl::Event event; | |
size_t arg_idx = 0; | |
cl::NDRange range(_global_size); | |
cl::Kernel kernel(_program, "vecmul_g"); | |
kernel.setArg(arg_idx++, Ma); | |
kernel.setArg(arg_idx++, Mb); | |
kernel.setArg(arg_idx++, static_cast<cl_int>(_vector_size)); | |
_queue.enqueueNDRangeKernel(kernel, cl::NullRange, range, cl::NullRange, dependencies, &event); | |
return event; | |
} | |
template <typename T> cl::Event Example<T>::enqueue_local(cl::Buffer& Ma, cl::Buffer& Mb, std::vector<cl::Event>* dependencies) { | |
cl::Event event; | |
size_t arg_idx = 0; | |
cl::NDRange grange(_global_size); | |
cl::NDRange lrange(_local_size); | |
cl::Kernel kernel(_program, "vecmul_l"); | |
kernel.setArg(arg_idx++, Ma); | |
kernel.setArg(arg_idx++, Mb); | |
kernel.setArg(arg_idx++, cl::Local(_local_size * sizeof(T))); // cl::__local(_local_size * sizeof(T)) | |
kernel.setArg(arg_idx++, cl::Local(_local_size * sizeof(T))); | |
kernel.setArg(arg_idx++, static_cast<cl_int>(_vector_size)); | |
_queue.enqueueNDRangeKernel(kernel, cl::NullRange, grange, lrange, dependencies, &event); | |
return event; | |
} | |
void vecmul(float* Va, const float* Vb, size_t N) { | |
for (size_t i = 0; i < N; ++i) { | |
Va[i] = Va[i] * Vb[i]; | |
} | |
} | |
const char* const program_source = | |
"kernel void vecmul_g(\n" | |
" global TYPE* Va,\n" | |
" global TYPE* Vb,\n" | |
" const int N\n" | |
") {\n" | |
" int i = get_global_id(0);\n" | |
" if (i < N) {\n" | |
" Va[i] = Va[i] * Vb[i];\n" | |
" }\n" | |
"}\n" | |
"\n" | |
"kernel void vecmul_l(\n" | |
" global TYPE* Va,\n" | |
" global TYPE* Vb,\n" | |
" local TYPE* Ba,\n" | |
" local TYPE* Bb,\n" | |
" const int N\n" | |
") {\n" | |
" int i = get_global_id(0);\n" | |
" if (i < N) {\n" | |
" int LN = get_group_id(0) *\n" | |
" get_local_size(0);\n" | |
" int L = get_local_id(0) + LN;\n" | |
" Ba[L] = Va[i]; Bb[L] = Vb[i];\n" | |
" barrier(CLK_GLOBAL_MEM_FENCE);\n" | |
" Ba[L] = Ba[L] * Bb[L];\n" | |
" barrier(CLK_LOCAL_MEM_FENCE);\n" | |
" Va[i] = Ba[L];\n" | |
" }\n" | |
"}\n" | |
; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef OPENCL_EXAMPLE_VECMUL_HPP_ | |
#define OPENCL_EXAMPLE_VECMUL_HPP_ | |
#include <vector> | |
#include <iosfwd> | |
#include <ctime> | |
#include <string> | |
#include <cstring> | |
class CpuTiming; | |
template <typename T> class Example; | |
extern const char* const program_source; | |
extern void vecmul(float* Va, const float* Vb, size_t N); | |
class CpuTiming { | |
public: | |
CpuTiming() : _start_clk(0), _end_clk(0) {} | |
inline void start() { _start_clk = std::clock(); } | |
inline void end() { _end_clk = std::clock(); } | |
inline double sec() { return static_cast<double>(_end_clk - _start_clk) / CLOCKS_PER_SEC; } | |
private: | |
clock_t _start_clk; | |
clock_t _end_clk; | |
}; | |
template <typename T> struct TypeInfo { | |
static inline constexpr const char* type_name() { return ""; } | |
}; | |
template <> struct TypeInfo<cl_int> { | |
static inline constexpr const char* type_name() { return "int"; } | |
}; | |
template <> struct TypeInfo<cl_float> { | |
static inline constexpr const char* type_name() { return "float"; } | |
}; | |
template <> struct TypeInfo<cl_double> { | |
static inline constexpr const char* type_name() { return "double"; } | |
}; | |
template <typename T> class Example { | |
public: | |
Example(); | |
Example(const Example<T>& other) = default; | |
int parse(int argc, const char** argv); | |
void split(std::vector<Example<T>> per_command_examples); | |
int initialize(); | |
void print_options(std::ostream& os); | |
int execute(); | |
private: | |
void execute_global(); | |
void execute_local(); | |
cl::Event enqueue_read(cl::Buffer& M, std::vector<T>& to, bool sync, std::vector<cl::Event>* dependencies = NULL, size_t size = 0); | |
cl::Event enqueue_write(cl::Buffer& M, std::vector<T>& from, bool sync, std::vector<cl::Event>* dependencies = NULL, size_t size = 0); | |
cl::Event enqueue_global(cl::Buffer& Ma, cl::Buffer& Mb, std::vector<cl::Event>* dependencies = NULL); | |
cl::Event enqueue_local(cl::Buffer& Ma, cl::Buffer& Mb, std::vector<cl::Event>* dependencies = NULL); | |
void fill(); | |
void copy(const std::vector<T>& from, std::vector<T>& to); | |
bool verify(const std::vector<T>& Vo); | |
bool within_margin(T correct_value, T value); | |
std::pair<size_t, size_t> optimal_sizes(); | |
private: | |
// Options | |
std::string _program_name; | |
std::string _commands; | |
int _platform_idx; | |
size_t _vector_size; | |
T _min_value; | |
T _max_value; | |
T _margin; | |
bool _verbose; | |
// State | |
std::string _program_source; | |
size_t _global_size; | |
size_t _local_size; | |
std::vector<T> _Va, _Vb, _Vo; | |
cl::Context _context; | |
cl::Device _device; | |
cl::Program _program; | |
cl::CommandQueue _queue; | |
}; | |
#endif /* OPENCL_EXAMPLE_VECMUL_HPP_ */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define __CL_ENABLE_EXCEPTIONS | |
#include <CL/cl.hpp> | |
#include <sstream> | |
#include <string> | |
#include <vector> | |
#include "opencl-helper.hpp" | |
size_t highest_pow2(size_t num) { | |
size_t ret = 1; | |
while (ret < num) ret <<= 1; | |
return ret; | |
} | |
size_t next_multiple_ge_size(size_t size, size_t dimSize) { | |
size_t v = 1; | |
while (v * dimSize < size) ++v; | |
return dimSize * v; | |
} | |
bool filter_devices(cl::Platform platform, std::vector<cl::Device>& output, size_t num_devices, cl_device_type device_type) { | |
bool foundDevice = false; | |
std::vector<cl::Device> platformDevices; | |
platform.getDevices(device_type, &platformDevices); | |
if (!platformDevices.empty() && platformDevices.size()) { | |
foundDevice = true; | |
if (num_devices == 0) | |
num_devices = platformDevices.size(); | |
output.insert(output.end(), platformDevices.begin(), platformDevices.begin() + std::min(num_devices, platformDevices.size())); | |
} | |
return foundDevice; | |
} | |
cl::Context create_context(size_t num_devices, cl_device_type device_type, int platform_idx, context_callback_f callback, void* callback_data) { | |
std::vector<cl::Platform> platforms; | |
cl::Platform::get(&platforms); | |
int total_devices = (num_devices) ? num_devices : -1; | |
if (platforms.empty()) | |
throw Error("No OpenCL Platform found, please check your OpenCL installation."); | |
std::vector<cl::Device> devices; | |
if (platform_idx < 0) { | |
for (std::vector<cl::Platform>::iterator it = platforms.begin(); (total_devices != 0) && it != platforms.end(); ++it) { | |
cl::Platform platform = *it; | |
if (total_devices > 0) { | |
size_t previous_size = devices.size(); | |
filter_devices(platform, devices, total_devices, device_type); | |
total_devices -= devices.size() - previous_size; | |
} else if (total_devices < 0){ | |
filter_devices(platform, devices, 0, device_type); | |
} | |
} | |
} else if (platform_idx >= static_cast<int>(platforms.size())) { | |
throw Error("Invalid platform index given."); | |
} else { | |
cl::Platform platform = platforms[platform_idx]; | |
filter_devices(platform, devices, num_devices, device_type); | |
} | |
if (devices.empty()) | |
throw Error("Could not find available/enough devices."); | |
cl::Context context(devices, NULL, callback, callback_data); | |
return context; | |
} | |
cl::Program compile_program(cl::Context& context, const char* source, size_t source_size, const char* options) { | |
std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>(); | |
if (devices.empty()) | |
throw Error("No device available in context."); | |
cl::Program::Sources sources; | |
sources.push_back(std::make_pair(source, source_size)); | |
cl::Program program(context, sources); | |
program.build(devices, options, NULL); | |
std::stringstream ss; | |
for (auto& device : devices) { | |
if (program.getBuildInfo<CL_PROGRAM_BUILD_STATUS>(device) != CL_BUILD_SUCCESS) { | |
ss | |
<< "Program could not be built for device \"" << device.getInfo<CL_DEVICE_NAME>() << "\"." | |
<< " Status: " << program.getBuildInfo<CL_PROGRAM_BUILD_STATUS>(device) | |
<< ", Options: " << program.getBuildInfo<CL_PROGRAM_BUILD_OPTIONS>(device) | |
<< ", Log: \"" << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device) << "\"." | |
<< std::endl; | |
} | |
} | |
std::string errors = ss.str(); | |
if (!errors.empty()) { | |
throw Error(errors); | |
} | |
return program; | |
} | |
cl::Device first_device(cl::Context& context) { | |
std::vector<cl::Device> devices; | |
devices = context.getInfo<CL_CONTEXT_DEVICES>(); | |
return devices[0]; | |
} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef OPENCL_HELPER_HPP_ | |
#define OPENCL_HELPER_HPP_ | |
#define __CL_ENABLE_EXCEPTIONS | |
#include <CL/cl.hpp> | |
#include <exception> | |
typedef void (*context_callback_f)(const char* message, const void* data, size_t data_size, void* user_data); | |
class Error : public std::exception { | |
public: | |
Error() throw() : _code(0) {} | |
Error(const char* what) throw() : _code(0), _what(what) {} | |
Error(const std::string& what) throw() : _code(0), _what(what) {} | |
const char* what() throw() { return _what.c_str(); } | |
int code() throw() { return _code; } | |
private: | |
int _code; | |
std::string _what; | |
}; | |
extern size_t highest_pow2(size_t num); | |
extern size_t next_multiple_ge_size(size_t size, size_t dimSize); | |
extern bool filter_devices(cl::Platform platform, std::vector<cl::Device>& output, size_t num_devices = 0, cl_device_type device_type = CL_DEVICE_TYPE_ALL); | |
extern cl::Context create_context(size_t num_devices = 0, cl_device_type device_type = CL_DEVICE_TYPE_ALL, int platform_idx = -1, context_callback_f callback = NULL, void* callback_data = NULL); | |
extern cl::Device first_device(cl::Context& context); | |
extern cl::Program compile_program(cl::Context& context, const char* source, size_t source_size, const char* options = NULL); | |
#endif /* OPENCL_HELPER_HPP_ */ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment