Skip to content

Instantly share code, notes, and snippets.

@usbalbin
Last active February 5, 2018 17:19
Show Gist options
  • Save usbalbin/44cbf47240940c23698d8278e1d377da to your computer and use it in GitHub Desktop.
Save usbalbin/44cbf47240940c23698d8278e1d377da to your computer and use it in GitHub Desktop.
#include "CL/cl.h"
#include <vector>
#include <iostream>
#include <cassert>
// Number of create / run / release cycles performed by leak() and no_leak().
#define ITERATION_COUNT 1
// Number of float elements in each buffer; also used as the global work size of the kernel launch.
#define ELEM_COUNT 1000000
// Device type handed to setup() when selecting the OpenCL device.
#define DEVICE_TYPE CL_DEVICE_TYPE_GPU
// Creates a context, a command queue and the "simple" kernel for a device of the given type.
void setup(cl_device_type, cl_context&, cl_command_queue&, cl_kernel&);
// Looks up a platform/device pair of the given type; returns true on success.
bool get_device(cl_device_type device_type, cl_platform_id& platform_out, cl_device_id& device_out);
// Prints the OpenCL reference count of `buffer`; `s` and `i` only label the output line.
void print_ref_count(const char* s, int i, cl_mem buffer);
// Create / run / release loop that releases the buffer without waiting for the kernel.
void leak();
// Same loop as leak(), but waits on the kernel's event before releasing the buffer.
void no_leak();
// Entry point: runs the release-without-waiting scenario first, then the
// wait-before-release scenario.
int main() {
    leak();
    no_leak();
    return 0;
}
void leak() {
cl_context context;
cl_command_queue queue;
cl_kernel kernel;
setup(DEVICE_TYPE, context, queue, kernel);
for (int i = 0; i < ITERATION_COUNT; i++) {
cl_int status;
cl_mem buf = clCreateBuffer(context, CL_MEM_READ_WRITE, ELEM_COUNT * sizeof(float), nullptr, &status);
assert(status == CL_SUCCESS);
print_ref_count("creation", i, buf);
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &buf);
assert(status == CL_SUCCESS);
print_ref_count("setting argument", i, buf);
size_t elem_count = ELEM_COUNT;
status = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, &elem_count, nullptr, 0, nullptr, nullptr);
assert(status == CL_SUCCESS);
print_ref_count("enqueueing kernel", i, buf);
status = clReleaseMemObject(buf);
assert(status == CL_SUCCESS);
}
}
void no_leak() {
cl_context context;
cl_command_queue queue;
cl_kernel kernel;
setup(DEVICE_TYPE, context, queue, kernel);
for (int i = 0; i < ITERATION_COUNT; i++) {
cl_int status;
cl_mem buf = clCreateBuffer(context, CL_MEM_READ_WRITE, ELEM_COUNT * sizeof(float), nullptr, &status);
assert(status == CL_SUCCESS);
print_ref_count("creation", i, buf); //Buffer i created, reference count = 1
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &buf);
assert(status == CL_SUCCESS);
print_ref_count("setting argument", i, buf);
size_t elem_count = ELEM_COUNT;
cl_event evnt;
status = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, &elem_count, nullptr, 0, nullptr, &evnt);
assert(status == CL_SUCCESS);
status = clWaitForEvents(1, &evnt);
assert(status == CL_SUCCESS);
print_ref_count("waiting for kernel", i, buf);
status = clReleaseMemObject(buf);
assert(status == CL_SUCCESS);
}
}
// Creates the OpenCL objects used by the test loops.
//
// Selects a device of `device_type` via get_device(), then creates a context
// for it, a command queue on it, and the "simple" kernel built from the inline
// source below. Failures are reported through assert(), matching the rest of
// the file.
//
// Parameters:
//   device_type - OpenCL device type to select.
//   context_out - receives the created context.
//   queue_out   - receives the created command queue.
//   kernel_out  - receives the created kernel.
void setup(cl_device_type device_type, cl_context& context_out, cl_command_queue& queue_out, cl_kernel& kernel_out) {
    cl_platform_id platform = nullptr;
    cl_device_id device = nullptr;

    // The lookup must not live inside assert(): with NDEBUG defined the call
    // would be compiled out and the handles would never be set.
    const bool device_found = get_device(device_type, platform, device);
    assert(device_found);
    (void)device_found;

    // Kernel writes each work item's global id into its own slot.
    const char* src =
        "kernel void simple(global float* out) {"
        "out[get_global_id(0)] = get_global_id(0); "
        "}";

    cl_int status;
    cl_context_properties context_properties[]{
        CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>(platform),
        0
    };
    context_out = clCreateContext(context_properties, 1, &device, nullptr, nullptr, &status);
    assert(status == CL_SUCCESS);

    // A single 0 is an empty, zero-terminated property list.
    cl_queue_properties queue_properties = 0;
    queue_out = clCreateCommandQueueWithProperties(context_out, device, &queue_properties, &status);
    assert(status == CL_SUCCESS);

    // A null `lengths` argument tells OpenCL the source string is
    // null-terminated, so no explicit strlen() is needed.
    cl_program program = clCreateProgramWithSource(context_out, 1, &src, nullptr, &status);
    assert(status == CL_SUCCESS);

    status = clBuildProgram(program, 1, &device, "", nullptr, nullptr);
    if (status != CL_SUCCESS) {
        // Surface the compiler output before asserting; the bare status code
        // says nothing about what is wrong with the kernel source.
        size_t log_size = 0;
        if (clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, nullptr, &log_size) == CL_SUCCESS
            && log_size > 0) {
            std::vector<char> log(log_size + 1, '\0');
            if (clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, log_size, log.data(), nullptr) == CL_SUCCESS) {
                std::cerr << "Kernel build failed:\n" << log.data() << std::endl;
            }
        }
    }
    assert(status == CL_SUCCESS);

    kernel_out = clCreateKernel(program, "simple", &status);
    assert(status == CL_SUCCESS);

    // The kernel keeps the program alive; drop our own reference so the
    // program object is not leaked on every setup() call.
    status = clReleaseProgram(program);
    assert(status == CL_SUCCESS);
}
// Selects the first device of `device_type` found on any available platform.
//
// Platforms are visited in the order reported by clGetPlatformIDs; platforms
// that report an error or no matching device are skipped. The name of the
// selected device is printed to stdout.
//
// Parameters:
//   device_type  - OpenCL device type to look for.
//   platform_out - receives the platform owning the selected device.
//   device_out   - receives the selected device.
//
// Returns true on success; false if no platform offers a matching device
// (the output parameters are left untouched in that case).
bool get_device(cl_device_type device_type, cl_platform_id& platform_out, cl_device_id& device_out) {
    cl_uint platform_count = 0;
    if (clGetPlatformIDs(0, nullptr, &platform_count) != CL_SUCCESS || platform_count == 0) {
        return false;
    }

    std::vector<cl_platform_id> platforms(platform_count);
    if (clGetPlatformIDs(platform_count, platforms.data(), nullptr) != CL_SUCCESS) {
        return false;
    }

    for (const cl_platform_id platform : platforms) {
        // Check the count query on its own: when it fails (e.g. no device of
        // this type on the platform) device_count must not be used for sizing.
        cl_uint device_count = 0;
        if (clGetDeviceIDs(platform, device_type, 0, nullptr, &device_count) != CL_SUCCESS || device_count == 0) {
            continue;
        }

        std::vector<cl_device_id> devices(device_count);
        if (clGetDeviceIDs(platform, device_type, device_count, devices.data(), nullptr) != CL_SUCCESS) {
            continue;
        }

        platform_out = platform;
        device_out = devices.front();

        // Size the name buffer from the device's answer; a fixed-size buffer
        // makes the query fail for long names and leaves the buffer unset.
        size_t name_size = 0;
        std::vector<char> name;
        if (clGetDeviceInfo(device_out, CL_DEVICE_NAME, 0, nullptr, &name_size) == CL_SUCCESS && name_size > 0) {
            name.assign(name_size + 1, '\0');
            if (clGetDeviceInfo(device_out, CL_DEVICE_NAME, name_size, name.data(), nullptr) != CL_SUCCESS) {
                name.clear();
            }
        }
        std::cout << "Selected device: " << (name.empty() ? "<unknown>" : name.data()) << std::endl;
        return true;
    }
    return false;
}
void print_ref_count(const char* s, int i, cl_mem buffer) {
cl_uint reference_count;
cl_int status = clGetMemObjectInfo(buffer, CL_MEM_REFERENCE_COUNT, sizeof(reference_count), &reference_count, nullptr);
if (status == CL_SUCCESS) {
std::cout << "RC for buffer created in iteration " << i << ", after " << s << " is: " << reference_count << std::endl;
}
else {
std::cout << "Failed to get RC for buffer created in iteration " << i << std::endl;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment