Last active
February 5, 2018 17:19
-
-
Save usbalbin/44cbf47240940c23698d8278e1d377da to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "CL/cl.h" | |
#include <vector> | |
#include <iostream> | |
#include <cassert> | |
#define ITERATION_COUNT 1 | |
#define ELEM_COUNT 1000000 | |
#define DEVICE_TYPE CL_DEVICE_TYPE_GPU | |
void setup(cl_device_type, cl_context&, cl_command_queue&, cl_kernel&); | |
bool get_device(cl_device_type device_type, cl_platform_id& platform_out, cl_device_id& device_out); | |
void print_ref_count(const char* s, int i, cl_mem buffer); | |
void leak(); | |
void no_leak(); | |
// Entry point: runs the scenario that releases the buffer without waiting
// for the kernel first, then the scenario that waits before releasing.
int main() {
    leak();
    no_leak();
    return 0;
}
void leak() { | |
cl_context context; | |
cl_command_queue queue; | |
cl_kernel kernel; | |
setup(DEVICE_TYPE, context, queue, kernel); | |
for (int i = 0; i < ITERATION_COUNT; i++) { | |
cl_int status; | |
cl_mem buf = clCreateBuffer(context, CL_MEM_READ_WRITE, ELEM_COUNT * sizeof(float), nullptr, &status); | |
assert(status == CL_SUCCESS); | |
print_ref_count("creation", i, buf); | |
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &buf); | |
assert(status == CL_SUCCESS); | |
print_ref_count("setting argument", i, buf); | |
size_t elem_count = ELEM_COUNT; | |
status = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, &elem_count, nullptr, 0, nullptr, nullptr); | |
assert(status == CL_SUCCESS); | |
print_ref_count("enqueueing kernel", i, buf); | |
status = clReleaseMemObject(buf); | |
assert(status == CL_SUCCESS); | |
} | |
} | |
void no_leak() { | |
cl_context context; | |
cl_command_queue queue; | |
cl_kernel kernel; | |
setup(DEVICE_TYPE, context, queue, kernel); | |
for (int i = 0; i < ITERATION_COUNT; i++) { | |
cl_int status; | |
cl_mem buf = clCreateBuffer(context, CL_MEM_READ_WRITE, ELEM_COUNT * sizeof(float), nullptr, &status); | |
assert(status == CL_SUCCESS); | |
print_ref_count("creation", i, buf); //Buffer i created, reference count = 1 | |
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &buf); | |
assert(status == CL_SUCCESS); | |
print_ref_count("setting argument", i, buf); | |
size_t elem_count = ELEM_COUNT; | |
cl_event evnt; | |
status = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, &elem_count, nullptr, 0, nullptr, &evnt); | |
assert(status == CL_SUCCESS); | |
status = clWaitForEvents(1, &evnt); | |
assert(status == CL_SUCCESS); | |
print_ref_count("waiting for kernel", i, buf); | |
status = clReleaseMemObject(buf); | |
assert(status == CL_SUCCESS); | |
} | |
} | |
// Creates the OpenCL objects needed by the test scenarios.
//
// Picks a device of type `device_type` via get_device(), then creates a
// context and a command queue for it, builds the one-function program
// `simple` and creates a kernel from it.
//
// Outputs (ownership passes to the caller, who should release them):
//   context_out - context created for the selected device
//   queue_out   - command queue on that device
//   kernel_out  - kernel object for `simple`
//
// Failures are reported through assert(), matching the rest of this file.
void setup(cl_device_type device_type, cl_context& context_out, cl_command_queue& queue_out, cl_kernel& kernel_out) {
    // Initialised so that a failed lookup in a build without asserts leads to
    // a well-defined OpenCL error instead of reading indeterminate handles.
    cl_platform_id platform = nullptr;
    cl_device_id device = nullptr;

    // The call must live outside assert(): with NDEBUG defined the assert
    // expression is not evaluated and the device would never be looked up.
    const bool device_found = get_device(device_type, platform, device);
    assert(device_found);
    (void)device_found;

    // Adjacent literals are concatenated by the compiler into one source string.
    const char* src =
        "kernel void simple(global float* out) {"
        "out[get_global_id(0)] = get_global_id(0); "
        "}";

    cl_int status;

    cl_context_properties context_properties[]{
        CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>(platform),
        0
    };
    context_out = clCreateContext(context_properties, 1, &device, nullptr, nullptr, &status);
    assert(status == CL_SUCCESS);

    // A single 0 is an empty, zero-terminated property list.
    cl_queue_properties queue_properties = 0;
    queue_out = clCreateCommandQueueWithProperties(context_out, device, &queue_properties, &status);
    assert(status == CL_SUCCESS);

    // Passing nullptr for the lengths tells OpenCL the source string is
    // null-terminated, so no strlen() (and no <cstring>) is needed.
    cl_program program = clCreateProgramWithSource(context_out, 1, &src, nullptr, &status);
    assert(status == CL_SUCCESS);

    status = clBuildProgram(program, 1, &device, "", nullptr, nullptr);
    assert(status == CL_SUCCESS);

    kernel_out = clCreateKernel(program, "simple", &status);
    assert(status == CL_SUCCESS);

    // The kernel keeps the program alive; drop our own reference so the
    // program object is not leaked once the kernel is released.
    status = clReleaseProgram(program);
    assert(status == CL_SUCCESS);
    (void)status; // silence "unused" warnings when asserts are compiled out
}
// Finds the first device of type `device_type` on any available platform.
//
// On success writes the owning platform to `platform_out`, the device to
// `device_out`, prints the device name to stdout and returns true.
// Returns false (leaving the outputs untouched) when no platform offers a
// matching device or the platform list cannot be queried.
bool get_device(cl_device_type device_type, cl_platform_id& platform_out, cl_device_id& device_out) {
    cl_uint platform_count = 0;
    if (clGetPlatformIDs(0, nullptr, &platform_count) != CL_SUCCESS || platform_count == 0) {
        return false;
    }

    std::vector<cl_platform_id> platforms(platform_count);
    if (clGetPlatformIDs(platform_count, platforms.data(), nullptr) != CL_SUCCESS) {
        return false;
    }

    for (const cl_platform_id platform : platforms) {
        // First call only asks how many matching devices exist. If it fails
        // (e.g. no such device on this platform) the count must not be used.
        cl_uint device_count = 0;
        if (clGetDeviceIDs(platform, device_type, 0, nullptr, &device_count) != CL_SUCCESS || device_count == 0) {
            continue;
        }

        std::vector<cl_device_id> devices(device_count);
        if (clGetDeviceIDs(platform, device_type, device_count, devices.data(), nullptr) != CL_SUCCESS) {
            continue;
        }

        platform_out = platform;
        device_out = devices.front();

        // Query the name length first so that long device names neither fail
        // the call nor leave the buffer uninitialised.
        std::vector<char> name;
        size_t name_size = 0;
        if (clGetDeviceInfo(device_out, CL_DEVICE_NAME, 0, nullptr, &name_size) == CL_SUCCESS && name_size > 0) {
            // One extra zero byte guarantees termination regardless of the driver.
            name.assign(name_size + 1, '\0');
            if (clGetDeviceInfo(device_out, CL_DEVICE_NAME, name_size, name.data(), nullptr) != CL_SUCCESS) {
                name.clear();
            }
        }

        std::cout << "Selected device: " << (name.empty() ? "<unknown>" : name.data()) << std::endl;
        return true;
    }

    return false;
}
void print_ref_count(const char* s, int i, cl_mem buffer) { | |
cl_uint reference_count; | |
cl_int status = clGetMemObjectInfo(buffer, CL_MEM_REFERENCE_COUNT, sizeof(reference_count), &reference_count, nullptr); | |
if (status == CL_SUCCESS) { | |
std::cout << "RC for buffer created in iteration " << i << ", after " << s << " is: " << reference_count << std::endl; | |
} | |
else { | |
std::cout << "Failed to get RC for buffer created in iteration " << i << std::endl; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment