Skip to content

Instantly share code, notes, and snippets.

@jrprice
Last active September 1, 2017 03:40
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jrprice/11343623 to your computer and use it in GitHub Desktop.
Save jrprice/11343623 to your computer and use it in GitHub Desktop.
Benchmark for different methods of copying strided data from a buffer to an image using OpenCL
#include <assert.h>
#include <stdio.h>
#include <sys/time.h>
#include <CL/cl.h>
// Platform and device indices to use
#define PLATFORM 0
#define DEVICE 0
#define WIDTH 1020
#define HEIGHT 1020
#define STRIDE 1024
struct
{
cl_platform_id platform;
cl_device_id device;
cl_context context;
cl_command_queue queue;
cl_program program;
cl_kernel copyKernel;
cl_kernel fillBufferKernel;
cl_kernel fillImageKernel;
cl_mem buffer;
cl_mem image;
cl_mem temp;
} cl = {0};
void checkError(cl_int err, const char *operation);
long getTime();
void init();
void release();
void clearImage();
void printResult(const char *method, long runtime);
void methodCopyLines();
void methodCopyRect();
void methodKernel();
const char *source[] =
{
"kernel void copyToImage(global uchar4 *buffer, write_only image2d_t image)",
"{",
" int x = get_global_id(0);",
" int y = get_global_id(1);",
" uchar4 pixel = buffer[x + y*STRIDE];",
" write_imageui(image, (int2)(x, y), convert_uint4(pixel));",
"}",
"",
"kernel void fillBuffer(global uchar4 *buffer)",
"{",
" int x = get_global_id(0);",
" int y = get_global_id(1);",
" buffer[x + y*STRIDE] = (uchar4)((x + y*STRIDE)%256);",
"}",
"",
"kernel void fillImage(write_only image2d_t image)",
"{",
" int x = get_global_id(0);",
" int y = get_global_id(1);",
" write_imageui(image, (int2)(x, y), 0);",
"}",
"",
};
int main(int argc, char *argv[])
{
init();
long start, end;
// Kernel method
clearImage();
start = getTime();
methodKernel();
end = getTime();
printResult("kernel", end-start);
// Line-by-line copy method
clearImage();
start = getTime();
methodCopyLines();
end = getTime();
printResult("copy lines", end-start);
// Copy buffer rect with intermediate buffer
clearImage();
start = getTime();
methodCopyRect();
end = getTime();
printResult("copy rect", end-start);
release();
return 0;
}
void methodKernel()
{
cl_int err;
size_t global[2] = {WIDTH, HEIGHT};
err = clEnqueueNDRangeKernel(cl.queue, cl.copyKernel,
2, NULL, global, NULL, 0, NULL, NULL);
checkError(err, "enqueuing copy kernel");
err = clFinish(cl.queue);
checkError(err, "running copy kernel");
}
void methodCopyLines()
{
cl_int err = CL_SUCCESS;
size_t offset = 0;
size_t origin[3] = {0, 0, 0};
size_t region[3] = {WIDTH, 1, 1};
for (int i = 0; i < HEIGHT; i++)
{
err |= clEnqueueCopyBufferToImage(cl.queue, cl.buffer, cl.image,
offset, origin, region, 0, NULL, NULL);
offset += STRIDE*4;
origin[1]++;
}
checkError(err, "enqueing line copies");
err = clFinish(cl.queue);
checkError(err, "running copy lines");
}
void methodCopyRect()
{
cl_int err;
size_t origin[3] = {0, 0, 0};
size_t region[3] = {WIDTH*4, HEIGHT, 1};
err = clEnqueueCopyBufferRect(cl.queue, cl.buffer, cl.temp,
origin, origin, region, STRIDE*4, 0, 0, 0,
0, NULL, NULL);
checkError(err, "enqueing copy buffer rect");
region[0] = WIDTH;
err = clEnqueueCopyBufferToImage(cl.queue, cl.temp, cl.image,
0, origin, region, 0, NULL, NULL);
checkError(err, "enqueing copy buffer to image");
err = clFinish(cl.queue);
checkError(err, "running copy rect");
}
void clearImage()
{
cl_int err;
size_t global[2] = {WIDTH, HEIGHT};
err = clEnqueueNDRangeKernel(cl.queue, cl.fillImageKernel,
2, NULL, global, NULL, 0, NULL, NULL);
checkError(err, "enqueuing fill image kernel");
err = clFinish(cl.queue);
checkError(err, "clearing image");
}
void printResult(const char *method, long runtime)
{
cl_int err;
// Read result back
size_t origin[3] = {0, 0, 0};
size_t region[3] = {WIDTH, HEIGHT, 1};
unsigned char result[HEIGHT][WIDTH][4];
err = clEnqueueReadImage(cl.queue, cl.image, CL_TRUE,
origin, region, 0, 0, result, 0, NULL, NULL);
checkError(err, "reading result");
// Check values are valid
int errors = 0;
for (int y = 0; y < HEIGHT; y++)
{
for (int x = 0; x < WIDTH; x++)
{
for (int c = 0; c < 4; c++)
{
if (result[y][x][c] != (x + y*STRIDE)%256)
{
errors++;
if (errors < 8)
{
printf("Error at (%d,%d): %d != %d\n",
x, y, result[y][x][c], (x + y*STRIDE)%256);
}
}
}
}
}
printf("Method %-12s: %4ld microseconds (%d errors)\n",
method, runtime, errors);
}
void checkError(cl_int err, const char *operation)
{
if (err != CL_SUCCESS)
{
printf("Error during operation '%s': %d\n", operation, err);
release();
exit(1);
}
}
long getTime()
{
struct timeval tv;
gettimeofday(&tv, NULL);
return tv.tv_usec + tv.tv_sec*1e6;
}
void init()
{
cl_int err;
cl_uint num;
cl_platform_id platforms[PLATFORM+1];
err = clGetPlatformIDs(PLATFORM+1, platforms, &num);
checkError(err, "getting platform IDs");
assert(num > PLATFORM);
cl.platform = platforms[PLATFORM];
cl_device_id devices[DEVICE+1];
err = clGetDeviceIDs(cl.platform, CL_DEVICE_TYPE_ALL, DEVICE+1, devices, &num);
checkError(err, "getting device IDs");
assert(num > DEVICE);
cl.device = devices[DEVICE];
char name[256];
clGetDeviceInfo(cl.device, CL_DEVICE_NAME, 256, name, NULL);
printf("Using device: %s\n", name);
cl.context = clCreateContext(NULL, 1, &cl.device, NULL, NULL, &err);
checkError(err, "creating context");
cl.queue = clCreateCommandQueue(cl.context, cl.device, 0, &err);
checkError(err, "creating command queue");
size_t lines = sizeof(source)/sizeof(const char*);
cl.program = clCreateProgramWithSource(cl.context, lines, source, NULL, &err);
checkError(err, "creating program");
char options[256];
sprintf(options, "-DSTRIDE=%d", STRIDE);
err = clBuildProgram(cl.program, 1, &cl.device, options, NULL, NULL);
if (err == CL_BUILD_PROGRAM_FAILURE)
{
size_t size;
clGetProgramBuildInfo(cl.program, cl.device, CL_PROGRAM_BUILD_LOG,
0, NULL, &size);
char *log = malloc(++size);
clGetProgramBuildInfo(cl.program, cl.device, CL_PROGRAM_BUILD_LOG,
size, log, NULL);
printf("Build log:\n%s\n", log);
}
checkError(err, "building program");
cl.copyKernel = clCreateKernel(cl.program, "copyToImage", &err);
checkError(err, "creating copy kernel");
cl.fillBufferKernel = clCreateKernel(cl.program, "fillBuffer", &err);
checkError(err, "creating fill buffer kernel");
cl.fillImageKernel = clCreateKernel(cl.program, "fillImage", &err);
checkError(err, "creating fill image kernel");
cl.buffer = clCreateBuffer(cl.context, CL_MEM_READ_WRITE,
STRIDE*HEIGHT*sizeof(cl_uchar4), NULL, &err);
checkError(err, "creating buffer");
cl_image_format format = {CL_RGBA, CL_UNSIGNED_INT8};
cl.image = clCreateImage2D(cl.context, CL_MEM_WRITE_ONLY, &format,
WIDTH, HEIGHT, 0, NULL, &err);
checkError(err, "creating image");
cl.temp = clCreateBuffer(cl.context, CL_MEM_READ_WRITE,
WIDTH*HEIGHT*sizeof(cl_uchar4), NULL, &err);
checkError(err, "creating temporary buffer");
err = clSetKernelArg(cl.fillBufferKernel, 0, sizeof(cl_mem), &cl.buffer);
checkError(err, "setting fill buffer kernel arguments");
err = clSetKernelArg(cl.fillImageKernel, 0, sizeof(cl_mem), &cl.image);
checkError(err, "setting fill image kernel arguments");
err = clSetKernelArg(cl.copyKernel, 0, sizeof(cl_mem), &cl.buffer);
err |= clSetKernelArg(cl.copyKernel, 1, sizeof(cl_mem), &cl.image);
checkError(err, "setting copy kernel arguments");
size_t global[2] = {STRIDE, HEIGHT};
err = clEnqueueNDRangeKernel(cl.queue, cl.fillBufferKernel,
2, NULL, global, NULL, 0, NULL, NULL);
checkError(err, "enqueuing fill buffer kernel");
}
void release()
{
if (cl.context) clReleaseContext(cl.context);
if (cl.queue) clReleaseCommandQueue(cl.queue);
if (cl.program) clReleaseProgram(cl.program);
if (cl.fillBufferKernel) clReleaseKernel(cl.fillBufferKernel);
if (cl.fillImageKernel) clReleaseKernel(cl.fillImageKernel);
if (cl.copyKernel) clReleaseKernel(cl.copyKernel);
if (cl.buffer) clReleaseMemObject(cl.buffer);
if (cl.image) clReleaseMemObject(cl.image);
if (cl.temp) clReleaseMemObject(cl.temp);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment