Last active
September 1, 2017 03:40
-
-
Save jrprice/11343623 to your computer and use it in GitHub Desktop.
Benchmark for different methods of copying strided data from a buffer to an image using OpenCL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <assert.h> | |
#include <stdio.h> | |
#include <sys/time.h> | |
#include <CL/cl.h> | |
// Platform and device indices to use | |
#define PLATFORM 0 | |
#define DEVICE 0 | |
#define WIDTH 1020 | |
#define HEIGHT 1020 | |
#define STRIDE 1024 | |
struct | |
{ | |
cl_platform_id platform; | |
cl_device_id device; | |
cl_context context; | |
cl_command_queue queue; | |
cl_program program; | |
cl_kernel copyKernel; | |
cl_kernel fillBufferKernel; | |
cl_kernel fillImageKernel; | |
cl_mem buffer; | |
cl_mem image; | |
cl_mem temp; | |
} cl = {0}; | |
void checkError(cl_int err, const char *operation); | |
long getTime(); | |
void init(); | |
void release(); | |
void clearImage(); | |
void printResult(const char *method, long runtime); | |
void methodCopyLines(); | |
void methodCopyRect(); | |
void methodKernel(); | |
const char *source[] = | |
{ | |
"kernel void copyToImage(global uchar4 *buffer, write_only image2d_t image)", | |
"{", | |
" int x = get_global_id(0);", | |
" int y = get_global_id(1);", | |
" uchar4 pixel = buffer[x + y*STRIDE];", | |
" write_imageui(image, (int2)(x, y), convert_uint4(pixel));", | |
"}", | |
"", | |
"kernel void fillBuffer(global uchar4 *buffer)", | |
"{", | |
" int x = get_global_id(0);", | |
" int y = get_global_id(1);", | |
" buffer[x + y*STRIDE] = (uchar4)((x + y*STRIDE)%256);", | |
"}", | |
"", | |
"kernel void fillImage(write_only image2d_t image)", | |
"{", | |
" int x = get_global_id(0);", | |
" int y = get_global_id(1);", | |
" write_imageui(image, (int2)(x, y), 0);", | |
"}", | |
"", | |
}; | |
int main(int argc, char *argv[]) | |
{ | |
init(); | |
long start, end; | |
// Kernel method | |
clearImage(); | |
start = getTime(); | |
methodKernel(); | |
end = getTime(); | |
printResult("kernel", end-start); | |
// Line-by-line copy method | |
clearImage(); | |
start = getTime(); | |
methodCopyLines(); | |
end = getTime(); | |
printResult("copy lines", end-start); | |
// Copy buffer rect with intermediate buffer | |
clearImage(); | |
start = getTime(); | |
methodCopyRect(); | |
end = getTime(); | |
printResult("copy rect", end-start); | |
release(); | |
return 0; | |
} | |
void methodKernel() | |
{ | |
cl_int err; | |
size_t global[2] = {WIDTH, HEIGHT}; | |
err = clEnqueueNDRangeKernel(cl.queue, cl.copyKernel, | |
2, NULL, global, NULL, 0, NULL, NULL); | |
checkError(err, "enqueuing copy kernel"); | |
err = clFinish(cl.queue); | |
checkError(err, "running copy kernel"); | |
} | |
void methodCopyLines() | |
{ | |
cl_int err = CL_SUCCESS; | |
size_t offset = 0; | |
size_t origin[3] = {0, 0, 0}; | |
size_t region[3] = {WIDTH, 1, 1}; | |
for (int i = 0; i < HEIGHT; i++) | |
{ | |
err |= clEnqueueCopyBufferToImage(cl.queue, cl.buffer, cl.image, | |
offset, origin, region, 0, NULL, NULL); | |
offset += STRIDE*4; | |
origin[1]++; | |
} | |
checkError(err, "enqueing line copies"); | |
err = clFinish(cl.queue); | |
checkError(err, "running copy lines"); | |
} | |
void methodCopyRect() | |
{ | |
cl_int err; | |
size_t origin[3] = {0, 0, 0}; | |
size_t region[3] = {WIDTH*4, HEIGHT, 1}; | |
err = clEnqueueCopyBufferRect(cl.queue, cl.buffer, cl.temp, | |
origin, origin, region, STRIDE*4, 0, 0, 0, | |
0, NULL, NULL); | |
checkError(err, "enqueing copy buffer rect"); | |
region[0] = WIDTH; | |
err = clEnqueueCopyBufferToImage(cl.queue, cl.temp, cl.image, | |
0, origin, region, 0, NULL, NULL); | |
checkError(err, "enqueing copy buffer to image"); | |
err = clFinish(cl.queue); | |
checkError(err, "running copy rect"); | |
} | |
void clearImage() | |
{ | |
cl_int err; | |
size_t global[2] = {WIDTH, HEIGHT}; | |
err = clEnqueueNDRangeKernel(cl.queue, cl.fillImageKernel, | |
2, NULL, global, NULL, 0, NULL, NULL); | |
checkError(err, "enqueuing fill image kernel"); | |
err = clFinish(cl.queue); | |
checkError(err, "clearing image"); | |
} | |
void printResult(const char *method, long runtime) | |
{ | |
cl_int err; | |
// Read result back | |
size_t origin[3] = {0, 0, 0}; | |
size_t region[3] = {WIDTH, HEIGHT, 1}; | |
unsigned char result[HEIGHT][WIDTH][4]; | |
err = clEnqueueReadImage(cl.queue, cl.image, CL_TRUE, | |
origin, region, 0, 0, result, 0, NULL, NULL); | |
checkError(err, "reading result"); | |
// Check values are valid | |
int errors = 0; | |
for (int y = 0; y < HEIGHT; y++) | |
{ | |
for (int x = 0; x < WIDTH; x++) | |
{ | |
for (int c = 0; c < 4; c++) | |
{ | |
if (result[y][x][c] != (x + y*STRIDE)%256) | |
{ | |
errors++; | |
if (errors < 8) | |
{ | |
printf("Error at (%d,%d): %d != %d\n", | |
x, y, result[y][x][c], (x + y*STRIDE)%256); | |
} | |
} | |
} | |
} | |
} | |
printf("Method %-12s: %4ld microseconds (%d errors)\n", | |
method, runtime, errors); | |
} | |
void checkError(cl_int err, const char *operation) | |
{ | |
if (err != CL_SUCCESS) | |
{ | |
printf("Error during operation '%s': %d\n", operation, err); | |
release(); | |
exit(1); | |
} | |
} | |
long getTime() | |
{ | |
struct timeval tv; | |
gettimeofday(&tv, NULL); | |
return tv.tv_usec + tv.tv_sec*1e6; | |
} | |
void init() | |
{ | |
cl_int err; | |
cl_uint num; | |
cl_platform_id platforms[PLATFORM+1]; | |
err = clGetPlatformIDs(PLATFORM+1, platforms, &num); | |
checkError(err, "getting platform IDs"); | |
assert(num > PLATFORM); | |
cl.platform = platforms[PLATFORM]; | |
cl_device_id devices[DEVICE+1]; | |
err = clGetDeviceIDs(cl.platform, CL_DEVICE_TYPE_ALL, DEVICE+1, devices, &num); | |
checkError(err, "getting device IDs"); | |
assert(num > DEVICE); | |
cl.device = devices[DEVICE]; | |
char name[256]; | |
clGetDeviceInfo(cl.device, CL_DEVICE_NAME, 256, name, NULL); | |
printf("Using device: %s\n", name); | |
cl.context = clCreateContext(NULL, 1, &cl.device, NULL, NULL, &err); | |
checkError(err, "creating context"); | |
cl.queue = clCreateCommandQueue(cl.context, cl.device, 0, &err); | |
checkError(err, "creating command queue"); | |
size_t lines = sizeof(source)/sizeof(const char*); | |
cl.program = clCreateProgramWithSource(cl.context, lines, source, NULL, &err); | |
checkError(err, "creating program"); | |
char options[256]; | |
sprintf(options, "-DSTRIDE=%d", STRIDE); | |
err = clBuildProgram(cl.program, 1, &cl.device, options, NULL, NULL); | |
if (err == CL_BUILD_PROGRAM_FAILURE) | |
{ | |
size_t size; | |
clGetProgramBuildInfo(cl.program, cl.device, CL_PROGRAM_BUILD_LOG, | |
0, NULL, &size); | |
char *log = malloc(++size); | |
clGetProgramBuildInfo(cl.program, cl.device, CL_PROGRAM_BUILD_LOG, | |
size, log, NULL); | |
printf("Build log:\n%s\n", log); | |
} | |
checkError(err, "building program"); | |
cl.copyKernel = clCreateKernel(cl.program, "copyToImage", &err); | |
checkError(err, "creating copy kernel"); | |
cl.fillBufferKernel = clCreateKernel(cl.program, "fillBuffer", &err); | |
checkError(err, "creating fill buffer kernel"); | |
cl.fillImageKernel = clCreateKernel(cl.program, "fillImage", &err); | |
checkError(err, "creating fill image kernel"); | |
cl.buffer = clCreateBuffer(cl.context, CL_MEM_READ_WRITE, | |
STRIDE*HEIGHT*sizeof(cl_uchar4), NULL, &err); | |
checkError(err, "creating buffer"); | |
cl_image_format format = {CL_RGBA, CL_UNSIGNED_INT8}; | |
cl.image = clCreateImage2D(cl.context, CL_MEM_WRITE_ONLY, &format, | |
WIDTH, HEIGHT, 0, NULL, &err); | |
checkError(err, "creating image"); | |
cl.temp = clCreateBuffer(cl.context, CL_MEM_READ_WRITE, | |
WIDTH*HEIGHT*sizeof(cl_uchar4), NULL, &err); | |
checkError(err, "creating temporary buffer"); | |
err = clSetKernelArg(cl.fillBufferKernel, 0, sizeof(cl_mem), &cl.buffer); | |
checkError(err, "setting fill buffer kernel arguments"); | |
err = clSetKernelArg(cl.fillImageKernel, 0, sizeof(cl_mem), &cl.image); | |
checkError(err, "setting fill image kernel arguments"); | |
err = clSetKernelArg(cl.copyKernel, 0, sizeof(cl_mem), &cl.buffer); | |
err |= clSetKernelArg(cl.copyKernel, 1, sizeof(cl_mem), &cl.image); | |
checkError(err, "setting copy kernel arguments"); | |
size_t global[2] = {STRIDE, HEIGHT}; | |
err = clEnqueueNDRangeKernel(cl.queue, cl.fillBufferKernel, | |
2, NULL, global, NULL, 0, NULL, NULL); | |
checkError(err, "enqueuing fill buffer kernel"); | |
} | |
void release() | |
{ | |
if (cl.context) clReleaseContext(cl.context); | |
if (cl.queue) clReleaseCommandQueue(cl.queue); | |
if (cl.program) clReleaseProgram(cl.program); | |
if (cl.fillBufferKernel) clReleaseKernel(cl.fillBufferKernel); | |
if (cl.fillImageKernel) clReleaseKernel(cl.fillImageKernel); | |
if (cl.copyKernel) clReleaseKernel(cl.copyKernel); | |
if (cl.buffer) clReleaseMemObject(cl.buffer); | |
if (cl.image) clReleaseMemObject(cl.image); | |
if (cl.temp) clReleaseMemObject(cl.temp); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment