Skip to content

Instantly share code, notes, and snippets.

Created September 21, 2015 00:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/fdb18f394e33f8eb5b27 to your computer and use it in GitHub Desktop.
Save anonymous/fdb18f394e33f8eb5b27 to your computer and use it in GitHub Desktop.
OpenCL vector global load/store tests
CC = gcc
CFLAGS = -Wall -O2
LDFLAGS = -lOpenCL
all: test_double test_double2 test_double3 test_double4
%: %.c
$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
#include <CL/opencl.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
const char source[] =
"#if __OPENCL_VERSION__ < 120\n"
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
"#endif\n"
"\n"
"__kernel void test_double(__global double *restrict d_pos,\n"
" __global double *restrict d_vel)\n"
"{\n"
" const uint gid = get_global_id(0);\n"
" const double timestep = 0.001;\n"
" d_pos[gid] += d_vel[gid]*timestep;\n"
"}\n"
;
static const char *cl_error(int err);
#define error(fmt, ...) \
{ \
fprintf(stderr, "\033[1m\033[31merror: " fmt "\033[0m\n", __VA_ARGS__); \
return 1; \
}
#define CL(err) \
if (err != CL_SUCCESS) error("%s", cl_error(err));
int main(int argc, const char *const *argv)
{
const size_t glob_size[1] = {256*16384};
const size_t work_size[1] = {256};
cl_int err;
char str[1024];
cl_platform_id platform;
CL(clGetPlatformIDs(1, &platform, NULL));
CL(clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(str), str, NULL));
printf("%s\n", str);
cl_device_id device;
CL(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL));
CL(clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(str), &str, NULL));
printf("%s\n", str);
cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
CL(err);
cl_command_queue queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err);
CL(err);
cl_mem d_r = clCreateBuffer(context, CL_MEM_READ_WRITE, glob_size[0]*sizeof(cl_double), NULL, &err);
CL(err);
cl_mem d_v = clCreateBuffer(context, CL_MEM_READ_WRITE, glob_size[0]*sizeof(cl_double), NULL, &err);
CL(err);
const char *sources[1] = {source};
cl_program program = clCreateProgramWithSource(context, 1, sources, NULL, &err);
CL(err);
CL(clBuildProgram(program, 1, &device, NULL, NULL, NULL));
size_t size;
CL(clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size), &size, NULL));
char *buf = malloc(size);
CL(clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(buf), &buf, NULL));
FILE *f = fopen("test_double.ptx", "w");
fprintf(f, "%s", buf);
fclose(f);
free(buf);
cl_kernel kernel = clCreateKernel(program, "test_double", &err);
CL(err);
CL(clSetKernelArg(kernel, 0, sizeof(d_r), &d_r));
CL(clSetKernelArg(kernel, 1, sizeof(d_v), &d_v));
double elapsed = 0;
int i;
for (i = 0; i < 1000; ++i) {
cl_event event;
CL(clEnqueueNDRangeKernel(queue, kernel, 1, NULL, glob_size, work_size, 0, NULL, &event));
CL(clWaitForEvents(1, &event));
cl_ulong start, end;
CL(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(start), &start, NULL));
CL(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(end), &end, NULL));
CL(clReleaseEvent(event));
elapsed += ((end-start) - elapsed) / (i+1);
}
CL(clReleaseKernel(kernel));
CL(clReleaseProgram(program));
CL(clReleaseMemObject(d_r));
CL(clReleaseMemObject(d_v));
CL(clReleaseCommandQueue(queue));
CL(clReleaseContext(context));
printf("%s: \033[1m%.3g ms\033[0m\n", argv[0], elapsed*1e-6);
return 0;
}
static const char *cl_error(cl_int err)
{
switch (err) {
case CL_SUCCESS:
return "success";
case CL_DEVICE_NOT_FOUND:
return "device not found";
case CL_DEVICE_NOT_AVAILABLE:
return "device not available";
case CL_COMPILER_NOT_AVAILABLE:
return "compiler not available";
case CL_MEM_OBJECT_ALLOCATION_FAILURE:
return "mem object allocation failure";
case CL_OUT_OF_RESOURCES:
return "out of resources";
case CL_OUT_OF_HOST_MEMORY:
return "out of host memory";
case CL_PROFILING_INFO_NOT_AVAILABLE:
return "profiling info not available";
case CL_MEM_COPY_OVERLAP:
return "mem copy overlap";
case CL_IMAGE_FORMAT_MISMATCH:
return "image format mismatch";
case CL_IMAGE_FORMAT_NOT_SUPPORTED:
return "image format not supported";
case CL_BUILD_PROGRAM_FAILURE:
return "build program failure";
case CL_MAP_FAILURE:
return "map failure";
case CL_MISALIGNED_SUB_BUFFER_OFFSET:
return "misaligned sub buffer offset";
case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:
return "exec status error for events in wait list";
#if CL_VERSION_1_2
case CL_COMPILE_PROGRAM_FAILURE:
return "compile program failure";
case CL_LINKER_NOT_AVAILABLE:
return "linker not available";
case CL_LINK_PROGRAM_FAILURE:
return "link program failure";
case CL_DEVICE_PARTITION_FAILED:
return "device partition failed";
case CL_KERNEL_ARG_INFO_NOT_AVAILABLE:
return "kernel arg info not available";
#endif
case CL_INVALID_VALUE:
return "invalid value";
case CL_INVALID_DEVICE_TYPE:
return "invalid device type";
case CL_INVALID_PLATFORM:
return "invalid platform";
case CL_INVALID_DEVICE:
return "invalid device";
case CL_INVALID_CONTEXT:
return "invalid context";
case CL_INVALID_QUEUE_PROPERTIES:
return "invalid queue properties";
case CL_INVALID_COMMAND_QUEUE:
return "invalid command queue";
case CL_INVALID_HOST_PTR:
return "invalid host ptr";
case CL_INVALID_MEM_OBJECT:
return "invalid mem object";
case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
return "invalid image format descriptor";
case CL_INVALID_IMAGE_SIZE:
return "invalid image size";
case CL_INVALID_SAMPLER:
return "invalid sampler";
case CL_INVALID_BINARY:
return "invalid binary";
case CL_INVALID_BUILD_OPTIONS:
return "invalid build options";
case CL_INVALID_PROGRAM:
return "invalid program";
case CL_INVALID_PROGRAM_EXECUTABLE:
return "invalid program executable";
case CL_INVALID_KERNEL_NAME:
return "invalid kernel name";
case CL_INVALID_KERNEL_DEFINITION:
return "invalid kernel definition";
case CL_INVALID_KERNEL:
return "invalid kernel";
case CL_INVALID_ARG_INDEX:
return "invalid arg index";
case CL_INVALID_ARG_VALUE:
return "invalid arg value";
case CL_INVALID_ARG_SIZE:
return "invalid arg size";
case CL_INVALID_KERNEL_ARGS:
return "invalid kernel args";
case CL_INVALID_WORK_DIMENSION:
return "invalid work dimension";
case CL_INVALID_WORK_GROUP_SIZE:
return "invalid work group size";
case CL_INVALID_WORK_ITEM_SIZE:
return "invalid work item size";
case CL_INVALID_GLOBAL_OFFSET:
return "invalid global offset";
case CL_INVALID_EVENT_WAIT_LIST:
return "invalid event wait list";
case CL_INVALID_EVENT:
return "invalid event";
case CL_INVALID_OPERATION:
return "invalid operation";
case CL_INVALID_GL_OBJECT:
return "invalid gl object";
case CL_INVALID_BUFFER_SIZE:
return "invalid buffer size";
case CL_INVALID_MIP_LEVEL:
return "invalid mip level";
case CL_INVALID_GLOBAL_WORK_SIZE:
return "invalid global work size";
case CL_INVALID_PROPERTY:
return "invalid property";
#if CL_VERSION_1_2
case CL_INVALID_IMAGE_DESCRIPTOR:
return "invalid image descriptor";
case CL_INVALID_COMPILER_OPTIONS:
return "invalid compiler options";
case CL_INVALID_LINKER_OPTIONS:
return "invalid linker options";
case CL_INVALID_DEVICE_PARTITION_COUNT:
return "invalid device partition count";
#endif
default:
return "unknown error";
}
}
#include <CL/opencl.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
const char source[] =
"#if __OPENCL_VERSION__ < 120\n"
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
"#endif\n"
"\n"
"__kernel void test_double2(__global double2 *restrict d_pos,\n"
" __global double2 *restrict d_vel)\n"
"{\n"
" const uint gid = get_global_id(0);\n"
" const double timestep = 0.001;\n"
" d_pos[gid] += d_vel[gid]*timestep;\n"
"}\n"
;
static const char *cl_error(int err);
#define error(fmt, ...) \
{ \
fprintf(stderr, "\033[1m\033[31merror: " fmt "\033[0m\n", __VA_ARGS__); \
return 1; \
}
#define CL(err) \
if (err != CL_SUCCESS) error("%s", cl_error(err));
int main(int argc, const char *const *argv)
{
const size_t glob_size[1] = {256*16384};
const size_t work_size[1] = {256};
cl_int err;
char str[1024];
cl_platform_id platform;
CL(clGetPlatformIDs(1, &platform, NULL));
CL(clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(str), str, NULL));
printf("%s\n", str);
cl_device_id device;
CL(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL));
CL(clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(str), &str, NULL));
printf("%s\n", str);
cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
CL(err);
cl_command_queue queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err);
CL(err);
cl_mem d_r = clCreateBuffer(context, CL_MEM_READ_WRITE, glob_size[0]*sizeof(cl_double2), NULL, &err);
CL(err);
cl_mem d_v = clCreateBuffer(context, CL_MEM_READ_WRITE, glob_size[0]*sizeof(cl_double2), NULL, &err);
CL(err);
const char *sources[1] = {source};
cl_program program = clCreateProgramWithSource(context, 1, sources, NULL, &err);
CL(err);
CL(clBuildProgram(program, 1, &device, NULL, NULL, NULL));
size_t size;
CL(clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size), &size, NULL));
char *buf = malloc(size);
CL(clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(buf), &buf, NULL));
FILE *f = fopen("test_double2.ptx", "w");
fprintf(f, "%s", buf);
fclose(f);
free(buf);
cl_kernel kernel = clCreateKernel(program, "test_double2", &err);
CL(err);
CL(clSetKernelArg(kernel, 0, sizeof(d_r), &d_r));
CL(clSetKernelArg(kernel, 1, sizeof(d_v), &d_v));
double elapsed = 0;
int i;
for (i = 0; i < 1000; ++i) {
cl_event event;
CL(clEnqueueNDRangeKernel(queue, kernel, 1, NULL, glob_size, work_size, 0, NULL, &event));
CL(clWaitForEvents(1, &event));
cl_ulong start, end;
CL(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(start), &start, NULL));
CL(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(end), &end, NULL));
CL(clReleaseEvent(event));
elapsed += ((end-start) - elapsed) / (i+1);
}
CL(clReleaseKernel(kernel));
CL(clReleaseProgram(program));
CL(clReleaseMemObject(d_r));
CL(clReleaseMemObject(d_v));
CL(clReleaseCommandQueue(queue));
CL(clReleaseContext(context));
printf("%s: \033[1m%.3g ms\033[0m\n", argv[0], elapsed*1e-6);
return 0;
}
static const char *cl_error(cl_int err)
{
switch (err) {
case CL_SUCCESS:
return "success";
case CL_DEVICE_NOT_FOUND:
return "device not found";
case CL_DEVICE_NOT_AVAILABLE:
return "device not available";
case CL_COMPILER_NOT_AVAILABLE:
return "compiler not available";
case CL_MEM_OBJECT_ALLOCATION_FAILURE:
return "mem object allocation failure";
case CL_OUT_OF_RESOURCES:
return "out of resources";
case CL_OUT_OF_HOST_MEMORY:
return "out of host memory";
case CL_PROFILING_INFO_NOT_AVAILABLE:
return "profiling info not available";
case CL_MEM_COPY_OVERLAP:
return "mem copy overlap";
case CL_IMAGE_FORMAT_MISMATCH:
return "image format mismatch";
case CL_IMAGE_FORMAT_NOT_SUPPORTED:
return "image format not supported";
case CL_BUILD_PROGRAM_FAILURE:
return "build program failure";
case CL_MAP_FAILURE:
return "map failure";
case CL_MISALIGNED_SUB_BUFFER_OFFSET:
return "misaligned sub buffer offset";
case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:
return "exec status error for events in wait list";
#if CL_VERSION_1_2
case CL_COMPILE_PROGRAM_FAILURE:
return "compile program failure";
case CL_LINKER_NOT_AVAILABLE:
return "linker not available";
case CL_LINK_PROGRAM_FAILURE:
return "link program failure";
case CL_DEVICE_PARTITION_FAILED:
return "device partition failed";
case CL_KERNEL_ARG_INFO_NOT_AVAILABLE:
return "kernel arg info not available";
#endif
case CL_INVALID_VALUE:
return "invalid value";
case CL_INVALID_DEVICE_TYPE:
return "invalid device type";
case CL_INVALID_PLATFORM:
return "invalid platform";
case CL_INVALID_DEVICE:
return "invalid device";
case CL_INVALID_CONTEXT:
return "invalid context";
case CL_INVALID_QUEUE_PROPERTIES:
return "invalid queue properties";
case CL_INVALID_COMMAND_QUEUE:
return "invalid command queue";
case CL_INVALID_HOST_PTR:
return "invalid host ptr";
case CL_INVALID_MEM_OBJECT:
return "invalid mem object";
case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
return "invalid image format descriptor";
case CL_INVALID_IMAGE_SIZE:
return "invalid image size";
case CL_INVALID_SAMPLER:
return "invalid sampler";
case CL_INVALID_BINARY:
return "invalid binary";
case CL_INVALID_BUILD_OPTIONS:
return "invalid build options";
case CL_INVALID_PROGRAM:
return "invalid program";
case CL_INVALID_PROGRAM_EXECUTABLE:
return "invalid program executable";
case CL_INVALID_KERNEL_NAME:
return "invalid kernel name";
case CL_INVALID_KERNEL_DEFINITION:
return "invalid kernel definition";
case CL_INVALID_KERNEL:
return "invalid kernel";
case CL_INVALID_ARG_INDEX:
return "invalid arg index";
case CL_INVALID_ARG_VALUE:
return "invalid arg value";
case CL_INVALID_ARG_SIZE:
return "invalid arg size";
case CL_INVALID_KERNEL_ARGS:
return "invalid kernel args";
case CL_INVALID_WORK_DIMENSION:
return "invalid work dimension";
case CL_INVALID_WORK_GROUP_SIZE:
return "invalid work group size";
case CL_INVALID_WORK_ITEM_SIZE:
return "invalid work item size";
case CL_INVALID_GLOBAL_OFFSET:
return "invalid global offset";
case CL_INVALID_EVENT_WAIT_LIST:
return "invalid event wait list";
case CL_INVALID_EVENT:
return "invalid event";
case CL_INVALID_OPERATION:
return "invalid operation";
case CL_INVALID_GL_OBJECT:
return "invalid gl object";
case CL_INVALID_BUFFER_SIZE:
return "invalid buffer size";
case CL_INVALID_MIP_LEVEL:
return "invalid mip level";
case CL_INVALID_GLOBAL_WORK_SIZE:
return "invalid global work size";
case CL_INVALID_PROPERTY:
return "invalid property";
#if CL_VERSION_1_2
case CL_INVALID_IMAGE_DESCRIPTOR:
return "invalid image descriptor";
case CL_INVALID_COMPILER_OPTIONS:
return "invalid compiler options";
case CL_INVALID_LINKER_OPTIONS:
return "invalid linker options";
case CL_INVALID_DEVICE_PARTITION_COUNT:
return "invalid device partition count";
#endif
default:
return "unknown error";
}
}
#include <CL/opencl.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
const char source[] =
"#if __OPENCL_VERSION__ < 120\n"
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
"#endif\n"
"\n"
"__kernel void test_double3(__global double3 *restrict d_pos,\n"
" __global double3 *restrict d_vel)\n"
"{\n"
" const uint gid = get_global_id(0);\n"
" const double timestep = 0.001;\n"
" d_pos[gid] += d_vel[gid]*timestep;\n"
"}\n"
;
static const char *cl_error(int err);
#define error(fmt, ...) \
{ \
fprintf(stderr, "\033[1m\033[31merror: " fmt "\033[0m\n", __VA_ARGS__); \
return 1; \
}
#define CL(err) \
if (err != CL_SUCCESS) error("%s", cl_error(err));
int main(int argc, const char *const *argv)
{
const size_t glob_size[1] = {256*16384};
const size_t work_size[1] = {256};
cl_int err;
char str[1024];
cl_platform_id platform;
CL(clGetPlatformIDs(1, &platform, NULL));
CL(clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(str), str, NULL));
printf("%s\n", str);
cl_device_id device;
CL(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL));
CL(clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(str), &str, NULL));
printf("%s\n", str);
cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
CL(err);
cl_command_queue queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err);
CL(err);
cl_mem d_r = clCreateBuffer(context, CL_MEM_READ_WRITE, glob_size[0]*sizeof(cl_double3), NULL, &err);
CL(err);
cl_mem d_v = clCreateBuffer(context, CL_MEM_READ_WRITE, glob_size[0]*sizeof(cl_double3), NULL, &err);
CL(err);
const char *sources[1] = {source};
cl_program program = clCreateProgramWithSource(context, 1, sources, NULL, &err);
CL(err);
CL(clBuildProgram(program, 1, &device, NULL, NULL, NULL));
size_t size;
CL(clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size), &size, NULL));
char *buf = malloc(size);
CL(clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(buf), &buf, NULL));
FILE *f = fopen("test_double3.ptx", "w");
fprintf(f, "%s", buf);
fclose(f);
free(buf);
cl_kernel kernel = clCreateKernel(program, "test_double3", &err);
CL(err);
CL(clSetKernelArg(kernel, 0, sizeof(d_r), &d_r));
CL(clSetKernelArg(kernel, 1, sizeof(d_v), &d_v));
double elapsed = 0;
int i;
for (i = 0; i < 1000; ++i) {
cl_event event;
CL(clEnqueueNDRangeKernel(queue, kernel, 1, NULL, glob_size, work_size, 0, NULL, &event));
CL(clWaitForEvents(1, &event));
cl_ulong start, end;
CL(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(start), &start, NULL));
CL(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(end), &end, NULL));
CL(clReleaseEvent(event));
elapsed += ((end-start) - elapsed) / (i+1);
}
CL(clReleaseKernel(kernel));
CL(clReleaseProgram(program));
CL(clReleaseMemObject(d_r));
CL(clReleaseMemObject(d_v));
CL(clReleaseCommandQueue(queue));
CL(clReleaseContext(context));
printf("%s: \033[1m%.3g ms\033[0m\n", argv[0], elapsed*1e-6);
return 0;
}
static const char *cl_error(cl_int err)
{
switch (err) {
case CL_SUCCESS:
return "success";
case CL_DEVICE_NOT_FOUND:
return "device not found";
case CL_DEVICE_NOT_AVAILABLE:
return "device not available";
case CL_COMPILER_NOT_AVAILABLE:
return "compiler not available";
case CL_MEM_OBJECT_ALLOCATION_FAILURE:
return "mem object allocation failure";
case CL_OUT_OF_RESOURCES:
return "out of resources";
case CL_OUT_OF_HOST_MEMORY:
return "out of host memory";
case CL_PROFILING_INFO_NOT_AVAILABLE:
return "profiling info not available";
case CL_MEM_COPY_OVERLAP:
return "mem copy overlap";
case CL_IMAGE_FORMAT_MISMATCH:
return "image format mismatch";
case CL_IMAGE_FORMAT_NOT_SUPPORTED:
return "image format not supported";
case CL_BUILD_PROGRAM_FAILURE:
return "build program failure";
case CL_MAP_FAILURE:
return "map failure";
case CL_MISALIGNED_SUB_BUFFER_OFFSET:
return "misaligned sub buffer offset";
case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:
return "exec status error for events in wait list";
#if CL_VERSION_1_2
case CL_COMPILE_PROGRAM_FAILURE:
return "compile program failure";
case CL_LINKER_NOT_AVAILABLE:
return "linker not available";
case CL_LINK_PROGRAM_FAILURE:
return "link program failure";
case CL_DEVICE_PARTITION_FAILED:
return "device partition failed";
case CL_KERNEL_ARG_INFO_NOT_AVAILABLE:
return "kernel arg info not available";
#endif
case CL_INVALID_VALUE:
return "invalid value";
case CL_INVALID_DEVICE_TYPE:
return "invalid device type";
case CL_INVALID_PLATFORM:
return "invalid platform";
case CL_INVALID_DEVICE:
return "invalid device";
case CL_INVALID_CONTEXT:
return "invalid context";
case CL_INVALID_QUEUE_PROPERTIES:
return "invalid queue properties";
case CL_INVALID_COMMAND_QUEUE:
return "invalid command queue";
case CL_INVALID_HOST_PTR:
return "invalid host ptr";
case CL_INVALID_MEM_OBJECT:
return "invalid mem object";
case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
return "invalid image format descriptor";
case CL_INVALID_IMAGE_SIZE:
return "invalid image size";
case CL_INVALID_SAMPLER:
return "invalid sampler";
case CL_INVALID_BINARY:
return "invalid binary";
case CL_INVALID_BUILD_OPTIONS:
return "invalid build options";
case CL_INVALID_PROGRAM:
return "invalid program";
case CL_INVALID_PROGRAM_EXECUTABLE:
return "invalid program executable";
case CL_INVALID_KERNEL_NAME:
return "invalid kernel name";
case CL_INVALID_KERNEL_DEFINITION:
return "invalid kernel definition";
case CL_INVALID_KERNEL:
return "invalid kernel";
case CL_INVALID_ARG_INDEX:
return "invalid arg index";
case CL_INVALID_ARG_VALUE:
return "invalid arg value";
case CL_INVALID_ARG_SIZE:
return "invalid arg size";
case CL_INVALID_KERNEL_ARGS:
return "invalid kernel args";
case CL_INVALID_WORK_DIMENSION:
return "invalid work dimension";
case CL_INVALID_WORK_GROUP_SIZE:
return "invalid work group size";
case CL_INVALID_WORK_ITEM_SIZE:
return "invalid work item size";
case CL_INVALID_GLOBAL_OFFSET:
return "invalid global offset";
case CL_INVALID_EVENT_WAIT_LIST:
return "invalid event wait list";
case CL_INVALID_EVENT:
return "invalid event";
case CL_INVALID_OPERATION:
return "invalid operation";
case CL_INVALID_GL_OBJECT:
return "invalid gl object";
case CL_INVALID_BUFFER_SIZE:
return "invalid buffer size";
case CL_INVALID_MIP_LEVEL:
return "invalid mip level";
case CL_INVALID_GLOBAL_WORK_SIZE:
return "invalid global work size";
case CL_INVALID_PROPERTY:
return "invalid property";
#if CL_VERSION_1_2
case CL_INVALID_IMAGE_DESCRIPTOR:
return "invalid image descriptor";
case CL_INVALID_COMPILER_OPTIONS:
return "invalid compiler options";
case CL_INVALID_LINKER_OPTIONS:
return "invalid linker options";
case CL_INVALID_DEVICE_PARTITION_COUNT:
return "invalid device partition count";
#endif
default:
return "unknown error";
}
}
#include <CL/opencl.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
const char source[] =
"#if __OPENCL_VERSION__ < 120\n"
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
"#endif\n"
"\n"
"__kernel void test_double4(__global double4 *restrict d_pos,\n"
" __global double4 *restrict d_vel)\n"
"{\n"
" const uint gid = get_global_id(0);\n"
" const double timestep = 0.001;\n"
" d_pos[gid] += d_vel[gid]*timestep;\n"
"}\n"
;
static const char *cl_error(int err);
#define error(fmt, ...) \
{ \
fprintf(stderr, "\033[1m\033[31merror: " fmt "\033[0m\n", __VA_ARGS__); \
return 1; \
}
#define CL(err) \
if (err != CL_SUCCESS) error("%s", cl_error(err));
int main(int argc, const char *const *argv)
{
const size_t glob_size[1] = {256*16384};
const size_t work_size[1] = {256};
cl_int err;
char str[1024];
cl_platform_id platform;
CL(clGetPlatformIDs(1, &platform, NULL));
CL(clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(str), str, NULL));
printf("%s\n", str);
cl_device_id device;
CL(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL));
CL(clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(str), &str, NULL));
printf("%s\n", str);
cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
CL(err);
cl_command_queue queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err);
CL(err);
cl_mem d_r = clCreateBuffer(context, CL_MEM_READ_WRITE, glob_size[0]*sizeof(cl_double4), NULL, &err);
CL(err);
cl_mem d_v = clCreateBuffer(context, CL_MEM_READ_WRITE, glob_size[0]*sizeof(cl_double4), NULL, &err);
CL(err);
const char *sources[1] = {source};
cl_program program = clCreateProgramWithSource(context, 1, sources, NULL, &err);
CL(err);
CL(clBuildProgram(program, 1, &device, NULL, NULL, NULL));
size_t size;
CL(clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size), &size, NULL));
char *buf = malloc(size);
CL(clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(buf), &buf, NULL));
FILE *f = fopen("test_double4.ptx", "w");
fprintf(f, "%s", buf);
fclose(f);
free(buf);
cl_kernel kernel = clCreateKernel(program, "test_double4", &err);
CL(err);
CL(clSetKernelArg(kernel, 0, sizeof(d_r), &d_r));
CL(clSetKernelArg(kernel, 1, sizeof(d_v), &d_v));
double elapsed = 0;
int i;
for (i = 0; i < 1000; ++i) {
cl_event event;
CL(clEnqueueNDRangeKernel(queue, kernel, 1, NULL, glob_size, work_size, 0, NULL, &event));
CL(clWaitForEvents(1, &event));
cl_ulong start, end;
CL(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(start), &start, NULL));
CL(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(end), &end, NULL));
CL(clReleaseEvent(event));
elapsed += ((end-start) - elapsed) / (i+1);
}
CL(clReleaseKernel(kernel));
CL(clReleaseProgram(program));
CL(clReleaseMemObject(d_r));
CL(clReleaseMemObject(d_v));
CL(clReleaseCommandQueue(queue));
CL(clReleaseContext(context));
printf("%s: \033[1m%.3g ms\033[0m\n", argv[0], elapsed*1e-6);
return 0;
}
static const char *cl_error(cl_int err)
{
switch (err) {
case CL_SUCCESS:
return "success";
case CL_DEVICE_NOT_FOUND:
return "device not found";
case CL_DEVICE_NOT_AVAILABLE:
return "device not available";
case CL_COMPILER_NOT_AVAILABLE:
return "compiler not available";
case CL_MEM_OBJECT_ALLOCATION_FAILURE:
return "mem object allocation failure";
case CL_OUT_OF_RESOURCES:
return "out of resources";
case CL_OUT_OF_HOST_MEMORY:
return "out of host memory";
case CL_PROFILING_INFO_NOT_AVAILABLE:
return "profiling info not available";
case CL_MEM_COPY_OVERLAP:
return "mem copy overlap";
case CL_IMAGE_FORMAT_MISMATCH:
return "image format mismatch";
case CL_IMAGE_FORMAT_NOT_SUPPORTED:
return "image format not supported";
case CL_BUILD_PROGRAM_FAILURE:
return "build program failure";
case CL_MAP_FAILURE:
return "map failure";
case CL_MISALIGNED_SUB_BUFFER_OFFSET:
return "misaligned sub buffer offset";
case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:
return "exec status error for events in wait list";
#if CL_VERSION_1_2
case CL_COMPILE_PROGRAM_FAILURE:
return "compile program failure";
case CL_LINKER_NOT_AVAILABLE:
return "linker not available";
case CL_LINK_PROGRAM_FAILURE:
return "link program failure";
case CL_DEVICE_PARTITION_FAILED:
return "device partition failed";
case CL_KERNEL_ARG_INFO_NOT_AVAILABLE:
return "kernel arg info not available";
#endif
case CL_INVALID_VALUE:
return "invalid value";
case CL_INVALID_DEVICE_TYPE:
return "invalid device type";
case CL_INVALID_PLATFORM:
return "invalid platform";
case CL_INVALID_DEVICE:
return "invalid device";
case CL_INVALID_CONTEXT:
return "invalid context";
case CL_INVALID_QUEUE_PROPERTIES:
return "invalid queue properties";
case CL_INVALID_COMMAND_QUEUE:
return "invalid command queue";
case CL_INVALID_HOST_PTR:
return "invalid host ptr";
case CL_INVALID_MEM_OBJECT:
return "invalid mem object";
case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
return "invalid image format descriptor";
case CL_INVALID_IMAGE_SIZE:
return "invalid image size";
case CL_INVALID_SAMPLER:
return "invalid sampler";
case CL_INVALID_BINARY:
return "invalid binary";
case CL_INVALID_BUILD_OPTIONS:
return "invalid build options";
case CL_INVALID_PROGRAM:
return "invalid program";
case CL_INVALID_PROGRAM_EXECUTABLE:
return "invalid program executable";
case CL_INVALID_KERNEL_NAME:
return "invalid kernel name";
case CL_INVALID_KERNEL_DEFINITION:
return "invalid kernel definition";
case CL_INVALID_KERNEL:
return "invalid kernel";
case CL_INVALID_ARG_INDEX:
return "invalid arg index";
case CL_INVALID_ARG_VALUE:
return "invalid arg value";
case CL_INVALID_ARG_SIZE:
return "invalid arg size";
case CL_INVALID_KERNEL_ARGS:
return "invalid kernel args";
case CL_INVALID_WORK_DIMENSION:
return "invalid work dimension";
case CL_INVALID_WORK_GROUP_SIZE:
return "invalid work group size";
case CL_INVALID_WORK_ITEM_SIZE:
return "invalid work item size";
case CL_INVALID_GLOBAL_OFFSET:
return "invalid global offset";
case CL_INVALID_EVENT_WAIT_LIST:
return "invalid event wait list";
case CL_INVALID_EVENT:
return "invalid event";
case CL_INVALID_OPERATION:
return "invalid operation";
case CL_INVALID_GL_OBJECT:
return "invalid gl object";
case CL_INVALID_BUFFER_SIZE:
return "invalid buffer size";
case CL_INVALID_MIP_LEVEL:
return "invalid mip level";
case CL_INVALID_GLOBAL_WORK_SIZE:
return "invalid global work size";
case CL_INVALID_PROPERTY:
return "invalid property";
#if CL_VERSION_1_2
case CL_INVALID_IMAGE_DESCRIPTOR:
return "invalid image descriptor";
case CL_INVALID_COMPILER_OPTIONS:
return "invalid compiler options";
case CL_INVALID_LINKER_OPTIONS:
return "invalid linker options";
case CL_INVALID_DEVICE_PARTITION_COUNT:
return "invalid device partition count";
#endif
default:
return "unknown error";
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment