Skip to content

Instantly share code, notes, and snippets.

@nomaddo
Created August 30, 2018 22:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nomaddo/d6a02ad8cd159d84fcae4e208415db42 to your computer and use it in GitHub Desktop.
Save nomaddo/d6a02ad8cd159d84fcae4e208415db42 to your computer and use it in GitHub Desktop.
OpenCL example
/*
 * hello: each work-item stores the scalar b into the element of a that is
 * selected by the work-item's global id in dimension 0.
 *
 * a  buffer in global memory; it must hold at least as many floats as the
 *    global work size of the launch, because no bounds check is done here.
 * b  value written to every addressed element.
 *
 * The function must be qualified __kernel (not __global, which is only an
 * address-space qualifier) so that the host can look it up by name.
 */
__kernel void hello(__global float *a, float b)
{
    size_t gid = get_global_id(0);
    a[gid] = b;
}
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <assert.h>
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
#define MAX_SOURCE_SIZE (0x100000)
/*
 * Report a failed OpenCL call on stderr.
 * Returns 1 when status is an error, 0 when it is CL_SUCCESS.
 */
static int cl_failed(cl_int status, const char *what)
{
    if (status == CL_SUCCESS) {
        return 0;
    }
    fprintf(stderr, "%s failed, error code: %d\n", what, (int)status);
    return 1;
}

/*
 * Print the build log of program for device to stderr.
 * Best effort: silently returns if the log cannot be obtained.
 */
static void print_build_log(cl_program program, cl_device_id device)
{
    size_t log_size = 0;

    if (clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
                              0, NULL, &log_size) != CL_SUCCESS
        || log_size == 0) {
        return;
    }

    char *log = malloc(log_size + 1);
    if (!log) {
        return;
    }
    if (clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
                              log_size, log, NULL) == CL_SUCCESS) {
        log[log_size] = '\0';
        fprintf(stderr, "%s\n", log);
    }
    free(log);
}

/*
 * Loads ./hello.cl, builds it for the first GPU of the first platform,
 * runs the "hello" kernel over N work-items on a float buffer, reads the
 * first N floats back, prints them, and prints the elapsed processor time.
 *
 * Returns EXIT_SUCCESS when every step succeeded and EXIT_FAILURE otherwise.
 * All acquired resources are released on every path through the single
 * cleanup label at the end.
 */
int main(int argc, char * argv[]) {
    const char *fileName = "./hello.cl";
    const char *kernel_name = "hello";
    const size_t N = 8;                       /* work-items launched and values printed */
    const size_t len = N * 20000;             /* elements in the host and device buffers */
    const size_t bytes = len * sizeof(float);
    const float f = 3.14f;                    /* scalar passed as kernel argument 1 */
    int exit_code = EXIT_FAILURE;

    /* Every resource starts out NULL so the cleanup code can tell what to release. */
    cl_platform_id platform_id = NULL;
    cl_device_id device_id = NULL;
    cl_context context = NULL;
    cl_command_queue command_queue = NULL;
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    cl_mem arg = NULL;
    cl_int ret;

    FILE *fp = NULL;
    char *source_str = NULL;
    const char *source_ptr = NULL;
    size_t source_size = 0;
    int read_error = 0;
    float *val = NULL;
    size_t global_item_size[1];
    clock_t begin;
    clock_t end;
    double runtime_ms;

    (void)argc;
    (void)argv;

    /* Load the kernel source text. */
    fp = fopen(fileName, "r");
    if (!fp) {
        fprintf(stderr, "Failed to load kernel\n");
        goto cleanup;
    }
    source_str = malloc(MAX_SOURCE_SIZE);
    if (!source_str) {
        fprintf(stderr, "Out of memory reading %s\n", fileName);
        fclose(fp);
        goto cleanup;
    }
    source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
    read_error = ferror(fp);
    fclose(fp);
    if (read_error || source_size == 0) {
        fprintf(stderr, "Failed to read kernel source from %s\n", fileName);
        goto cleanup;
    }

    /* Select a platform explicitly: a NULL platform is implementation-defined. */
    ret = clGetPlatformIDs(1, &platform_id, NULL);
    if (cl_failed(ret, "clGetPlatformIDs")) {
        goto cleanup;
    }
    ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
    if (cl_failed(ret, "clGetDeviceIDs")) {
        goto cleanup;
    }

    context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
    if (cl_failed(ret, "clCreateContext")) {
        goto cleanup;
    }
    command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
    if (cl_failed(ret, "clCreateCommandQueue")) {
        goto cleanup;
    }

    /* The source is passed with an explicit length, so no NUL terminator is needed. */
    source_ptr = source_str;
    program = clCreateProgramWithSource(context, 1, &source_ptr, &source_size, &ret);
    if (cl_failed(ret, "clCreateProgramWithSource")) {
        goto cleanup;
    }
    ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
    if (cl_failed(ret, "clBuildProgram")) {
        print_build_log(program, device_id);
        goto cleanup;
    }
    kernel = clCreateKernel(program, kernel_name, &ret);
    if (cl_failed(ret, "clCreateKernel")) {
        goto cleanup;
    }

    /* Host data: val[i] = i. */
    val = malloc(bytes);
    if (!val) {
        fprintf(stderr, "Out of memory allocating host buffer\n");
        goto cleanup;
    }
    for (size_t i = 0; i < len; i++) {
        val[i] = (float)i;
    }

    /* arguments */
    arg = clCreateBuffer(context, CL_MEM_READ_WRITE, bytes, NULL, &ret);
    if (cl_failed(ret, "clCreateBuffer")) {
        goto cleanup;
    }
    ret = clEnqueueWriteBuffer(command_queue, arg, CL_TRUE, 0, bytes, val, 0, NULL, NULL);
    if (cl_failed(ret, "clEnqueueWriteBuffer")) {
        goto cleanup;
    }
    ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), &arg);
    if (cl_failed(ret, "clSetKernelArg(0)")) {
        goto cleanup;
    }
    ret = clSetKernelArg(kernel, 1, sizeof(float), &f);
    if (cl_failed(ret, "clSetKernelArg(1)")) {
        goto cleanup;
    }

    /* Timed region: launch, wait, read back and print. clock() measures
     * processor time used by this process, not wall-clock time. */
    begin = clock();
    global_item_size[0] = N;
    ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL,
                                 global_item_size, NULL, 0, NULL, NULL);
    if (cl_failed(ret, "clEnqueueNDRangeKernel")) {
        goto cleanup;
    }
    ret = clFinish(command_queue);
    if (cl_failed(ret, "clFinish")) {
        goto cleanup;
    }

    /* Blocking read: the data is in val when the call returns. */
    ret = clEnqueueReadBuffer(command_queue, arg, CL_TRUE, 0, sizeof(cl_float) * N, val, 0, NULL, NULL);
    if (cl_failed(ret, "clEnqueueReadBuffer")) {
        goto cleanup;
    }
    for (size_t i = 0; i < N; i++) {
        printf("%lf\n", val[i]);
    }
    end = clock();

    /* Convert clock ticks to milliseconds to match the unit in the message. */
    runtime_ms = (double)(end - begin) * 1000.0 / CLOCKS_PER_SEC;
    printf("Runtime: %lfms\n", runtime_ms);
    exit_code = EXIT_SUCCESS;

cleanup:
    if (kernel) {
        clReleaseKernel(kernel);
    }
    if (program) {
        clReleaseProgram(program);
    }
    if (arg) {
        clReleaseMemObject(arg);
    }
    if (command_queue) {
        clReleaseCommandQueue(command_queue);
    }
    if (context) {
        clReleaseContext(context);
    }
    free(val);
    free(source_str);
    return exit_code;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment