Skip to content

Instantly share code, notes, and snippets.

@heatblazer
Created September 21, 2018 14:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save heatblazer/020cef705aae9621285c6a544f337a8a to your computer and use it in GitHub Desktop.
Save heatblazer/020cef705aae9621285c6a544f337a8a to your computer and use it in GitHub Desktop.
/** Create a file named vector_add_kernel.cl with the following content:
__kernel void vector_add(__global int* A, __global int* B, __global int *C)
{
// get the idx of the current element
int i = get_global_id(0);
C[i] = A[i] + B[i];
}
*/
/**
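Build with (Linux; the library name is case-sensitive):
gcc -o test *.c -lOpenCL
On macOS, link the OpenCL framework instead:
gcc -o test *.c -framework OpenCL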
*/
#include <stdio.h>
#include <stdlib.h>
#ifdef __APPLE__
#include "OpenCL/opencl.h"
#else
#include <CL/cl.h>
#endif
#define MAX_SRC_SIZE 0x100000
int main(int argc, char** argv)
{
int i ;
const int LIST_SIZE = 1024;
int* A = (int*) malloc(sizeof(int) * LIST_SIZE);
int* B = (int*) malloc(sizeof(int) * LIST_SIZE);
for(i=0; i < LIST_SIZE; i++)
{
A[i] = i;
B[i] = LIST_SIZE-i;
}
FILE* fp = 0;
char* src_str = 0;
size_t source_size = 0;
fp= fopen("vector_add_kernel.cl", "r");
if (!fp)
exit(-1);
src_str = (char*)malloc(MAX_SRC_SIZE);
source_size = fread(src_str, 1, MAX_SRC_SIZE, fp);
fclose(fp);
cl_platform_id platform_id = NULL;
cl_device_id device_id = NULL;
cl_uint ret_num_devices;
cl_uint ret_num_platforms;
cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, 1,
&device_id, &ret_num_devices);
cl_context clctx = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
cl_command_queue cmd_queue = clCreateCommandQueue(clctx, device_id, 0, &ret);
cl_mem aMemObj = clCreateBuffer(clctx, CL_MEM_READ_ONLY,
LIST_SIZE* sizeof(int), NULL, &ret);
cl_mem bMemObj = clCreateBuffer(clctx, CL_MEM_READ_ONLY,
LIST_SIZE* sizeof(int), NULL, &ret);
cl_mem cMemObj = clCreateBuffer(clctx, CL_MEM_WRITE_ONLY,
LIST_SIZE* sizeof(int), NULL, &ret);
ret = clEnqueueWriteBuffer(cmd_queue, aMemObj, CL_TRUE, 0,
LIST_SIZE * sizeof(int), A, 0, NULL, NULL);
ret = clEnqueueWriteBuffer(cmd_queue, bMemObj, CL_TRUE, 0,
LIST_SIZE * sizeof(int), B, 0, NULL, NULL);
cl_program program = clCreateProgramWithSource(clctx, 1,
(const char**)&src_str, (const size_t*)&source_size, &ret);
ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
cl_kernel kernel = clCreateKernel(program, "vector_add", &ret);
ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&aMemObj);
ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&bMemObj);
ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&cMemObj);
// execute the ocl kernel on the list
size_t global_item_size = LIST_SIZE;
size_t local_item_size = 64; // process in groups of 64
ret = clEnqueueNDRangeKernel(cmd_queue, kernel, 1, NULL,
&global_item_size, &local_item_size, 0, NULL, NULL);
// read the mem buffer for C array ot the device
int* C = (int*) malloc(sizeof(int) * LIST_SIZE);
ret = clEnqueueReadBuffer(cmd_queue, cMemObj, CL_TRUE, 0,
LIST_SIZE* sizeof(int) , C, 0, NULL, NULL);
for(i=0; i < LIST_SIZE; ++i)
{
printf("%d + %d = %d\r\n", A[i], B[i], C[i]);
}
clFlush(cmd_queue);
clFinish(cmd_queue);
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseMemObject(aMemObj);
clReleaseMemObject(bMemObj);
clReleaseMemObject(cMemObj);
clReleaseCommandQueue(cmd_queue);
clReleaseContext(clctx);
free(A);
free(B);
free(C);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment