Created
September 21, 2018 14:33
-
-
Save heatblazer/020cef705aae9621285c6a544f337a8a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** Create a file named vector_add_kernel.cl (next to the executable's working directory) with the following content:
__kernel void vector_add(__global int* A, __global int* B, __global int *C) | |
{ | |
// get the idx of the current element | |
int i = get_global_id(0); | |
C[i] = A[i] + B[i]; | |
} | |
*/ | |
/** | |
Build on Linux:  gcc -o test *.c -lOpenCL
Build on macOS:  gcc -o test *.c -framework OpenCL
*/ | |
#include <stdio.h> | |
#include <stdlib.h> | |
#ifdef __APPLE__ | |
#include "OpenCL/opencl.h" | |
#else | |
#include <CL/cl.h> | |
#endif | |
#define MAX_SRC_SIZE 0x100000 | |
int main(int argc, char** argv) | |
{ | |
int i ; | |
const int LIST_SIZE = 1024; | |
int* A = (int*) malloc(sizeof(int) * LIST_SIZE); | |
int* B = (int*) malloc(sizeof(int) * LIST_SIZE); | |
for(i=0; i < LIST_SIZE; i++) | |
{ | |
A[i] = i; | |
B[i] = LIST_SIZE-i; | |
} | |
FILE* fp = 0; | |
char* src_str = 0; | |
size_t source_size = 0; | |
fp= fopen("vector_add_kernel.cl", "r"); | |
if (!fp) | |
exit(-1); | |
src_str = (char*)malloc(MAX_SRC_SIZE); | |
source_size = fread(src_str, 1, MAX_SRC_SIZE, fp); | |
fclose(fp); | |
cl_platform_id platform_id = NULL; | |
cl_device_id device_id = NULL; | |
cl_uint ret_num_devices; | |
cl_uint ret_num_platforms; | |
cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms); | |
ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, 1, | |
&device_id, &ret_num_devices); | |
cl_context clctx = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret); | |
cl_command_queue cmd_queue = clCreateCommandQueue(clctx, device_id, 0, &ret); | |
cl_mem aMemObj = clCreateBuffer(clctx, CL_MEM_READ_ONLY, | |
LIST_SIZE* sizeof(int), NULL, &ret); | |
cl_mem bMemObj = clCreateBuffer(clctx, CL_MEM_READ_ONLY, | |
LIST_SIZE* sizeof(int), NULL, &ret); | |
cl_mem cMemObj = clCreateBuffer(clctx, CL_MEM_WRITE_ONLY, | |
LIST_SIZE* sizeof(int), NULL, &ret); | |
ret = clEnqueueWriteBuffer(cmd_queue, aMemObj, CL_TRUE, 0, | |
LIST_SIZE * sizeof(int), A, 0, NULL, NULL); | |
ret = clEnqueueWriteBuffer(cmd_queue, bMemObj, CL_TRUE, 0, | |
LIST_SIZE * sizeof(int), B, 0, NULL, NULL); | |
cl_program program = clCreateProgramWithSource(clctx, 1, | |
(const char**)&src_str, (const size_t*)&source_size, &ret); | |
ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); | |
cl_kernel kernel = clCreateKernel(program, "vector_add", &ret); | |
ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&aMemObj); | |
ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&bMemObj); | |
ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&cMemObj); | |
// execute the ocl kernel on the list | |
size_t global_item_size = LIST_SIZE; | |
size_t local_item_size = 64; // process in groups of 64 | |
ret = clEnqueueNDRangeKernel(cmd_queue, kernel, 1, NULL, | |
&global_item_size, &local_item_size, 0, NULL, NULL); | |
// read the mem buffer for C array ot the device | |
int* C = (int*) malloc(sizeof(int) * LIST_SIZE); | |
ret = clEnqueueReadBuffer(cmd_queue, cMemObj, CL_TRUE, 0, | |
LIST_SIZE* sizeof(int) , C, 0, NULL, NULL); | |
for(i=0; i < LIST_SIZE; ++i) | |
{ | |
printf("%d + %d = %d\r\n", A[i], B[i], C[i]); | |
} | |
clFlush(cmd_queue); | |
clFinish(cmd_queue); | |
clReleaseKernel(kernel); | |
clReleaseProgram(program); | |
clReleaseMemObject(aMemObj); | |
clReleaseMemObject(bMemObj); | |
clReleaseMemObject(cMemObj); | |
clReleaseCommandQueue(cmd_queue); | |
clReleaseContext(clctx); | |
free(A); | |
free(B); | |
free(C); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment