Created
December 29, 2016 06:56
-
-
Save ShigekiKarita/d0f1c3cdaedb97361ae1fb01e7d49352 to your computer and use it in GitHub Desktop.
ATS2 with OpenCL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "share/atspre_define.hats" | |
#include "share/atspre_staload.hats" | |
(* README
Original C example:
https://github.com/smistad/OpenCL-Getting-Started
Compile with CUDA devices:
patscc -o test ./opencl_test.dats -L/usr/local/cuda/lib64 -lOpenCL
*)
%{^ | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <assert.h> | |
#ifdef __APPLE__ | |
#include <OpenCL/opencl.h> | |
#else | |
#include <CL/cl.h> | |
#endif | |
#define MAX_SOURCE_SIZE (0x100000) | |
%} | |
%{ | |
int main_c(void) { | |
// Create the two input vectors | |
int i; | |
const int LIST_SIZE = 102400; | |
int *A = (int*)malloc(sizeof(int)*LIST_SIZE); | |
int *B = (int*)malloc(sizeof(int)*LIST_SIZE); | |
for(i = 0; i < LIST_SIZE; i++) { | |
A[i] = i; | |
B[i] = LIST_SIZE - i; | |
} | |
// Load the kernel source code into the array source_str | |
FILE *fp; | |
char *source_str; | |
size_t source_size; | |
fp = fopen("vector_add_kernel.cl", "r"); | |
if (!fp) { | |
fprintf(stderr, "Failed to load kernel.\n"); | |
exit(1); | |
} | |
source_str = (char*)malloc(MAX_SOURCE_SIZE); | |
source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp); | |
fclose( fp ); | |
// Get platform and device information | |
cl_platform_id platform_id = NULL; | |
cl_device_id device_id = NULL; | |
cl_uint ret_num_devices; | |
cl_uint ret_num_platforms; | |
cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms); | |
ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_ALL, 1, | |
&device_id, &ret_num_devices); | |
// Create an OpenCL context | |
cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret); | |
// Create a command queue | |
cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret); | |
// Create memory buffers on the device for each vector | |
cl_mem a_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY, | |
LIST_SIZE * sizeof(int), NULL, &ret); | |
cl_mem b_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY, | |
LIST_SIZE * sizeof(int), NULL, &ret); | |
cl_mem c_mem_obj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, | |
LIST_SIZE * sizeof(int), NULL, &ret); | |
// Copy the lists A and B to their respective memory buffers | |
ret = clEnqueueWriteBuffer(command_queue, a_mem_obj, CL_TRUE, 0, | |
LIST_SIZE * sizeof(int), A, 0, NULL, NULL); | |
ret = clEnqueueWriteBuffer(command_queue, b_mem_obj, CL_TRUE, 0, | |
LIST_SIZE * sizeof(int), B, 0, NULL, NULL); | |
// Create a program from the kernel source | |
cl_program program = clCreateProgramWithSource(context, 1, | |
(const char **)&source_str, (const size_t *)&source_size, &ret); | |
// Build the program | |
ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); | |
// Create the OpenCL kernel | |
cl_kernel kernel = clCreateKernel(program, "vector_add", &ret); | |
// Set the arguments of the kernel | |
ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&a_mem_obj); | |
ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&b_mem_obj); | |
ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&c_mem_obj); | |
// Execute the OpenCL kernel on the list | |
size_t global_item_size = LIST_SIZE; // Process the entire lists | |
size_t local_item_size = 64; // Process in groups of 64 | |
ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, | |
&global_item_size, &local_item_size, 0, NULL, NULL); | |
// Read the memory buffer C on the device to the local variable C | |
int *C = (int*)malloc(sizeof(int)*LIST_SIZE); | |
ret = clEnqueueReadBuffer(command_queue, c_mem_obj, CL_TRUE, 0, | |
LIST_SIZE * sizeof(int), C, 0, NULL, NULL); | |
// Display the result to the screen | |
for(i = 0; i < LIST_SIZE; i++) { | |
assert(A[i] + B[i] == C[i]); | |
printf("A[i](%d) + B[i](%d) == C[i](%d)\n", A[i], B[i], C[i]); | |
} | |
// Clean up | |
ret = clFlush(command_queue); | |
ret = clFinish(command_queue); | |
ret = clReleaseKernel(kernel); | |
ret = clReleaseProgram(program); | |
ret = clReleaseMemObject(a_mem_obj); | |
ret = clReleaseMemObject(b_mem_obj); | |
ret = clReleaseMemObject(c_mem_obj); | |
ret = clReleaseCommandQueue(command_queue); | |
ret = clReleaseContext(context); | |
free(A); | |
free(B); | |
free(C); | |
return 0; | |
} | |
%} | |
(* C-side entry points, bound by their C names through the "mac#" prefix. *)
extern fun syntax(): void = "mac#syntax"
extern fun main_c(): int = "mac#main_c"

(* Raised when the embedded C program reports a non-zero status. *)
exception Error of ()

(* Program entry: run the C routine and turn a non-zero status into Error. *)
implement main0() = let
  val status = main_c()
in
  if status = 0 then () else $raise Error ()
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Element-wise vector addition: each work-item writes C[gid] = A[gid] + B[gid]
// for its own global index in dimension 0.
//
// The kernel takes no length argument, so the host must enqueue a global
// work size that does not exceed the number of elements in A, B and C.
__kernel void vector_add(__global int *A, __global int *B, __global int *C) {
    // get_global_id() returns size_t; keep that type so the index is never
    // narrowed into a (possibly negative) int.
    const size_t gid = get_global_id(0);

    // Do the operation
    C[gid] = A[gid] + B[gid];
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment