Created
February 21, 2022 00:45
-
-
Save philipturner/665457757356b03fb0c8490e84046c0a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#if canImport(Darwin) // Apple platforms | |
import OpenCL | |
#else | |
// Don't know how to import OpenCL on Linux yet. It should be possible if I | |
// wrap it in a Clang module and link that to Swift. | |
#endif | |
/* | |
Sourced from this online tutorial: | |
https://www.eriksmistad.no/getting-started-with-opencl-and-gpu-computing/ | |
In the same directory as this script: | |
- "vector_add_kernel.cl" | |
- source: | |
__kernel void vector_add(__global const int *A, __global const int *B, __global int *C) { | |
// Get the index of the current element to be processed | |
int i = get_global_id(0); | |
// Do the operation | |
C[i] = A[i] + B[i]; | |
} | |
*/ | |
let MAX_SOURCE_SIZE = 0x100000 | |
let LIST_SIZE = 1024 | |
let A = UnsafeMutablePointer<Int32>.allocate(capacity: LIST_SIZE) | |
let B = UnsafeMutablePointer<Int32>.allocate(capacity: LIST_SIZE) | |
for i in 0..<LIST_SIZE { | |
A[i] = Int32(i) | |
B[i] = Int32(LIST_SIZE - i) | |
} | |
// Load the kernel source code into the array source_str | |
guard let fp = fopen("vector_add_kernel.cl", "r") else { | |
fatalError("Failed to load kernel.") | |
} | |
let source_str = UnsafeMutablePointer<CChar>.allocate(capacity: MAX_SOURCE_SIZE) | |
var source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp) | |
fclose(fp) | |
// Get platform and device information | |
var platform_id: cl_platform_id? = nil | |
var device_id: cl_device_id? = nil | |
var ret_num_devices: cl_uint = 0 | |
var ret_num_platforms: cl_uint = 0 | |
var ret: cl_int = clGetPlatformIDs(1, &platform_id, &ret_num_platforms) | |
ret = clGetDeviceIDs(platform_id, cl_device_type(CL_DEVICE_TYPE_DEFAULT), 1, | |
&device_id, &ret_num_devices) | |
// Create OpenCL context | |
let context: cl_context = clCreateContext(nil, 1, &device_id, nil, nil, &ret) | |
// Create a command queue | |
let command_queue: cl_command_queue = clCreateCommandQueue(context, device_id, 0, | |
&ret) | |
// Create memory buffers on the device for each vector | |
var a_mem_obj: cl_mem = clCreateBuffer(context, cl_mem_flags(CL_MEM_READ_ONLY), | |
LIST_SIZE * MemoryLayout<Int32>.stride, nil, &ret) | |
var b_mem_obj: cl_mem = clCreateBuffer(context, cl_mem_flags(CL_MEM_READ_ONLY), | |
LIST_SIZE * MemoryLayout<Int32>.stride, nil, &ret) | |
var c_mem_obj: cl_mem = clCreateBuffer(context, cl_mem_flags(CL_MEM_READ_ONLY), | |
LIST_SIZE * MemoryLayout<Int32>.stride, nil, &ret) | |
// Copy the lists A and B to their respective memory buffers | |
ret = clEnqueueWriteBuffer(command_queue, a_mem_obj, cl_bool(CL_TRUE), 0, | |
LIST_SIZE * MemoryLayout<Int32>.stride, A, 0, nil, nil) | |
ret = clEnqueueWriteBuffer(command_queue, b_mem_obj, cl_bool(CL_TRUE), 0, | |
LIST_SIZE * MemoryLayout<Int32>.stride, B, 0, nil, nil) | |
// Create a program from the kernel source | |
var source_str_casted = unsafeBitCast(source_str, to: UnsafePointer<CChar>?.self) | |
let program: cl_program = clCreateProgramWithSource(context, 1, | |
&source_str_casted, &source_size, &ret) | |
// Build the program | |
ret = clBuildProgram(program, 1, &device_id, nil, nil, nil) | |
// Create the OpenCL kernel | |
let kernel: cl_kernel = clCreateKernel(program, "vector_add", &ret) | |
// Set the arguments of the kernel | |
let size_of_cl_mem = MemoryLayout<cl_mem>.stride | |
ret = clSetKernelArg(kernel, 0, size_of_cl_mem, &a_mem_obj) | |
ret = clSetKernelArg(kernel, 1, size_of_cl_mem, &b_mem_obj) | |
ret = clSetKernelArg(kernel, 2, size_of_cl_mem, &c_mem_obj) | |
// Execute the OpenCL kernel on the list | |
var global_item_size = LIST_SIZE | |
var local_item_size = 64 | |
ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, nil, &global_item_size, | |
&local_item_size, 0, nil, nil) | |
// Read the memory buffer C on the device to the local variable C | |
let C = UnsafeMutablePointer<Int32>.allocate(capacity: LIST_SIZE) | |
ret = clEnqueueReadBuffer(command_queue, c_mem_obj, cl_bool(CL_TRUE), 0, | |
LIST_SIZE * MemoryLayout<Int32>.stride, C, 0, nil, nil) | |
// Display the result to the screen | |
for i in 0..<LIST_SIZE { | |
print("\(A[i]) + \(B[i]) = \(C[i])") | |
/* | |
Output: | |
0 + 1024 = 1024 | |
1 + 1023 = 1024 | |
2 + 1022 = 1024 | |
3 + 1021 = 1024 | |
4 + 1020 = 1024 | |
5 + 1019 = 1024 | |
6 + 1018 = 1024 | |
... | |
1019 + 5 = 1024 | |
1020 + 4 = 1024 | |
1021 + 3 = 1024 | |
1022 + 2 = 1024 | |
1023 + 1 = 1024 | |
*/ | |
} | |
// Clean up | |
ret = clFlush(command_queue) | |
ret = clFinish(command_queue) | |
ret = clReleaseKernel(kernel) | |
ret = clReleaseProgram(program) | |
ret = clReleaseMemObject(a_mem_obj) | |
ret = clReleaseMemObject(b_mem_obj) | |
ret = clReleaseMemObject(c_mem_obj) | |
ret = clReleaseCommandQueue(command_queue) | |
ret = clReleaseContext(context) | |
free(A) | |
free(B) | |
free(C) | |
exit(0) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment