Skip to content

Instantly share code, notes, and snippets.

@ranocha
Last active August 14, 2018 09:26
Show Gist options
  • Save ranocha/d249a7b698c36687ce34d4255d2b4c0d to your computer and use it in GitHub Desktop.
Save ranocha/d249a7b698c36687ce34d4255d2b4c0d to your computer and use it in GitHub Desktop.
CLBlast_trsv_event
// compile and run with
// gcc -I../include -O2 -Wall sgemv.c -o sgemv -lOpenCL -L . -lclblast; ./sgemv
// in the build directory of CLBlast
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS // to disable deprecation warnings
#include <clblast_c.h>
int main(void) {
// OpenCL platform/device settings
const size_t platform_id = 0;
const size_t device_id = 0;
// Example arguments
const size_t m = 128;
const size_t n = 289;
const float alpha = 0.7;
const float beta = 0.0;
const size_t a_ld = n;
// Initializes the OpenCL platform
cl_uint num_platforms;
clGetPlatformIDs(0, NULL, &num_platforms);
cl_platform_id* platforms = (cl_platform_id*)malloc(num_platforms*sizeof(cl_platform_id));
clGetPlatformIDs(num_platforms, platforms, NULL);
cl_platform_id platform = platforms[platform_id];
// Initializes the OpenCL device
cl_uint num_devices;
clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
cl_device_id* devices = (cl_device_id*)malloc(num_devices*sizeof(cl_device_id));
clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, num_devices, devices, NULL);
cl_device_id device = devices[device_id];
// Creates the OpenCL context, queue, and an event
cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL);
cl_command_queue queue = clCreateCommandQueue(context, device, 0, NULL);
cl_event event = NULL;
// Populate host data structures with some example data
float* host_a = (float*)malloc(sizeof(float)*m*n);
float* host_x = (float*)malloc(sizeof(float)*n);
float* host_y = (float*)malloc(sizeof(float)*m);
for (size_t i=0; i<m*n; ++i) { host_a[i] = 12.193; }
for (size_t i=0; i<n; ++i) { host_x[i] = -8.199; }
for (size_t i=0; i<m; ++i) { host_y[i] = 0.0; }
// Copy the data-structures to the device
cl_mem device_a = clCreateBuffer(context, CL_MEM_READ_WRITE, m*n*sizeof(float), NULL, NULL);
cl_mem device_x = clCreateBuffer(context, CL_MEM_READ_WRITE, n*sizeof(float), NULL, NULL);
cl_mem device_y = clCreateBuffer(context, CL_MEM_READ_WRITE, m*sizeof(float), NULL, NULL);
clEnqueueWriteBuffer(queue, device_a, CL_TRUE, 0, m*n*sizeof(float), host_a, 0, NULL, NULL);
clEnqueueWriteBuffer(queue, device_x, CL_TRUE, 0, n*sizeof(float), host_x, 0, NULL, NULL);
clEnqueueWriteBuffer(queue, device_y, CL_TRUE, 0, m*sizeof(float), host_y, 0, NULL, NULL);
// Call the SGEMV routine.
CLBlastStatusCode status = CLBlastSgemv(CLBlastLayoutRowMajor, CLBlastTransposeNo,
m, n,
alpha,
device_a, 0, a_ld,
device_x, 0, 1,
beta,
device_y, 0, 1,
&queue, &event);
// Wait for completion
if (status == CLBlastSuccess) {
printf("Event: %ld\n", (long)event);
cl_int wait_status = clWaitForEvents(1, &event);
printf("Status: %d\n", wait_status);
cl_int release_status = clReleaseEvent(event);
printf("Status: %d\n", release_status);
}
// Example completed. See "clblast_c.h" for status codes (0 -> success).
printf("Completed SGEMV with status %d\n", status);
// Clean-up
free(platforms);
free(devices);
free(host_a);
free(host_x);
free(host_y);
clReleaseMemObject(device_a);
clReleaseMemObject(device_x);
clReleaseMemObject(device_y);
clReleaseCommandQueue(queue);
clReleaseContext(context);
return 0;
}
// compile and run with
// gcc -I../include -O2 -Wall strsv.c -o strsv -lOpenCL -L . -lclblast; ./strsv
// in the build directory of CLBlast
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS // to disable deprecation warnings
#include <clblast_c.h>
int main(void) {
// OpenCL platform/device settings
const size_t platform_id = 0;
const size_t device_id = 0;
// Example arguments
const size_t n = 50;
const size_t a_ld = n;
// Initializes the OpenCL platform
cl_uint num_platforms;
clGetPlatformIDs(0, NULL, &num_platforms);
cl_platform_id* platforms = (cl_platform_id*)malloc(num_platforms*sizeof(cl_platform_id));
clGetPlatformIDs(num_platforms, platforms, NULL);
cl_platform_id platform = platforms[platform_id];
// Initializes the OpenCL device
cl_uint num_devices;
clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
cl_device_id* devices = (cl_device_id*)malloc(num_devices*sizeof(cl_device_id));
clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, num_devices, devices, NULL);
cl_device_id device = devices[device_id];
// Creates the OpenCL context, queue, and an event
cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL);
cl_command_queue queue = clCreateCommandQueue(context, device, 0, NULL);
cl_event event = NULL;
// Populate host data structures with some example data
float* host_a = (float*)malloc(sizeof(float)*n*n);
float* host_x = (float*)malloc(sizeof(float)*n);
for (size_t i=0; i<n*n; ++i) { host_a[i] = i+1; }
for (size_t i=0; i<n; ++i) { host_x[i] = 1.0; }
// Copy the data-structures to the device
cl_mem device_a = clCreateBuffer(context, CL_MEM_READ_WRITE, n*n*sizeof(float), NULL, NULL);
cl_mem device_x = clCreateBuffer(context, CL_MEM_READ_WRITE, n*sizeof(float), NULL, NULL);
clEnqueueWriteBuffer(queue, device_a, CL_TRUE, 0, n*n*sizeof(float), host_a, 0, NULL, NULL);
clEnqueueWriteBuffer(queue, device_x, CL_TRUE, 0, n*sizeof(float), host_x, 0, NULL, NULL);
// Call the STRSV routine.
CLBlastStatusCode status = CLBlastStrsv(CLBlastLayoutRowMajor, CLBlastTriangleUpper,
CLBlastTransposeNo, CLBlastDiagonalNonUnit,
n,
device_a, 0, a_ld,
device_x, 0, 1,
&queue, &event);
// Wait for completion
if (status == CLBlastSuccess) {
printf("Event: %ld\n", (long)event);
int wait_status = clWaitForEvents(1, &event);
printf("Status: %d\n", wait_status);
cl_int release_status = clReleaseEvent(event);
printf("Status: %d\n", release_status);
}
// Example completed. See "clblast_c.h" for status codes (0 -> success).
printf("Completed STRSV with status %d\n", status);
// Clean-up
free(platforms);
free(devices);
free(host_a);
free(host_x);
clReleaseMemObject(device_a);
clReleaseMemObject(device_x);
clReleaseCommandQueue(queue);
clReleaseContext(context);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment