Skip to content

Instantly share code, notes, and snippets.

@greenbagels
Created December 22, 2017 02:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save greenbagels/02fe8d914125aefcfd59506d855db738 to your computer and use it in GitHub Desktop.
Save greenbagels/02fe8d914125aefcfd59506d855db738 to your computer and use it in GitHub Desktop.
#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#include <CL/cl.h>
/*
 * OpenCL C source of the device kernel, compiled at run time by main().
 *
 * para_transply is launched over a 2-D index space; work-item (i, j) adds the
 * dot product of rows i and j of the 1024x1024 row-major matrix `input` to
 * output[i*1024+j]. The existing value of the output cell is the starting
 * point of the sum, so the host must initialise the output buffer.
 *
 * The cl_khr_fp64 pragma is required by OpenCL 1.0/1.1 compilers before
 * `double` may be used; it is guarded so that compilers which do not define
 * the extension macro never see it.
 *
 * The running sum is kept in a private variable and stored once, instead of
 * doing a global-memory read-modify-write in each of the 1024 iterations.
 * The order of the floating-point additions is unchanged.
 */
const char *kern_src =
"#if defined(cl_khr_fp64)\n"
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
"#endif\n"
"kernel void para_transply(global const double *input, global double *output)\n"
"{\n"
" int i = get_global_id(0);\n"
" int j = get_global_id(1);\n"
" double acc = output[i*1024+j];\n"
" for (int k = 0; k < 1024; k++)\n"
" {\n"
" acc += input[i*1024+k] * input[j*1024+k];\n"
" }\n"
" output[i*1024+j] = acc;\n"
"}\n";
/*
 * Host-side reference implementation (defined at the end of this file).
 * Treats `input` as a 1024x1024 row-major matrix and adds the dot product of
 * rows i and j to output[i*1024+j]; `output` is accumulated into, not
 * overwritten. Always returns 0.
 */
int serial_transply(double *input, double *output);
int main()
{
static double array[1024*1024];
static double serial[1024*1024];
static double paral[1024*1024];
struct timespec start, end;
double elapsed, ops = 2.0*1024.*1024*1024;
for (int i = 0; i < 1024; i++)
{
for (int j = 0; j < 1024; j++)
{
array[i*1024+j] = (double)rand() / (double)RAND_MAX;
}
}
cl_int status;
cl_uint num_platforms = 0;
cl_platform_id *platforms = NULL;
status = clGetPlatformIDs(0, NULL, &num_platforms);
printf("Last error status: %d\n", status);
platforms = (cl_platform_id*)malloc(num_platforms*sizeof(cl_platform_id));
status = clGetPlatformIDs(num_platforms, platforms, NULL);
printf("Last error status: %d\n", status);
cl_uint num_devices = 0;
cl_device_id *devices = NULL;
status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
printf("Last error status: %d\n", status);
devices = (cl_device_id*)malloc(num_devices*sizeof(cl_device_id));
status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, num_devices, devices, NULL);
printf("Last error status: %d\n", status);
size_t param_size;
clGetDeviceInfo(devices[0], CL_DEVICE_NAME, 0, NULL, &param_size);
char* value = (char*)malloc(param_size);
clGetDeviceInfo(devices[0], CL_DEVICE_NAME, param_size, value, NULL);
printf("Using Device %s\n", value);
free(value);
cl_context context = clCreateContext(NULL, num_devices, devices, NULL, NULL, &status);
printf("Last error status: %d\n", status);
cl_command_queue queue = clCreateCommandQueue(context, devices[0], 0, &status);
printf("Last error status: %d\n", status);
cl_mem inbuf = clCreateBuffer(context, CL_MEM_READ_ONLY, 1024*1024*sizeof(double), NULL, &status);
printf("Last error status: %d\n", status);
cl_mem outbuf = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 1024*1024*sizeof(double), NULL, &status);
printf("Last error status: %d\n", status);
status = clEnqueueWriteBuffer(queue, inbuf, CL_FALSE, 0, 1024*1024*sizeof(double), &array, 0, NULL, NULL);
printf("Last error status: %d\n", status);
status = clEnqueueWriteBuffer(queue, outbuf, CL_FALSE, 0, 1024*1024*sizeof(double), &paral, 0, NULL, NULL);
printf("Last error status: %d\n", status);
cl_program prog = clCreateProgramWithSource(context, 1, (const char**)&kern_src, NULL, &status);
printf("Last error status: %d\n", status);
status = clBuildProgram(prog, num_devices, devices, NULL, NULL, NULL);
printf("Last error status: %d\n", status);
if (status == CL_BUILD_PROGRAM_FAILURE)
{
size_t log_size;
clGetProgramBuildInfo(prog, devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
char *log = (char*)malloc(log_size);
clGetProgramBuildInfo(prog, devices[0], CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
printf("%s\n", log);
free(log);
}
cl_kernel kernel = clCreateKernel(prog, "para_transply", NULL);
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inbuf);
printf("Last error status: %d\n", status);
status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &outbuf);
printf("Last error status: %d\n", status);
size_t idxspace_size[2] = {1024, 1024};
size_t wgroup_size[2] = {16, 16};
clock_gettime(CLOCK_REALTIME, &start);
status = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, idxspace_size, wgroup_size, 0, NULL, NULL);
clFinish(queue);
clock_gettime(CLOCK_REALTIME, &end);
printf("Last error status: %d\n", status);
elapsed = (double)(end.tv_sec - start.tv_sec) + (double)(end.tv_nsec - start.tv_nsec)/1000000000;
printf("\n>[///////|PARALLEL|///////]<\n");
printf("%lf floating point operations completed\n", ops);
printf("Elapsed time: %lf seconds\n", elapsed);
printf("Effective speed: %lf GFLOPS\n\n", ops/elapsed/1000000000);
clEnqueueReadBuffer(queue, outbuf, CL_TRUE, 0, sizeof(double)*1024*1024, &paral, 0, NULL, NULL);
printf("Last error status: %d\n", status);
clReleaseKernel(kernel);
clReleaseProgram(prog);
clReleaseCommandQueue(queue);
clReleaseMemObject(inbuf);
clReleaseMemObject(outbuf);
clReleaseContext(context);
free(platforms);
free(devices);
clock_gettime(CLOCK_REALTIME, &start);
serial_transply(array, serial);
clock_gettime(CLOCK_REALTIME, &end);
elapsed = (double)(end.tv_sec - start.tv_sec) + (double)(end.tv_nsec - start.tv_nsec)/1000000000;
printf(">[////////|SERIAL|////////]<\n");
printf("%lf floating point operations completed\n", ops);
printf("Elapsed time: %lf seconds\n", elapsed);
printf("Effective speed: %lf GFLOPS\n\n", ops/elapsed/1000000000);
//verify
for (int i = 0; i < 1024; i++)
{
for (int j = 0; j < 1024; j++)
{
if (serial[i*1024+j] != paral[i*1024+j])
{
printf("Serial and parallel outputs do not match.\n");
return -1;
}
}
}
printf("Serial and parallel outputs match.\n");
return 0;
}
/*
 * Single-threaded product of a matrix with its own transpose.
 *
 * input  - 1024x1024 matrix in row-major order.
 * output - 1024x1024 row-major matrix; the dot product of input rows `row`
 *          and `col` is ADDED to output[row*1024+col], one term at a time,
 *          so the caller decides the starting values.
 *
 * Always returns 0.
 */
int serial_transply(double *input, double *output)
{
    enum { DIM = 1024 };
    double *cell = output;  /* walks the output in row-major order */

    for (int row = 0; row < DIM; ++row)
    {
        const double *lhs = input + row * DIM;

        for (int col = 0; col < DIM; ++col, ++cell)
        {
            const double *rhs = input + col * DIM;

            for (int k = 0; k < DIM; ++k)
            {
                *cell += lhs[k] * rhs[k];
            }
        }
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment