Created
December 22, 2017 02:56
-
-
Save greenbagels/02fe8d914125aefcfd59506d855db738 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <time.h> | |
#include <stdlib.h> | |
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS | |
#include <CL/cl.h> | |
/*
 * OpenCL C source of the device kernel, compiled at run time by the host.
 *
 * Work-item (i, j) of a 2-D index space computes the dot product of rows i
 * and j of a 1024x1024 row-major matrix `input` and stores it in
 * output[i*1024+j].
 *
 * The sum is accumulated in a private variable and written once: the kernel
 * therefore never reads `output`, which keeps it valid when the host creates
 * that buffer as write-only, and avoids 1024 global read-modify-write cycles
 * per work-item. For a zero-initialised output buffer the stored value is the
 * same as accumulating in place.
 */
const char* kern_src =
"kernel void para_transply(global const double *input, global double *output)\n"
"{\n"
"    int i = get_global_id(0);\n"
"    int j = get_global_id(1);\n"
"    double sum = 0.0;\n"
"    for (int k = 0; k < 1024; k++)\n"
"    {\n"
"        sum += input[i*1024+k] * input[j*1024+k];\n"
"    }\n"
"    output[i*1024+j] = sum;\n"
"}\n";
/*
 * CPU reference implementation, defined later in this file.
 * For every (i, j) it adds the dot product of rows i and j of the 1024x1024
 * row-major matrix `input` to output[i*1024+j]. Always returns 0.
 */
int serial_transply(double *input, double *output);
int main() | |
{ | |
static double array[1024*1024]; | |
static double serial[1024*1024]; | |
static double paral[1024*1024]; | |
struct timespec start, end; | |
double elapsed, ops = 2.0*1024.*1024*1024; | |
for (int i = 0; i < 1024; i++) | |
{ | |
for (int j = 0; j < 1024; j++) | |
{ | |
array[i*1024+j] = (double)rand() / (double)RAND_MAX; | |
} | |
} | |
cl_int status; | |
cl_uint num_platforms = 0; | |
cl_platform_id *platforms = NULL; | |
status = clGetPlatformIDs(0, NULL, &num_platforms); | |
printf("Last error status: %d\n", status); | |
platforms = (cl_platform_id*)malloc(num_platforms*sizeof(cl_platform_id)); | |
status = clGetPlatformIDs(num_platforms, platforms, NULL); | |
printf("Last error status: %d\n", status); | |
cl_uint num_devices = 0; | |
cl_device_id *devices = NULL; | |
status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices); | |
printf("Last error status: %d\n", status); | |
devices = (cl_device_id*)malloc(num_devices*sizeof(cl_device_id)); | |
status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, num_devices, devices, NULL); | |
printf("Last error status: %d\n", status); | |
size_t param_size; | |
clGetDeviceInfo(devices[0], CL_DEVICE_NAME, 0, NULL, ¶m_size); | |
char* value = (char*)malloc(param_size); | |
clGetDeviceInfo(devices[0], CL_DEVICE_NAME, param_size, value, NULL); | |
printf("Using Device %s\n", value); | |
free(value); | |
cl_context context = clCreateContext(NULL, num_devices, devices, NULL, NULL, &status); | |
printf("Last error status: %d\n", status); | |
cl_command_queue queue = clCreateCommandQueue(context, devices[0], 0, &status); | |
printf("Last error status: %d\n", status); | |
cl_mem inbuf = clCreateBuffer(context, CL_MEM_READ_ONLY, 1024*1024*sizeof(double), NULL, &status); | |
printf("Last error status: %d\n", status); | |
cl_mem outbuf = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 1024*1024*sizeof(double), NULL, &status); | |
printf("Last error status: %d\n", status); | |
status = clEnqueueWriteBuffer(queue, inbuf, CL_FALSE, 0, 1024*1024*sizeof(double), &array, 0, NULL, NULL); | |
printf("Last error status: %d\n", status); | |
status = clEnqueueWriteBuffer(queue, outbuf, CL_FALSE, 0, 1024*1024*sizeof(double), ¶l, 0, NULL, NULL); | |
printf("Last error status: %d\n", status); | |
cl_program prog = clCreateProgramWithSource(context, 1, (const char**)&kern_src, NULL, &status); | |
printf("Last error status: %d\n", status); | |
status = clBuildProgram(prog, num_devices, devices, NULL, NULL, NULL); | |
printf("Last error status: %d\n", status); | |
if (status == CL_BUILD_PROGRAM_FAILURE) | |
{ | |
size_t log_size; | |
clGetProgramBuildInfo(prog, devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); | |
char *log = (char*)malloc(log_size); | |
clGetProgramBuildInfo(prog, devices[0], CL_PROGRAM_BUILD_LOG, log_size, log, NULL); | |
printf("%s\n", log); | |
free(log); | |
} | |
cl_kernel kernel = clCreateKernel(prog, "para_transply", NULL); | |
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inbuf); | |
printf("Last error status: %d\n", status); | |
status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &outbuf); | |
printf("Last error status: %d\n", status); | |
size_t idxspace_size[2] = {1024, 1024}; | |
size_t wgroup_size[2] = {16, 16}; | |
clock_gettime(CLOCK_REALTIME, &start); | |
status = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, idxspace_size, wgroup_size, 0, NULL, NULL); | |
clFinish(queue); | |
clock_gettime(CLOCK_REALTIME, &end); | |
printf("Last error status: %d\n", status); | |
elapsed = (double)(end.tv_sec - start.tv_sec) + (double)(end.tv_nsec - start.tv_nsec)/1000000000; | |
printf("\n>[///////|PARALLEL|///////]<\n"); | |
printf("%lf floating point operations completed\n", ops); | |
printf("Elapsed time: %lf seconds\n", elapsed); | |
printf("Effective speed: %lf GFLOPS\n\n", ops/elapsed/1000000000); | |
clEnqueueReadBuffer(queue, outbuf, CL_TRUE, 0, sizeof(double)*1024*1024, ¶l, 0, NULL, NULL); | |
printf("Last error status: %d\n", status); | |
clReleaseKernel(kernel); | |
clReleaseProgram(prog); | |
clReleaseCommandQueue(queue); | |
clReleaseMemObject(inbuf); | |
clReleaseMemObject(outbuf); | |
clReleaseContext(context); | |
free(platforms); | |
free(devices); | |
clock_gettime(CLOCK_REALTIME, &start); | |
serial_transply(array, serial); | |
clock_gettime(CLOCK_REALTIME, &end); | |
elapsed = (double)(end.tv_sec - start.tv_sec) + (double)(end.tv_nsec - start.tv_nsec)/1000000000; | |
printf(">[////////|SERIAL|////////]<\n"); | |
printf("%lf floating point operations completed\n", ops); | |
printf("Elapsed time: %lf seconds\n", elapsed); | |
printf("Effective speed: %lf GFLOPS\n\n", ops/elapsed/1000000000); | |
//verify | |
for (int i = 0; i < 1024; i++) | |
{ | |
for (int j = 0; j < 1024; j++) | |
{ | |
if (serial[i*1024+j] != paral[i*1024+j]) | |
{ | |
printf("Serial and parallel outputs do not match.\n"); | |
return -1; | |
} | |
} | |
} | |
printf("Serial and parallel outputs match.\n"); | |
return 0; | |
} | |
/*
 * CPU reference for the OpenCL kernel: treats `input` as a 1024x1024
 * row-major matrix and, for every pair (row, col), adds the dot product of
 * input rows `row` and `col` to output[row*1024 + col].
 *
 * The result is accumulated into `output` with +=, so the caller must pass a
 * zero-filled array to obtain the plain product.
 *
 * input  - 1024*1024 doubles, read only.
 * output - 1024*1024 doubles, updated in place.
 * Returns 0 always.
 */
int serial_transply(double *input, double *output)
{
    enum { DIM = 1024 };

    for (int row = 0; row < DIM; row++)
    {
        const double *lhs = input + row * DIM;
        double *out_row = output + row * DIM;

        for (int col = 0; col < DIM; col++)
        {
            const double *rhs = input + col * DIM;

            /* Accumulate directly into the output cell, one term at a time,
             * so rounding and store order match the original formulation. */
            for (int k = 0; k < DIM; k++)
            {
                out_row[col] += lhs[k] * rhs[k];
            }
        }
    }
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment