Skip to content

Instantly share code, notes, and snippets.

@jakebolewski
Created November 13, 2013 16:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jakebolewski/7452048 to your computer and use it in GitHub Desktop.
Save jakebolewski/7452048 to your computer and use it in GitHub Desktop.
low level opencl example
{
"metadata": {
"language": "Julia",
"name": ""
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"The complete Low Level OpenCL API is Exposed Through the Inner 'api' Module"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import OpenCL\n",
"const cl = OpenCL;"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stderr",
"text": [
"Warning: redefining constant cl\n"
]
}
],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"test_source = \"\n",
"__kernel void sum(__global const float *a,\n",
" __global const float *b, \n",
" __global float *c,\n",
" const unsigned int count)\n",
"{\n",
" int gid = get_global_id(0);\n",
" if (gid < count) {\n",
" c[gid] = a[gid] + b[gid];\n",
" }\n",
"}\";"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"device = first(cl.devices())\n",
"\n",
"length = 1024\n",
"h_a = Array(cl.CL_float, length)\n",
"h_b = Array(cl.CL_float, length)\n",
"h_c = Array(cl.CL_float, length)\n",
"h_d = Array(cl.CL_float, length)\n",
"h_e = Array(cl.CL_float, length)\n",
"h_f = Array(cl.CL_float, length)\n",
"h_g = Array(cl.CL_float, length)\n",
"\n",
"for i in 1:length\n",
" h_a[i] = cl.cl_float(rand())\n",
" h_b[i] = cl.cl_float(rand())\n",
" h_e[i] = cl.cl_float(rand())\n",
" h_g[i] = cl.cl_float(rand())\n",
"end \n",
" \n",
"err_code = Array(cl.CL_int, 1)\n",
"\n",
"# create compute context (TODO: fails if the function pointers are not passed...)\n",
"ctx_id = cl.api.clCreateContext(C_NULL, 1, [device.id], \n",
" cl.ctx_callback_ptr, \n",
" cl.raise_context_error, \n",
" err_code)\n",
"if err_code[1] != cl.CL_SUCCESS\n",
" error(\"Failed to create context\")\n",
"end\n",
"\n",
"q_id = cl.api.clCreateCommandQueue(ctx_id, device.id, 0, err_code)\n",
"if err_code[1] != cl.CL_SUCCESS\n",
" error(\"Failed to create command queue\")\n",
"end\n",
"\n",
"# create program\n",
"bytesource = bytestring(test_source)\n",
"prg_id = cl.api.clCreateProgramWithSource(ctx_id, 1, [bytesource], C_NULL, err_code)\n",
"if err_code[1] != cl.CL_SUCCESS\n",
" error(\"Failed to create program\")\n",
"end\n",
"\n",
"# build program\n",
"err = cl.api.clBuildProgram(prg_id, 0, C_NULL, C_NULL, C_NULL, C_NULL)\n",
"if err != cl.CL_SUCCESS\n",
" error(\"Failed to build program\")\n",
"end\n",
" \n",
"# create compute kernel\n",
"k_id = cl.api.clCreateKernel(prg_id, \"sum\", err_code)\n",
"if err_code[1] != cl.CL_SUCCESS\n",
" error(\"Failed to create compute kernel\")\n",
"end\n",
"\n",
"# create input arrays in device memory\n",
"Aid = cl.api.clCreateBuffer(ctx_id, cl.CL_MEM_READ_ONLY | cl.CL_MEM_COPY_HOST_PTR,\n",
" sizeof(cl.CL_float) * length, h_a, err_code)\n",
"if err_code[1] != cl.CL_SUCCESS\n",
" error(\"Error creating buffer A\")\n",
"end\n",
"Bid = cl.api.clCreateBuffer(ctx_id, cl.CL_MEM_READ_ONLY | cl.CL_MEM_COPY_HOST_PTR,\n",
" sizeof(cl.CL_float) * length, h_b, err_code)\n",
"if err_code[1] != cl.CL_SUCCESS\n",
" error(\"Error creating buffer B\")\n",
"end\n",
"Eid = cl.api.clCreateBuffer(ctx_id, cl.CL_MEM_WRITE_ONLY | cl.CL_MEM_COPY_HOST_PTR,\n",
" sizeof(cl.CL_float) * length, h_e, err_code)\n",
"if err_code[1] != cl.CL_SUCCESS\n",
" error(\"Error creating buffer E\")\n",
"end\n",
"Gid = cl.api.clCreateBuffer(ctx_id, cl.CL_MEM_WRITE_ONLY | cl.CL_MEM_COPY_HOST_PTR,\n",
" sizeof(cl.CL_float) * length, h_g, err_code)\n",
"if err_code[1] != cl.CL_SUCCESS\n",
" error(\"Error creating buffer G\")\n",
"end\n",
"\n",
"# create output arrays in device memory\n",
"\n",
"Cid = cl.api.clCreateBuffer(ctx_id, cl.CL_MEM_READ_WRITE,\n",
" sizeof(cl.CL_float) * length, C_NULL, err_code)\n",
"if err_code[1] != cl.CL_SUCCESS\n",
" error(\"Error creating buffer C\")\n",
"end\n",
"Did = cl.api.clCreateBuffer(ctx_id, cl.CL_MEM_READ_WRITE, \n",
" sizeof(cl.CL_float) * length, C_NULL, err_code)\n",
"if err_code[1] != cl.CL_SUCCESS\n",
" error(\"Error creating buffer D\")\n",
"end\n",
"Fid = cl.api.clCreateBuffer(ctx_id, cl.CL_MEM_WRITE_ONLY, \n",
" sizeof(cl.CL_float) * length, C_NULL, err_code)\n",
"if err_code[1] != cl.CL_SUCCESS\n",
" error(\"Error creating buffer F\")\n",
"end\n",
"\n",
"err = cl.api.clSetKernelArg(k_id, 0, sizeof(cl.CL_mem), [Aid])\n",
"err |= cl.api.clSetKernelArg(k_id, 1, sizeof(cl.CL_mem), [Bid])\n",
"err |= cl.api.clSetKernelArg(k_id, 2, sizeof(cl.CL_mem), [Cid])\n",
"err |= cl.api.clSetKernelArg(k_id, 3, sizeof(cl.CL_uint), cl.CL_uint[length])\n",
"if err != cl.CL_SUCCESS\n",
" error(\"Error setting kernel 1 args\")\n",
"end\n",
" \n",
"nglobal = Csize_t[length,]\n",
"err = cl.api.clEnqueueNDRangeKernel(q_id, k_id, 1, C_NULL,\n",
" nglobal, C_NULL, 0, C_NULL, C_NULL)\n",
"if err != cl.CL_SUCCESS\n",
" error(\"Failed to execute kernel 1\")\n",
"end\n",
"\n",
"err = cl.api.clSetKernelArg(k_id, 0, sizeof(cl.CL_mem), [Eid])\n",
"err |= cl.api.clSetKernelArg(k_id, 1, sizeof(cl.CL_mem), [Cid])\n",
"err |= cl.api.clSetKernelArg(k_id, 2, sizeof(cl.CL_mem), [Did])\n",
"if err != cl.CL_SUCCESS\n",
" error(\"Error setting kernel 2 args\")\n",
"end\n",
"err = cl.api.clEnqueueNDRangeKernel(q_id, k_id, 1, C_NULL,\n",
" nglobal, C_NULL, 0, C_NULL, C_NULL)\n",
"if err != cl.CL_SUCCESS\n",
" error(\"Failed to execute kernel 2\")\n",
"end\n",
"\n",
"err = cl.api.clSetKernelArg(k_id, 0, sizeof(cl.CL_mem), [Gid])\n",
"err |= cl.api.clSetKernelArg(k_id, 1, sizeof(cl.CL_mem), [Did])\n",
"err |= cl.api.clSetKernelArg(k_id, 2, sizeof(cl.CL_mem), [Fid])\n",
"if err != cl.CL_SUCCESS\n",
" error(\"Error setting kernel 3 args\")\n",
"end\n",
"err = cl.api.clEnqueueNDRangeKernel(q_id, k_id, 1, C_NULL,\n",
" nglobal, C_NULL, 0, C_NULL, C_NULL)\n",
"if err != cl.CL_SUCCESS\n",
" error(\"Failed to execute kernel 3\")\n",
"end\n",
"\n",
"# read back the result from compute device...\n",
"err = cl.api.clEnqueueReadBuffer(q_id, Fid, cl.CL_TRUE, 0,\n",
" sizeof(cl.CL_float) * length, h_f, 0, C_NULL, C_NULL)\n",
"if err != cl.CL_SUCCESS\n",
" error(\"Failed to read output array\")\n",
"end\n",
"\n",
"# test results\n",
"ncorrect = 0\n",
"for i in 1:length\n",
" tmp = h_a[i] + h_b[i] + h_e[i] + h_g[i]\n",
" if isapprox(tmp, h_f[i])\n",
" ncorrect += 1\n",
" end\n",
"end\n",
"if ncorrect == length\n",
" info(\"Success!\")\n",
"else\n",
" error(\"Results are incorrect!\")\n",
"end"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stderr",
"text": [
"INFO: Success!"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
"\n"
]
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment