Skip to content

Instantly share code, notes, and snippets.

@Centrinia
Last active January 15, 2016 16:49
Show Gist options
  • Save Centrinia/96fbe78fbc8f3e986786 to your computer and use it in GitHub Desktop.
Save Centrinia/96fbe78fbc8f3e986786 to your computer and use it in GitHub Desktop.
exc@lambda ~/src/examples/opencl/prefix_sum $ optirun python3 prefix_sum.py
[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
25 26 27 28 29 30 31]
(16,) None
<pyopencl.cffi_cl.CommandQueue object at 0x7f378fefcac8> (16,) None <pyopencl.cffi_cl.Buffer object at 0x7f3787cce470> <pyopencl.cffi_cl.Buffer object at 0x7f3787cd8d68> 0 1
Adding ap[54668944] and ap[109337888] (start, stride: 54668944, 54668944), (size: 16,0)
Adding ap[164006832] and ap[218675776] (start, stride: 54668944, 54668944), (size: 16,0)
Adding ap[273344720] and ap[328013664] (start, stride: 54668944, 54668944), (size: 16,0)
Adding ap[382682608] and ap[437351552] (start, stride: 54668944, 54668944), (size: 16,0)
Adding ap[492020496] and ap[546689440] (start, stride: 54668944, 54668944), (size: 16,0)
Adding ap[601358384] and ap[656027328] (start, stride: 54668944, 54668944), (size: 16,0)
Adding ap[710696272] and ap[765365216] (start, stride: 54668944, 54668944), (size: 16,0)
Adding ap[820034160] and ap[874703104] (start, stride: 54668944, 54668944), (size: 16,0)
Adding ap[929372048] and ap[984040992] (start, stride: 54668944, 54668944), (size: 16,0)
Adding ap[1038709936] and ap[1093378880] (start, stride: 54668944, 54668944), (size: 16,0)
Adding ap[1148047824] and ap[1202716768] (start, stride: 54668944, 54668944), (size: 16,0)
Adding ap[1257385712] and ap[1312054656] (start, stride: 54668944, 54668944), (size: 16,0)
Adding ap[1366723600] and ap[1421392544] (start, stride: 54668944, 54668944), (size: 16,0)
Adding ap[1476061488] and ap[1530730432] (start, stride: 54668944, 54668944), (size: 16,0)
Adding ap[1585399376] and ap[1640068320] (start, stride: 54668944, 54668944), (size: 16,0)
Adding ap[1694737264] and ap[1749406208] (start, stride: 54668944, 54668944), (size: 16,0)
(32,)
<pyopencl.cffi_cl.Buffer object at 0x7f3787cd8d68>
Traceback (most recent call last):
File "prefix_sum.py", line 75, in <module>
main()
File "prefix_sum.py", line 70, in main
a = prefix_sum(a, context,device)
File "prefix_sum.py", line 58, in prefix_sum
cl.enqueue_copy(command_queue, b, b_buf)
File "/usr/local/lib/python3.4/dist-packages/pyopencl/__init__.py", line 1540, in enqueue_copy
return _cl._enqueue_read_buffer(queue, src, dest, **kwargs)
File "/usr/local/lib/python3.4/dist-packages/pyopencl/cffi_cl.py", line 1308, in _enqueue_read_buffer
NannyEvent._handle(hostbuf)))
File "/usr/local/lib/python3.4/dist-packages/pyopencl/cffi_cl.py", line 549, in _handle_error
raise e
pyopencl.cffi_cl.RuntimeError: clenqueuereadbuffer failed: OUT_OF_RESOURCES
/* prefix_sum.cl */
kernel void prefix_sum(global const int * ap, global int * bp,
const int start, const int stride)
{
int i0 = get_global_id(0);
int i = start + stride * 2 * i0;
printf("Adding ap[%d] and ap[%d] (start, stride: %d, %d), (size: %d,%d)\n", i, stride+i, start, stride, get_global_size(0), get_local_size(0));
bp[i] = ap[i];
bp[i + stride] = ap[i] + ap[i + stride];
}
import numpy as np
import pyopencl as cl
def CreateContext():
platforms = cl.get_platforms()
#for device_type in [cl.device_type.GPU, cl.device_type.CPU]:
for device_type in [cl.device_type.CPU]:
for platform in platforms:
devices = platform.get_devices(cl.device_type.GPU)
if len(devices) == 0:
continue
device = devices[0]
context = cl.Context([device])
return context, device
print('Failed to find any OpenCL platforms.')
return None
def div_ceil(a,b):
return (a+b-1) // b
def prefix_sum(a, context=None,device=None):
PROGRAM_FILENAME='prefix_sum.cl'
if context is None:
context,device = CreateContext()
command_queue = cl.CommandQueue(context)
#program = CreateProgram(context, device, 'prefix_sum.cl')
with open(PROGRAM_FILENAME, 'r') as f:
program_source = f.read()
program = cl.Program(context, program_source).build(devices=[device])
start = 0
stride = 1
n = a.size // (stride * 2)
local_worksize = None
global_worksize = div_ceil(n, local_worksize) if local_worksize is not None else n
local_worksize = (local_worksize,) if local_worksize is not None else None
global_worksize = (global_worksize,)
print(global_worksize,local_worksize)
a_buf = cl.Buffer(context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=a)
b_buf = cl.Buffer(context, cl.mem_flags.WRITE_ONLY,size=a.nbytes)
print(command_queue, global_worksize, local_worksize, a_buf, b_buf, np.int32(start), np.int32(stride))
program.prefix_sum(command_queue, global_worksize, local_worksize, a_buf, b_buf, np.int32(start), np.int32(stride))
b = np.empty(a.shape, dtype=a.dtype)
print(b.shape)
print(b_buf)
cl.enqueue_copy(command_queue, b, b_buf)
return b
def main():
N = 32
#print(CreateContext())
#a = np.array(np.random.randint(0,9,16),dtype=np.int32)
a = np.array(np.arange(N),dtype=np.int32)
print(a)
context, device = CreateContext()
for _ in range(1):
a = prefix_sum(a, context,device)
print(a)
return
main()
exc@lambda ~/src/examples/opencl/prefix_sum $ optirun python2 prefix_sum.py
[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
25 26 27 28 29 30 31]
((16,), None)
(<pyopencl.cffi_cl.CommandQueue object at 0x7f911b4bcb10>, (16,), None, <pyopencl.cffi_cl.Buffer object at 0x7f911b23efd0>, <pyopencl.cffi_cl.Buffer object at 0x7f911b23ee90>, 0, 1)
Adding ap[0] and ap[1] (start, stride: 0, 1), (size: 16,0)
Adding ap[2] and ap[3] (start, stride: 0, 1), (size: 16,0)
Adding ap[4] and ap[5] (start, stride: 0, 1), (size: 16,0)
Adding ap[6] and ap[7] (start, stride: 0, 1), (size: 16,0)
Adding ap[8] and ap[9] (start, stride: 0, 1), (size: 16,0)
Adding ap[10] and ap[11] (start, stride: 0, 1), (size: 16,0)
Adding ap[12] and ap[13] (start, stride: 0, 1), (size: 16,0)
Adding ap[14] and ap[15] (start, stride: 0, 1), (size: 16,0)
Adding ap[16] and ap[17] (start, stride: 0, 1), (size: 16,0)
Adding ap[18] and ap[19] (start, stride: 0, 1), (size: 16,0)
Adding ap[20] and ap[21] (start, stride: 0, 1), (size: 16,0)
Adding ap[22] and ap[23] (start, stride: 0, 1), (size: 16,0)
Adding ap[24] and ap[25] (start, stride: 0, 1), (size: 16,0)
Adding ap[26] and ap[27] (start, stride: 0, 1), (size: 16,0)
Adding ap[28] and ap[29] (start, stride: 0, 1), (size: 16,0)
Adding ap[30] and ap[31] (start, stride: 0, 1), (size: 16,0)
(32,)
<pyopencl.cffi_cl.Buffer object at 0x7f911b23ee90>
[ 0 1 2 5 4 9 6 13 8 17 10 21 12 25 14 29 16 33 18 37 20 41 22 45 24
49 26 53 28 57 30 61]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment