-
-
Save Centrinia/96fbe78fbc8f3e986786 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
exc@lambda ~/src/examples/opencl/prefix_sum $ optirun python3 prefix_sum.py | |
[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 | |
25 26 27 28 29 30 31] | |
(16,) None | |
<pyopencl.cffi_cl.CommandQueue object at 0x7f378fefcac8> (16,) None <pyopencl.cffi_cl.Buffer object at 0x7f3787cce470> <pyopencl.cffi_cl.Buffer object at 0x7f3787cd8d68> 0 1 | |
Adding ap[54668944] and ap[109337888] (start, stride: 54668944, 54668944), (size: 16,0) | |
Adding ap[164006832] and ap[218675776] (start, stride: 54668944, 54668944), (size: 16,0) | |
Adding ap[273344720] and ap[328013664] (start, stride: 54668944, 54668944), (size: 16,0) | |
Adding ap[382682608] and ap[437351552] (start, stride: 54668944, 54668944), (size: 16,0) | |
Adding ap[492020496] and ap[546689440] (start, stride: 54668944, 54668944), (size: 16,0) | |
Adding ap[601358384] and ap[656027328] (start, stride: 54668944, 54668944), (size: 16,0) | |
Adding ap[710696272] and ap[765365216] (start, stride: 54668944, 54668944), (size: 16,0) | |
Adding ap[820034160] and ap[874703104] (start, stride: 54668944, 54668944), (size: 16,0) | |
Adding ap[929372048] and ap[984040992] (start, stride: 54668944, 54668944), (size: 16,0) | |
Adding ap[1038709936] and ap[1093378880] (start, stride: 54668944, 54668944), (size: 16,0) | |
Adding ap[1148047824] and ap[1202716768] (start, stride: 54668944, 54668944), (size: 16,0) | |
Adding ap[1257385712] and ap[1312054656] (start, stride: 54668944, 54668944), (size: 16,0) | |
Adding ap[1366723600] and ap[1421392544] (start, stride: 54668944, 54668944), (size: 16,0) | |
Adding ap[1476061488] and ap[1530730432] (start, stride: 54668944, 54668944), (size: 16,0) | |
Adding ap[1585399376] and ap[1640068320] (start, stride: 54668944, 54668944), (size: 16,0) | |
Adding ap[1694737264] and ap[1749406208] (start, stride: 54668944, 54668944), (size: 16,0) | |
(32,) | |
<pyopencl.cffi_cl.Buffer object at 0x7f3787cd8d68> | |
Traceback (most recent call last): | |
File "prefix_sum.py", line 75, in <module> | |
main() | |
File "prefix_sum.py", line 70, in main | |
a = prefix_sum(a, context,device) | |
File "prefix_sum.py", line 58, in prefix_sum | |
cl.enqueue_copy(command_queue, b, b_buf) | |
File "/usr/local/lib/python3.4/dist-packages/pyopencl/__init__.py", line 1540, in enqueue_copy | |
return _cl._enqueue_read_buffer(queue, src, dest, **kwargs) | |
File "/usr/local/lib/python3.4/dist-packages/pyopencl/cffi_cl.py", line 1308, in _enqueue_read_buffer | |
NannyEvent._handle(hostbuf))) | |
File "/usr/local/lib/python3.4/dist-packages/pyopencl/cffi_cl.py", line 549, in _handle_error | |
raise e | |
pyopencl.cffi_cl.RuntimeError: clenqueuereadbuffer failed: OUT_OF_RESOURCES |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* prefix_sum.cl */ | |
kernel void prefix_sum(global const int * ap, global int * bp, | |
const int start, const int stride) | |
{ | |
int i0 = get_global_id(0); | |
int i = start + stride * 2 * i0; | |
printf("Adding ap[%d] and ap[%d] (start, stride: %d, %d), (size: %d,%d)\n", i, stride+i, start, stride, get_global_size(0), get_local_size(0)); | |
bp[i] = ap[i]; | |
bp[i + stride] = ap[i] + ap[i + stride]; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pyopencl as cl | |
def CreateContext(): | |
platforms = cl.get_platforms() | |
#for device_type in [cl.device_type.GPU, cl.device_type.CPU]: | |
for device_type in [cl.device_type.CPU]: | |
for platform in platforms: | |
devices = platform.get_devices(cl.device_type.GPU) | |
if len(devices) == 0: | |
continue | |
device = devices[0] | |
context = cl.Context([device]) | |
return context, device | |
print('Failed to find any OpenCL platforms.') | |
return None | |
def div_ceil(a,b): | |
return (a+b-1) // b | |
def prefix_sum(a, context=None,device=None): | |
PROGRAM_FILENAME='prefix_sum.cl' | |
if context is None: | |
context,device = CreateContext() | |
command_queue = cl.CommandQueue(context) | |
#program = CreateProgram(context, device, 'prefix_sum.cl') | |
with open(PROGRAM_FILENAME, 'r') as f: | |
program_source = f.read() | |
program = cl.Program(context, program_source).build(devices=[device]) | |
start = 0 | |
stride = 1 | |
n = a.size // (stride * 2) | |
local_worksize = None | |
global_worksize = div_ceil(n, local_worksize) if local_worksize is not None else n | |
local_worksize = (local_worksize,) if local_worksize is not None else None | |
global_worksize = (global_worksize,) | |
print(global_worksize,local_worksize) | |
a_buf = cl.Buffer(context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=a) | |
b_buf = cl.Buffer(context, cl.mem_flags.WRITE_ONLY,size=a.nbytes) | |
print(command_queue, global_worksize, local_worksize, a_buf, b_buf, np.int32(start), np.int32(stride)) | |
program.prefix_sum(command_queue, global_worksize, local_worksize, a_buf, b_buf, np.int32(start), np.int32(stride)) | |
b = np.empty(a.shape, dtype=a.dtype) | |
print(b.shape) | |
print(b_buf) | |
cl.enqueue_copy(command_queue, b, b_buf) | |
return b | |
def main(): | |
N = 32 | |
#print(CreateContext()) | |
#a = np.array(np.random.randint(0,9,16),dtype=np.int32) | |
a = np.array(np.arange(N),dtype=np.int32) | |
print(a) | |
context, device = CreateContext() | |
for _ in range(1): | |
a = prefix_sum(a, context,device) | |
print(a) | |
return | |
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
exc@lambda ~/src/examples/opencl/prefix_sum $ optirun python2 prefix_sum.py | |
[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 | |
25 26 27 28 29 30 31] | |
((16,), None) | |
(<pyopencl.cffi_cl.CommandQueue object at 0x7f911b4bcb10>, (16,), None, <pyopencl.cffi_cl.Buffer object at 0x7f911b23efd0>, <pyopencl.cffi_cl.Buffer object at 0x7f911b23ee90>, 0, 1) | |
Adding ap[0] and ap[1] (start, stride: 0, 1), (size: 16,0) | |
Adding ap[2] and ap[3] (start, stride: 0, 1), (size: 16,0) | |
Adding ap[4] and ap[5] (start, stride: 0, 1), (size: 16,0) | |
Adding ap[6] and ap[7] (start, stride: 0, 1), (size: 16,0) | |
Adding ap[8] and ap[9] (start, stride: 0, 1), (size: 16,0) | |
Adding ap[10] and ap[11] (start, stride: 0, 1), (size: 16,0) | |
Adding ap[12] and ap[13] (start, stride: 0, 1), (size: 16,0) | |
Adding ap[14] and ap[15] (start, stride: 0, 1), (size: 16,0) | |
Adding ap[16] and ap[17] (start, stride: 0, 1), (size: 16,0) | |
Adding ap[18] and ap[19] (start, stride: 0, 1), (size: 16,0) | |
Adding ap[20] and ap[21] (start, stride: 0, 1), (size: 16,0) | |
Adding ap[22] and ap[23] (start, stride: 0, 1), (size: 16,0) | |
Adding ap[24] and ap[25] (start, stride: 0, 1), (size: 16,0) | |
Adding ap[26] and ap[27] (start, stride: 0, 1), (size: 16,0) | |
Adding ap[28] and ap[29] (start, stride: 0, 1), (size: 16,0) | |
Adding ap[30] and ap[31] (start, stride: 0, 1), (size: 16,0) | |
(32,) | |
<pyopencl.cffi_cl.Buffer object at 0x7f911b23ee90> | |
[ 0 1 2 5 4 9 6 13 8 17 10 21 12 25 14 29 16 33 18 37 20 41 22 45 24 | |
49 26 53 28 57 30 61] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment