Skip to content

Instantly share code, notes, and snippets.

@dbr
Created August 28, 2011 13:03
Show Gist options
  • Save dbr/1176640 to your computer and use it in GitHub Desktop.
Save dbr/1176640 to your computer and use it in GitHub Desktop.
Colour matrix reverse engineer'er
"""PoC code to reverse-engineer an unknown colour-matrix applied to a
set of know pixel values
Moved to:
https://github.com/dbr/colourstuff/blob/master/lib/python/colourstuff/determine_colour_matrix.py
"""
import math
import array
import time
import random
import contextlib
@contextlib.contextmanager
def timeify(msg = ""):
#print "[Start] %s" % msg
start = time.time()
yield
end = time.time()
print "%s %.04fms" % (msg, (end - start) * 1000)
def make_test_data(w = 128, h = 128):
in_r, in_g, in_b = array.array('f'), array.array('f'), array.array('f')
print "Make raw data"
for x in range(w):
for y in range(h):
in_r.append(random.random())
in_g.append(random.random())
in_b.append(random.random())
mtx = array.array('f', [random.random(), random.random(), random.random()])
print "Matrixing"
out_r = array.array('f', [(r*mtx[0] + g*mtx[1] + b*mtx[2]) for (r, g, b) in zip(in_r, in_g, in_b)])
return {'in_r': in_r, 'in_g': in_g, 'in_b': in_b,
'out_r': out_r,
'mtx': mtx}
def score_matrix(ir, ig, ib, expect_r, mtx):
ms = 0
for i in range(len(ir)):
val = ir[i] * mtx[0] + ig[i] * mtx[1] + ib[i] * mtx[2]
ms += abs(val - expect_r[i]) ** 2
rms = math.sqrt(ms / float(len(ir)))
return rms
SOURCE = """
kernel void score_matrix_to_rms(
global float* in_r,
global float* in_g,
global float* in_b,
global float* expect_r,
global float* score,
int array_len,
int cube_size
){
float rm = 0;
float val;
int id = get_global_id(0);
float mtx_r = (float)((id/cube_size/cube_size)%cube_size) / (cube_size-1);
float mtx_g = (float)((id/cube_size)%cube_size) / (cube_size-1);
float mtx_b = (float)((id)%cube_size) / (cube_size-1);
for(int i = 0; i < array_len; ++i)
{
val = in_r[i] * mtx_r + in_g[i] * mtx_g + in_b[i] * mtx_b;
rm += pow(fabs(expect_r[i] - val), 2);
}
score[id] = sqrt(rm/array_len);
}
"""
def setup_opencl(data, cube_size):
import pycl
with timeify("Making context, loading kernel"):
devices = pycl.clGetDeviceIDs()
ctx = pycl.clCreateContext(devices = devices)
queue = pycl.clCreateCommandQueue(ctx)
program = pycl.clCreateProgramWithSource(ctx, SOURCE).build()
score_matrix = program['score_matrix_to_rms']
score_matrix.argtypes = (pycl.cl_mem, pycl.cl_mem, pycl.cl_mem,
pycl.cl_mem, pycl.cl_mem, pycl.cl_int, pycl.cl_int)
sub_divisions = cube_size**3
with timeify("Creating buffers"):
in_r_buf, in_evt1 = pycl.buffer_from_pyarray(queue, data['in_r'], blocking = False)
in_g_buf, in_evt2 = pycl.buffer_from_pyarray(queue, data['in_g'], blocking = False)
in_b_buf, in_evt3 = pycl.buffer_from_pyarray(queue, data['in_b'], blocking = False)
out_r = data['out_r']
out_r_buf, in_evt4 = pycl.buffer_from_pyarray(queue, out_r, blocking = False)
score = array.array('f', [0 for x in range(sub_divisions)])
score_buf, in_evt5 = pycl.buffer_from_pyarray(queue, score, blocking = False)
with timeify("Run kernel"):
run_evt = score_matrix(
in_r_buf, in_g_buf, in_b_buf, out_r_buf, score_buf,
len(data['in_r']), cube_size,
wait_for = [in_evt1, in_evt2, in_evt3, in_evt4, in_evt5]).on(queue,
sub_divisions)
with timeify("Calculate RMS"):
score_from_gpu, evt = pycl.buffer_to_pyarray(queue, score_buf,
wait_for=run_evt,
like=score)
return score_from_gpu
if __name__ == '__main__':
print
def index_to_rgb(i, cube_size):
r = float( ((i/cube_size/cube_size)%cube_size) ) / (cube_size-1)
g = float( ((i/cube_size)%cube_size) ) / (cube_size-1)
b = float( ((i)%cube_size) ) / (cube_size-1)
return (r, g, b)
print "OpenCL matrix guesser"
img_size = 8
cube_size = 256
with timeify("Make test data"):
data = make_test_data(w = img_size, h = img_size)
with timeify("OpenCL impl"):
from_cl = setup_opencl(data, cube_size = cube_size)
with timeify("Get lowest value"):
closest = min(from_cl)
"""
with timeify("CPU"):
from_py = score_matrix(data['in_r'], data['in_g'], data['in_b'], data['out_r'], mtx = new_mtx)
dif = abs(from_py - from_cl)
print "cl: %.10f\npy: %.10f\nDiff: %.10f" % (from_cl, from_py, dif)
"""
print "secret test matrix was", "%.04f %.04f %.04f" % tuple(data['mtx'].tolist())
print "Closest ", "%.04f %.04f %.04f" % index_to_rgb(from_cl.index(closest), cube_size = cube_size)
#!/usr/bin/env python
# from http://pypi.python.org/pypi/pycl/
"""
Brief usage example:
>>> from array import array
>>> source = '''
... kernel void mxplusb(float m, global float *x, float b, global float *out) {
... int i = get_global_id(0);
... out[i] = m*x[i]+b;
... }
... '''
>>> ctx = clCreateContext()
>>> queue = clCreateCommandQueue(ctx)
>>> program = clCreateProgramWithSource(ctx, source).build()
>>> kernel = program['mxplusb']
>>> kernel.argtypes = (cl_float, cl_mem, cl_float, cl_mem)
>>> x = array('f', range(100))
>>> x_buf, in_evt = buffer_from_pyarray(queue, x, blocking=False)
>>> y_buf = x_buf.empty_like_this()
>>> run_evt = kernel(2, x_buf, 5, y_buf).on(queue, len(x), wait_for=in_evt)
>>> y, evt = buffer_to_pyarray(queue, y_buf, wait_for=run_evt, like=x)
>>> evt.wait()
>>> y[0:10]
array('f', [5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23.0])
For Numpy users, see :func:`buffer_from_ndarray` and
:func:`buffer_to_ndarray`.
Additionally, if run as a script, will print out a summary
of your platforms and devices.
Most of the C typedefs are available as subclasses of
Python ctypes datatypes. The spelling might be slightly
different.
The various enumeration and bitfield types have attributes
representing their defined constants (e.g.
:const:`~cl_device_type.CL_DEVICE_TYPE_GPU`). These
constants are also available at the module level, in case
you can't remember what type
:const:`~cl_command_execution_status.CL_QUEUED` is supposed
to be. They are all somewhat magical in that they'll
make a reasonable effort to pretty-print themselves:
>>> CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_CPU
CL_DEVICE_TYPE_CPU | CL_DEVICE_TYPE_GPU
>>> cl_mem_info(0x1100)
CL_MEM_TYPE
The types representing various object-like datastructures
often have attributes so that you can view their infos
without needing to call the appropriate ``clGetThingInfo``
function. They may have other methods and behaviors.
One last note about the datatypes: despite any appearance
of magic and high-level function, these are just ctypes
objects. It is entirely possible for you to assign things
to the :attr:`value` attribute of the enum/bitfield
constants or of object-like items. Overwriting constants
and clobbering pointers is generally a bad idea, though,
so you should probably avoid it. (I tried vetoing
assignment to .value, but PyPy didn't like that.
So you're on your own.)
Wrapped OpenCL functions have their usual naming convention
(``clDoSomething``). These are't the naked C function
pointers - you will find that the argument lists,
return types, and exception raising are more in line with
Python. Check the docstrings. That said, you can refer to
the function pointer itself with the wrapped function's
:attr:`call` attribute, which is how the functions
themselves do it. The function pointer itself has argument
type, return type, and error checking added in the usual
ctypes manner.
The list of wrapped functions is *very* incomplete. Feel
free to contribute if you need a function that hasn't been
wrapped yet.
There are currently no plans to provide wrappers for OpenCL
extensions (like OpenGL interop). Maybe later.
"""
# Copyright (c) 2011 Ken Watford
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# tl;dr - MIT license.
__version__ = '0.1a2'
import ctypes
from ctypes import (
c_size_t as size_t, c_void_p as void_p, c_char_p as char_p,
POINTER as P, byref, sizeof, pointer, cast, create_string_buffer)
import os
import sys
from warnings import warn
from array import array
try:
import numpy as np
except ImportError:
pass
class cl_sampler(void_p): pass
class cl_char(ctypes.c_int8): pass
class cl_uchar(ctypes.c_uint8): pass
class cl_short(ctypes.c_int16): pass
class cl_ushort(ctypes.c_uint16): pass
class cl_int(ctypes.c_int32): pass
class cl_uint(ctypes.c_uint32): pass
class cl_long(ctypes.c_int64): pass
class cl_ulong(ctypes.c_uint64): pass
class cl_half(ctypes.c_uint16): pass
class cl_float(ctypes.c_float): pass
class cl_double(ctypes.c_double): pass
class cl_bool(cl_uint): pass
class cl_uenum(cl_uint):
# Base class for the various unsigned int
# constants defined in OpenCL.
def __eq__(self, other):
if not isinstance(other, self.__class__):
return False
else:
return self.value == other.value
def __ne__(self, other):
return not(self == other)
def __hash__(self):
return self.value.__hash__()
def __repr__(self):
by_value = self.__class__._by_value
names = []
if self in by_value:
return by_value[self]
elif self.value:
return "UNKNOWN(0%x)" % self.value
else:
return "NONE"
class cl_enum(cl_int):
# Base class for various signed int enums.
def __eq__(self, other):
if not isinstance(other, self.__class__):
return False
else:
return self.value == other.value
def __ne__(self, other):
return not(self == other)
def __hash__(self):
return self.value.__hash__()
def __repr__(self):
by_value = self.__class__._by_value
names = []
if self in by_value:
return by_value[self]
elif self.value:
return "UNKNOWN(0x%x)" % self.value
else:
return "NONE"
class cl_bitfield(cl_ulong):
# Base class for bitfield values found in OpenCL.
# Bitwise operations for combining flags are supported.
def __or__(self, other):
assert isinstance(other, self.__class__)
return self.__class__(self.value | other.value)
def __and__(self, other):
assert isinstance(other, self.__class__)
return self.__class__(self.value & other.value)
def __xor__(self):
assert isinstance(other, self.__class__)
return self.__class__(self.value ^ other.value)
def __not__(self):
return self.__class__(~self.value)
def __contains__(self, other):
assert isinstance(other, self.__class__)
return (self.value & other.value) == other.value
def __hash__(self):
return self.value.__hash__()
def __eq__(self, other):
if not isinstance(other, self.__class__):
return False
else:
return self.value == other.value
def __ne__(self, other):
return not(self == other)
def __repr__(self):
by_value = self.__class__._by_value
names = []
if self in by_value:
return by_value[self]
for val in by_value:
if val in self:
names.append(by_value[val])
if names:
return " | ".join(names)
elif self.value:
return "UNKNOWN(0x%x)" % self.value
else:
return "NONE"
class cl_device_type(cl_bitfield):
"""
Bitfield used by :func:`clCreateContextFromType` to
create a context from one or more matching device types.
See also :attr:`cl_device.type` and :func:`clGetDeviceInfo`
"""
CL_DEVICE_TYPE_DEFAULT = (1 << 0)
CL_DEVICE_TYPE_CPU = (1 << 1)
CL_DEVICE_TYPE_GPU = (1 << 2)
CL_DEVICE_TYPE_ACCELERATOR = (1 << 3)
CL_DEVICE_TYPE_ALL = 0xFFFFFFFF
class cl_errnum(cl_enum):
"""
A status code returned by most OpenCL functions.
Exceptions exist for each error code and will be
raised in the event that the code is flagged by
any wrapper function. The exception names are formed
by removing the 'CL', title-casing the words, removing
the underscores, and appending 'Error' to the end.
Some of these are a little redundant, like
:exc:`BuildProgramFailureError`.
And no, there is no :exc:`SuccessError`.
"""
CL_SUCCESS = 0
CL_DEVICE_NOT_FOUND = -1
CL_DEVICE_NOT_AVAILABLE = -2
CL_COMPILER_NOT_AVAILABLE = -3
CL_MEM_OBJECT_ALLOCATION_FAILURE = -4
CL_OUT_OF_RESOURCES = -5
CL_OUT_OF_HOST_MEMORY = -6
CL_PROFILING_INFO_NOT_AVAILABLE = -7
CL_MEM_COPY_OVERLAP = -8
CL_IMAGE_FORMAT_MISMATCH = -9
CL_IMAGE_FORMAT_NOT_SUPPORTED = -10
CL_BUILD_PROGRAM_FAILURE = -11
CL_MAP_FAILURE = -12
CL_MISALIGNED_SUB_BUFFER_OFFSET = -13
CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST = -14
CL_INVALID_VALUE = -30
CL_INVALID_DEVICE_TYPE = -31
CL_INVALID_PLATFORM = -32
CL_INVALID_DEVICE = -33
CL_INVALID_CONTEXT = -34
CL_INVALID_QUEUE_PROPERTIES = -35
CL_INVALID_COMMAND_QUEUE = -36
CL_INVALID_HOST_PTR = -37
CL_INVALID_MEM_OBJECT = -38
CL_INVALID_IMAGE_FORMAT_DESCRIPTOR = -39
CL_INVALID_IMAGE_SIZE = -40
CL_INVALID_SAMPLER = -41
CL_INVALID_BINARY = -42
CL_INVALID_BUILD_OPTIONS = -43
CL_INVALID_PROGRAM = -44
CL_INVALID_PROGRAM_EXECUTABLE = -45
CL_INVALID_KERNEL_NAME = -46
CL_INVALID_KERNEL_DEFINITION = -47
CL_INVALID_KERNEL = -48
CL_INVALID_ARG_INDEX = -49
CL_INVALID_ARG_VALUE = -50
CL_INVALID_ARG_SIZE = -51
CL_INVALID_KERNEL_ARGS = -52
CL_INVALID_WORK_DIMENSION = -53
CL_INVALID_WORK_GROUP_SIZE = -54
CL_INVALID_WORK_ITEM_SIZE = -55
CL_INVALID_GLOBAL_OFFSET = -56
CL_INVALID_EVENT_WAIT_LIST = -57
CL_INVALID_EVENT = -58
CL_INVALID_OPERATION = -59
CL_INVALID_GL_OBJECT = -60
CL_INVALID_BUFFER_SIZE = -61
CL_INVALID_MIP_LEVEL = -62
CL_INVALID_GLOBAL_WORK_SIZE = -63
CL_INVALID_PROPERTY = -64
class cl_platform_info(cl_uenum):
"""
The set of possible parameter names used
with the :func:`clGetPlatformInfo` function.
"""
CL_PLATFORM_PROFILE = 0x0900
CL_PLATFORM_VERSION = 0x0901
CL_PLATFORM_NAME = 0x0902
CL_PLATFORM_VENDOR = 0x0903
CL_PLATFORM_EXTENSIONS = 0x0904
class cl_device_info(cl_uenum):
"""
The set of possible parameter names used
with the :func:`clGetDeviceInfo` function.
"""
CL_DEVICE_TYPE = 0x1000
CL_DEVICE_VENDOR_ID = 0x1001
CL_DEVICE_MAX_COMPUTE_UNITS = 0x1002
CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS = 0x1003
CL_DEVICE_MAX_WORK_GROUP_SIZE = 0x1004
CL_DEVICE_MAX_WORK_ITEM_SIZES = 0x1005
CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR = 0x1006
CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT = 0x1007
CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT = 0x1008
CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG = 0x1009
CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT = 0x100A
CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE = 0x100B
CL_DEVICE_MAX_CLOCK_FREQUENCY = 0x100C
CL_DEVICE_ADDRESS_BITS = 0x100D
CL_DEVICE_MAX_READ_IMAGE_ARGS = 0x100E
CL_DEVICE_MAX_WRITE_IMAGE_ARGS = 0x100F
CL_DEVICE_MAX_MEM_ALLOC_SIZE = 0x1010
CL_DEVICE_IMAGE2D_MAX_WIDTH = 0x1011
CL_DEVICE_IMAGE2D_MAX_HEIGHT = 0x1012
CL_DEVICE_IMAGE3D_MAX_WIDTH = 0x1013
CL_DEVICE_IMAGE3D_MAX_HEIGHT = 0x1014
CL_DEVICE_IMAGE3D_MAX_DEPTH = 0x1015
CL_DEVICE_IMAGE_SUPPORT = 0x1016
CL_DEVICE_MAX_PARAMETER_SIZE = 0x1017
CL_DEVICE_MAX_SAMPLERS = 0x1018
CL_DEVICE_MEM_BASE_ADDR_ALIGN = 0x1019
CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE = 0x101A
CL_DEVICE_SINGLE_FP_CONFIG = 0x101B
CL_DEVICE_GLOBAL_MEM_CACHE_TYPE = 0x101C
CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE = 0x101D
CL_DEVICE_GLOBAL_MEM_CACHE_SIZE = 0x101E
CL_DEVICE_GLOBAL_MEM_SIZE = 0x101F
CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE = 0x1020
CL_DEVICE_MAX_CONSTANT_ARGS = 0x1021
CL_DEVICE_LOCAL_MEM_TYPE = 0x1022
CL_DEVICE_LOCAL_MEM_SIZE = 0x1023
CL_DEVICE_ERROR_CORRECTION_SUPPORT = 0x1024
CL_DEVICE_PROFILING_TIMER_RESOLUTION = 0x1025
CL_DEVICE_ENDIAN_LITTLE = 0x1026
CL_DEVICE_AVAILABLE = 0x1027
CL_DEVICE_COMPILER_AVAILABLE = 0x1028
CL_DEVICE_EXECUTION_CAPABILITIES = 0x1029
CL_DEVICE_QUEUE_PROPERTIES = 0x102A
CL_DEVICE_NAME = 0x102B
CL_DEVICE_VENDOR = 0x102C
CL_DRIVER_VERSION = 0x102D
CL_DEVICE_PROFILE = 0x102E
CL_DEVICE_VERSION = 0x102F
CL_DEVICE_EXTENSIONS = 0x1030
CL_DEVICE_PLATFORM = 0x1031
CL_DEVICE_DOUBLE_FP_CONFIG = 0x1032
CL_DEVICE_HALF_FP_CONFIG = 0x1033
CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF = 0x1034
CL_DEVICE_HOST_UNIFIED_MEMORY = 0x1035
CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR = 0x1036
CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT = 0x1037
CL_DEVICE_NATIVE_VECTOR_WIDTH_INT = 0x1038
CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG = 0x1039
CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT = 0x103A
CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE = 0x103B
CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF = 0x103C
CL_DEVICE_OPENCL_C_VERSION = 0x103D
class cl_device_fp_config(cl_bitfield):
"""
One of the possible return types from :func:`clGetDeviceInfo`.
Bitfield identifying the floating point capabilities of the device.
"""
CL_FP_DENORM = (1 << 0)
CL_FP_INF_NAN = (1 << 1)
CL_FP_ROUND_TO_NEAREST = (1 << 2)
CL_FP_ROUND_TO_ZERO = (1 << 3)
CL_FP_ROUND_TO_INF = (1 << 4)
CL_FP_FMA = (1 << 5)
CL_FP_SOFT_FLOAT = (1 << 6)
class cl_device_mem_cache_type(cl_uenum):
"""
One of the possible return types from :func:`clGetDeviceInfo`.
Describes the nature of the device's cache, if any.
"""
CL_NONE = 0x0
CL_READ_ONLY_CACHE = 0x1
CL_READ_WRITE_CACHE = 0x2
class cl_device_local_mem_type(cl_uenum):
"""
One of the possible return types from :func:`clGetDeviceInfo`.
Describes where 'local' memory lives in the device.
Presumably, :const:`~cl_device_local_mem_type.CL_GLOBAL` means
the device's local memory lives in the same address space as its
global memory.
"""
CL_LOCAL = 0x1
CL_GLOBAL = 0x2
class cl_device_exec_capabilities(cl_bitfield):
"""
One of the possible return types from :func:`clGetDeviceInfo`.
Bitfield identifying what kind of kernels can be executed.
All devices can execute OpenCL C kernels, but some have their
own native kernel types as well.
"""
CL_EXEC_KERNEL = (1 << 0)
CL_EXEC_NATIVE_KERNEL = (1 << 1)
class cl_command_queue_properties(cl_bitfield):
"""
Bitfield representing the properties of a command queue.
"""
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE = (1 << 0)
CL_QUEUE_PROFILING_ENABLE = (1 << 1)
class cl_context_properties(void_p):
"""
If you find yourself looking at an array of these and
need to make any sense of them... good luck! It's a list
of key-value pairs, null-terminated. The keys are unsigned ints
representing enum constants.
:const:`~cl_context_info.CL_CONTEXT_PLATFORM` (0x1084)
is the most common one you'll see. I believe the rest are
parts of extensions, such as the OpenGL interop extension.
The meaning of the odd elements depends entirely on the
enum that came just before it. In the case of
:const:`~cl_context_info.CL_CONTEXT_PLATFORM`,
the value represents a pointer to a cl_platform object.
"""
pass
class cl_context_info(cl_uenum):
"""
Parameter names understood by :func:`clGetContextInfo`.
Note that :const:`cl_context_inf.CL_CONTEXT_PLATFORM` does not technically
belong here, and the C-level code won't accept it. The wrapped
version of :func:`clGetContextInfo` will, however, recognize it
and extract the appropriate value from the context's
properties list.
"""
CL_CONTEXT_REFERENCE_COUNT = 0x1080
CL_CONTEXT_DEVICES = 0x1081
CL_CONTEXT_PROPERTIES = 0x1082
CL_CONTEXT_NUM_DEVICES = 0x1083
CL_CONTEXT_PLATFORM = 0x1084
class cl_command_queue_info(cl_uenum):
"""
Parameter names understood by :func:`clGetCommandQueueInfo`
"""
CL_QUEUE_CONTEXT = 0x1090
CL_QUEUE_DEVICE = 0x1091
CL_QUEUE_REFERENCE_COUNT = 0x1092
CL_QUEUE_PROPERTIES = 0x1093
class cl_channel_order(cl_uenum):
"""
Indicates the meanings of vector fields in an image.
"""
CL_R = 0x10B0
CL_A = 0x10B1
CL_RG = 0x10B2
CL_RA = 0x10B3
CL_RGB = 0x10B4
CL_RGBA = 0x10B5
CL_BGRA = 0x10B6
CL_ARGB = 0x10B7
CL_INTENSITY = 0x10B8
CL_LUMINANCE = 0x10B9
CL_Rx = 0x10BA
CL_RGx = 0x10BB
CL_RGBx = 0x10BC
class cl_channel_type(cl_uenum):
"""
Indicates the type and size of image channels.
"""
CL_SNORM_INT8 = 0x10D0
CL_SNORM_INT16 = 0x10D1
CL_UNORM_INT8 = 0x10D2
CL_UNORM_INT16 = 0x10D3
CL_UNORM_SHORT_565 = 0x10D4
CL_UNORM_SHORT_555 = 0x10D5
CL_UNORM_INT_101010 = 0x10D6
CL_SIGNED_INT8 = 0x10D7
CL_SIGNED_INT16 = 0x10D8
CL_SIGNED_INT32 = 0x10D9
CL_UNSIGNED_INT8 = 0x10DA
CL_UNSIGNED_INT16 = 0x10DB
CL_UNSIGNED_INT32 = 0x10DC
CL_HALF_FLOAT = 0x10DD
CL_FLOAT = 0x10DE
class cl_mem_flags(cl_bitfield):
"""
Bitfield used when constructing a memory object.
Indicates both the read/write status of the memory as
well as how the memory interacts with whatever host
pointer was provided. See the OpenCL docs_ for
:func:`clCreateBuffer` for more information.
.. _docs: http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clCreateBuffer.html
"""
CL_MEM_READ_WRITE = (1 << 0)
CL_MEM_WRITE_ONLY = (1 << 1)
CL_MEM_READ_ONLY = (1 << 2)
CL_MEM_USE_HOST_PTR = (1 << 3)
CL_MEM_ALLOC_HOST_PTR = (1 << 4)
CL_MEM_COPY_HOST_PTR = (1 << 5)
class cl_mem_object_type(cl_uenum):
"""
Possible return type for :func:`clGetMemObjectInfo`.
Indicates the type of the memory object.
"""
CL_MEM_OBJECT_BUFFER = 0x10F0
CL_MEM_OBJECT_IMAGE2D = 0x10F1
CL_MEM_OBJECT_IMAGE3D = 0x10F2
class cl_mem_info(cl_uenum):
"""
Parameter names accepted by :func:`clGetMemObjectInfo`
"""
CL_MEM_TYPE = 0x1100
CL_MEM_FLAGS = 0x1101
CL_MEM_SIZE = 0x1102
CL_MEM_HOST_PTR = 0x1103
CL_MEM_MAP_COUNT = 0x1104
CL_MEM_REFERENCE_COUNT = 0x1105
CL_MEM_CONTEXT = 0x1106
CL_MEM_ASSOCIATED_MEMOBJECT = 0x1107
CL_MEM_OFFSET = 0x1108
class cl_image_info(cl_uenum):
"""
Parameter names accepted by :func:`clGetImageInfo`
"""
CL_IMAGE_FORMAT = 0x1110
CL_IMAGE_ELEMENT_SIZE = 0x1111
CL_IMAGE_ROW_PITCH = 0x1112
CL_IMAGE_SLICE_PITCH = 0x1113
CL_IMAGE_WIDTH = 0x1114
CL_IMAGE_HEIGHT = 0x1115
CL_IMAGE_DEPTH = 0x1116
class cl_buffer_create_type(cl_uenum):
"""
Parameter type for :func:`clCreateSubBuffer` that indicates
how the subbuffer will be described.
The only supported value is
:const:`~cl_buffer_create_type.CL_BUFFER_CREATE_TYPE_REGION`,
which indicates the subbuffer will be a contiguous region as
defined by a :class:`cl_buffer_region` struct.
"""
CL_BUFFER_CREATE_TYPE_REGION = 0x1220
class cl_addressing_mode(cl_uenum):
"""
Addressing mode for sampler objects.
Returned by :func:`clGetSamplerInfo`.
"""
CL_ADDRESS_NONE = 0x1130
CL_ADDRESS_CLAMP_TO_EDGE = 0x1131
CL_ADDRESS_CLAMP = 0x1132
CL_ADDRESS_REPEAT = 0x1133
CL_ADDRESS_MIRRORED_REPEAT = 0x1134
class cl_filter_mode(cl_uenum):
"""
Filter mode for sampler objects.
Returned by :func:`clGetSamplerInfo`.
"""
CL_FILTER_NEAREST = 0x1140
CL_FILTER_LINEAR = 0x1141
class cl_sampler_info(cl_uenum):
"""
Parameter names for :func:`clGetSamplerInfo`.
"""
CL_SAMPLER_REFERENCE_COUNT = 0x1150
CL_SAMPLER_CONTEXT = 0x1151
CL_SAMPLER_NORMALIZED_COORDS = 0x1152
CL_SAMPLER_ADDRESSING_MODE = 0x1153
CL_SAMPLER_FILTER_MODE = 0x1154
class cl_map_flags(cl_bitfield):
"""
Read/write flags used for applying memory mappings
to memory objects. See :func:`clEnqueueMapBuffer`
and :func:`clEnqueueMapImage`.
"""
CL_MAP_READ = (1 << 0)
CL_MAP_WRITE = (1 << 1)
class cl_program_info(cl_uenum):
"""
Parameter names for :func:`clGetProgramInfo`
"""
CL_PROGRAM_REFERENCE_COUNT = 0x1160
CL_PROGRAM_CONTEXT = 0x1161
CL_PROGRAM_NUM_DEVICES = 0x1162
CL_PROGRAM_DEVICES = 0x1163
CL_PROGRAM_SOURCE = 0x1164
CL_PROGRAM_BINARY_SIZES = 0x1165
CL_PROGRAM_BINARIES = 0x1166
class cl_program_build_info(cl_uenum):
"""
Parameter names for :func:`clGetProgramBuildInfo`
"""
CL_PROGRAM_BUILD_STATUS = 0x1181
CL_PROGRAM_BUILD_OPTIONS = 0x1182
CL_PROGRAM_BUILD_LOG = 0x1183
class cl_build_status(cl_enum):
"""
Returned by :func:`clGetProgramBuildInfo`.
Indicates build status for the program on the
specified device.
"""
CL_BUILD_SUCCESS = 0
CL_BUILD_NONE = -1
CL_BUILD_ERROR = -2
CL_BUILD_IN_PROGRESS = -3
class cl_kernel_info(cl_uenum):
"""
Parameter names for :func:`clGetKernelInfo`
"""
CL_KERNEL_FUNCTION_NAME = 0x1190
CL_KERNEL_NUM_ARGS = 0x1191
CL_KERNEL_REFERENCE_COUNT = 0x1192
CL_KERNEL_CONTEXT = 0x1193
CL_KERNEL_PROGRAM = 0x1194
class cl_kernel_work_group_info(cl_uenum):
"""
Parameter names for :func:`clGetKernelWorkGroupInfo`
"""
CL_KERNEL_WORK_GROUP_SIZE = 0x11B0
CL_KERNEL_COMPILE_WORK_GROUP_SIZE = 0x11B1
CL_KERNEL_LOCAL_MEM_SIZE = 0x11B2
CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE= 0x11B3
CL_KERNEL_PRIVATE_MEM_SIZE = 0x11B4
class cl_event_info(cl_uenum):
"""
Parameter names for :func:`clGetEventInfo`
"""
CL_EVENT_COMMAND_QUEUE = 0x11D0
CL_EVENT_COMMAND_TYPE = 0x11D1
CL_EVENT_REFERENCE_COUNT = 0x11D2
CL_EVENT_COMMAND_EXECUTION_STATUS = 0x11D3
CL_EVENT_CONTEXT = 0x11D4
class cl_command_type(cl_uenum):
"""
Command types recorded on events and returned by
:func:`clGetEventInfo`.
"""
CL_COMMAND_NDRANGE_KERNEL = 0x11F0
CL_COMMAND_TASK = 0x11F1
CL_COMMAND_NATIVE_KERNEL = 0x11F2
CL_COMMAND_READ_BUFFER = 0x11F3
CL_COMMAND_WRITE_BUFFER = 0x11F4
CL_COMMAND_COPY_BUFFER = 0x11F5
CL_COMMAND_READ_IMAGE = 0x11F6
CL_COMMAND_WRITE_IMAGE = 0x11F7
CL_COMMAND_COPY_IMAGE = 0x11F8
CL_COMMAND_COPY_IMAGE_TO_BUFFER = 0x11F9
CL_COMMAND_COPY_BUFFER_TO_IMAGE = 0x11FA
CL_COMMAND_MAP_BUFFER = 0x11FB
CL_COMMAND_MAP_IMAGE = 0x11FC
CL_COMMAND_UNMAP_MEM_OBJECT = 0x11FD
CL_COMMAND_MARKER = 0x11FE
CL_COMMAND_ACQUIRE_GL_OBJECTS = 0x11FF
CL_COMMAND_RELEASE_GL_OBJECTS = 0x1200
CL_COMMAND_READ_BUFFER_RECT = 0x1201
CL_COMMAND_WRITE_BUFFER_RECT = 0x1202
CL_COMMAND_COPY_BUFFER_RECT = 0x1203
CL_COMMAND_USER = 0x1204
class cl_command_execution_status(cl_uenum):
"""
Status of the command associated with an event,
returned by :func:`clGetEventInfo`.
"""
CL_COMPLETE = 0x0
CL_RUNNING = 0x1
CL_SUBMITTED = 0x2
CL_QUEUED = 0x3
class cl_profiling_info(cl_uenum):
"""
Parameter names for :func:`clGetEventProfilingInfo`.
Indicates the point in time of the event's life that
should be queried.
"""
CL_PROFILING_COMMAND_QUEUED = 0x1280
CL_PROFILING_COMMAND_SUBMIT = 0x1281
CL_PROFILING_COMMAND_START = 0x1282
CL_PROFILING_COMMAND_END = 0x1283
class cl_image_format(ctypes.Structure):
"""
Represents image formats. See :func:`clCreateImage2D`.
.. attribute:: image_channel_order
A :class:`cl_channel_order` value
.. attribute:: image_channel_data_type
A :class:`cl_channel_type` value
"""
_fields_ = [('image_channel_order', cl_channel_order),
('image_channel_data_type', cl_channel_type),]
def __repr__(self):
return "%s(%s, %s)" % (self.__class__.__name__,
self.image_channel_order,
self.image_channel_data_type)
class cl_buffer_region(ctypes.Structure):
"""
A buffer region has two fields: :attr:`origin` and :attr:`size`.
Both are of type :c:type:`size_t`.
See :func:`clCreateSubBuffer` for usage.
"""
_fields_ = [('origin', size_t),
('size', size_t),]
def __repr__(self):
return "%s(%s, %s)" % (self.__class__.__name__,
int(self.origin),
int(self.size))
# Take care of some last-minute meta stuff.
# I would use metaclasses to handle this, but Python 3 expects different
# metaclass syntax, and I didn't want to have to run it through 2to3.
# I would use class decorators to handle this, but Python 2.5 doesn't
# understand them. And it's easier to iterate through like this than to
# write in the "manual class decorator" line after each class.
# For enums and bitfields, do magic. Each type gets a registry of the
# names and values of their defined elements, to support pretty printing.
# Further, each of the class variables (which are defined using ints) is
# upgraded to be a member of the class in question.
# Additionally, each of the constants is copied into the module scope.
for cls in (cl_enum.__subclasses__() +
cl_uenum.__subclasses__() +
cl_bitfield.__subclasses__()):
if cls.__name__ not in globals():
# Don't apply this to types that ctypes makes automatically,
# like the _be classes. Doing so will overwrite the declared
# constants at global scope, which is really weird.
continue
cls._by_name = dict()
cls._by_value = dict()
if not cls.__doc__:
cls.__doc__ = ""
for name, value in cls.__dict__.items():
if isinstance(value, int):
obj = cls(value)
setattr(cls, name, obj)
cls._by_name[name] = obj
cls._by_value[obj] = name
globals()[name] = obj
cls.__doc__ += """
.. attribute:: %s
""" % name
cls.NONE = cls(0)
# cleanup
del cls; del name; del value; del obj
# Generate exception tree
class OpenCLError(Exception):
"""
The base class from which all of the (generated)
OpenCL errors are descended. These exceptions
correspond to the :class:`cl_errnum` status codes.
"""
pass
cl_errnum._errors = dict()
for name, val in cl_errnum._by_name.items():
if name == "CL_SUCCESS": continue # Sorry, no SuccessError
errname = "".join(y.title() for y in name.split("_")[1:]) + 'Error'
errtype = type(errname, (OpenCLError,), {'value':val})
globals()[errname] = errtype
cl_errnum._errors[val] = errtype
del name; del val; del errname; del errtype
# Locate and load the shared library.
_dll_filename = os.getenv('PYCL_OPENCL')
if not _dll_filename:
try:
from ctypes.util import find_library as _find_library
_dll_filename = _find_library('OpenCL')
except ImportError:
pass
if _dll_filename:
try:
_dll = ctypes.cdll.LoadLibrary(_dll_filename)
except:
raise RuntimeError('Could not load OpenCL dll: %s' % _dll_filename)
else:
if os.environ.get('READTHEDOCS', None) == 'True':
# Don't care if we can load the DLL on RTD.
_dll = None
else:
raise RuntimeError('Could not locate OpenCL dll. Please set the PYCL_OPENCL environment variable to its full path.')
def _result_errcheck(result, func, args):
"""
For use in the errcheck attribute of a ctypes function wrapper.
Most OpenCL functions return a cl_errnum. This checks it for
an error code and raises an appropriate exception if it finds one.
This is the default error checker when using _wrapdll
"""
if result != CL_SUCCESS:
raise cl_errnum._errors[result]
return result
def _lastarg_errcheck(result, func, args):
"""
For use in the errcheck attribute of a ctypes function wrapper.
Most OpenCL functions that don't return their error code expect
you to provide a pointer for it as the last argument. To use this,
the last argument of the call should be something like `byref(cl_errnum())`
"""
lastarg = args[-1]
if hasattr(lastarg, '_obj'):
status = lastarg._obj
else:
# In PyPy, the byref object is an actual pointer.
status = lastarg[0]
if status != CL_SUCCESS:
raise cl_errnum._errors[status]
return result
def _wrapdll(*argtypes, **kw):
"""
Decorator used to simplify wrapping OpenCL functions a bit.
The positional arguments represent the ctypes argument types the
C-level function expects, and will be used to do argument type checking.
If a `res` keyword argument is given, it represents the C-level
function's expected return type. The default is `cl_errnum`.
If an `err` keyword argument is given, it represents an error checker
that should be run after low-level calls. The `_result_errcheck` and
`_lastarg_errcheck` functions should be sufficient for most OpenCL
functions. `_result_errcheck` is the default value.
The decorated function should have the same name as the underlying
OpenCL function, since the function name is used to do the lookup. The
C-level function pointer will be stored in the decorated function's
`call` attribute, and should be used by the decorated function to
perform the actual call(s). The wrapped function is otherwise untouched.
If no C-level function by this name is found in the OpenCL library
(perhaps it's version 1.0?) the decorator will discard the original
function. The replacement simply raises NotImplementedError if called.
.. todo::
Reconsider this last bit. Maybe let the wrapper compensate for the
lack of function pointer.
"""
def dowrap(f):
try:
wrapped_func = getattr(_dll, f.__name__)
except:
def badfunc(*args, **kw):
raise NotImplementedError("Function %s not present "
"in this version of OpenCL" %
f.__name__)
wrapped_func = badfunc
wrapped_func.argtypes = argtypes
res = kw.pop('res', cl_errnum)
wrapped_func.restype = res
err = kw.pop('err', _result_errcheck)
wrapped_func.errcheck = err
f.call = wrapped_func
return f
return dowrap
#################
# Event Objects #
#################
class cl_event(void_p):
"""
An OpenCL Event object. Returned by functions that add commands
to a :class:`cl_command_queue`, and often accepted (singly or in
lists) by the ``wait_for`` argument of these functions to impose
ordering.
Use :meth:`wait` to wait for a particular event to complete, or
:func:`clWaitForEvents` to wait for several of them at once.
These objects participate in OpenCL's reference counting scheme.
"""
@property
def queue(self):
"""The queue this event was emitted from."""
try: return self._queue
except AttributeError:
return clGetEventInfo(self, CL_EVENT_COMMAND_QUEUE)
@property
def context(self):
"""The context this event exists within."""
try: return self._context
except AttributeError:
return clGetEventInfo(self, CL_EVENT_CONTEXT)
@property
def type(self):
"""
The type of command this event is linked to.
See :class:`cl_command_type`.
"""
try: return self._type
except AttributeError:
return clGetEventInfo(self, CL_EVENT_COMMAND_TYPE)
@property
def status(self):
"""
Execution status of the command the event is linked to.
See :class:`cl_command_exec_status`.
"""
return clGetEventInfo(self, CL_EVENT_COMMAND_EXECUTION_STATUS)
@property
def reference_count(self):
"""Reference count for OpenCL garbage collection."""
return clGetEventInfo(self, CL_EVENT_REFERENCE_COUNT)
def wait(self):
"""Blocks until this event completes."""
clWaitForEvents(self)
def __repr__(self):
try:
return "<cl_event %s (%s) >" % (self.type, self.status)
except:
return "<cl_event 0x%x>" % (self.value or 0)
def __del__(self):
try:
if self:
clReleaseEvent(self)
except:
pass
def _make_event_array(events):
if not events: return (0, None)
if isinstance(events, cl_event):
events = [events]
valid_events = [e for e in events if e]
numevents = len(valid_events)
event_array = (cl_event * numevents)()
for i, e in enumerate(valid_events):
event_array[i] = e
return (numevents, event_array)
@_wrapdll(cl_uint, P(cl_event))
def clWaitForEvents(*events):
"""
Accepts several events and blocks until they all complete.
"""
if not events: return
nevents, event_array = _make_event_array(events)
if nevents: clWaitForEvents.call(nevents, event_array)
@_wrapdll(cl_event)
def clRetainEvent(event):
clRetainEvent.call(event)
@_wrapdll(cl_event)
def clReleaseEvent(event):
clReleaseEvent.call(event)
@_wrapdll(cl_event, cl_event_info, size_t, void_p, P(size_t))
def clGetEventInfo(event, param_name):
"""
:param param_name: An instance of :class:`cl_event_info`.
Event information can be more easily obtained by querying
the properties of the event object, which in turn will
call this function.
"""
if param_name == CL_EVENT_COMMAND_QUEUE:
try:
return event._queue
except AttributeError:
param_value = cl_command_queue()
clGetEventInfo.call(event, param_name, sizeof(param_value),
byref(param_value), None)
clRetainCommandQueue(param_value)
event._queue = param_value
return param_value
elif param_name == CL_EVENT_CONTEXT:
try:
return event._context
except AttributeError:
param_value = cl_context()
clGetEventInfo.call(event, param_name, sizeof(param_value),
byref(param_value), None)
clRetainContext(param_value)
event._context = param_value
return param_value
elif param_name == CL_EVENT_COMMAND_TYPE:
try:
return event._type
except AttributeError:
param_value = cl_command_type()
clGetEventInfo.call(event, param_name, sizeof(param_value),
byref(param_value), None)
event._type = param_value
return param_value
elif param_name == CL_EVENT_COMMAND_EXECUTION_STATUS:
param_value = cl_command_execution_status()
clGetEventInfo.call(event, param_name, sizeof(param_value),
byref(param_value), None)
return param_value
elif param_name == CL_EVENT_REFERENCE_COUNT:
param_value = cl_uint()
clGetEventInfo.call(event, param_name, sizeof(param_value),
byref(param_value), None)
return int(param_value.value)
else:
raise ValueError("Unknown parameter type: %s" % param_name)
####################
# Platform Objects #
####################
class cl_platform(void_p):
"""
Represents an OpenCL Platform.
Should not be directly instantiated by users of PyCL.
Use :func:`clGetPlatformIDs` or the :attr:`platform` attribute of
some OpenCL objects to procure a cl_platform instance.
"""
def __repr__(self):
try:
return "<cl_platform '%s'>" % self.name
except:
return "<cl_platform 0x%x>" % (self.value or 0)
@property
def name(self):
"""
Name of the platform. (str)
"""
return clGetPlatformInfo(self, CL_PLATFORM_NAME)
@property
def vendor(self):
"""
Vendor that distributes the platform. (str)
"""
return clGetPlatformInfo(self, CL_PLATFORM_VENDOR)
@property
def version(self):
"""
Platform version. Likely starts with 'OpenCL 1.1'. (str)
"""
return clGetPlatformInfo(self, CL_PLATFORM_VERSION)
@property
def extensions(self):
"""
Platform extensions supported. (list of str)
Note that devices have their own set of extensions which
should be inspected separately.
"""
return clGetPlatformInfo(self, CL_PLATFORM_EXTENSIONS).split()
@property
def profile(self):
"""
One of 'FULL_PROFILE' or 'EMBEDDED_PROFILE'.
"""
return clGetPlatformInfo(self, CL_PLATFORM_PROFILE)
@property
def devices(self):
"""
All devices available on this platform. (list of cl_device)
"""
return clGetDeviceIDs(self)
@_wrapdll(cl_uint, P(cl_platform), P(cl_uint))
def clGetPlatformIDs():
"""
Returns a list of :class:`cl_platform` objects available on your system.
It should probably not be possible for this list to be empty if
you are able to call this function.
>>> clGetPlatformIDs() # doctest: +ELLIPSIS
(<cl_platform '...'>...)
"""
num_platforms = cl_uint()
clGetPlatformIDs.call(0, None, byref(num_platforms))
n = num_platforms.value
if n > 0:
platform_array = (cl_platform * n)()
clGetPlatformIDs.call(n, platform_array, None)
return tuple(x for x in platform_array)
else:
return ()
@_wrapdll(cl_platform, cl_platform_info, size_t, void_p, P(size_t))
def clGetPlatformInfo(platform, param_name):
"""
:param param_name: One of :class:`cl_platform_info`.
:class:`cl_platform` objects have attributes that will call this for
you, so you should probably use those instead of calling this directly.
>>> plat = clGetPlatformIDs()[0]
>>> clGetPlatformInfo(plat, CL_PLATFORM_VERSION) # doctest: +ELLIPSIS
'OpenCL ...'
>>> plat.version # doctest: +ELLIPSIS
'OpenCL ...'
Note that :const:`~cl_platform_info.CL_PLATFORM_EXTENSIONS` returns a
string while the :attr:`extensions` attribute returns a list:
>>> clGetPlatformInfo(plat, CL_PLATFORM_EXTENSIONS) # doctest: +ELLIPSIS
'...'
>>> plat.extensions # doctest: +ELLIPSIS
[...]
"""
sz = size_t()
clGetPlatformInfo.call(platform, param_name, 0, None, byref(sz))
# All parameter types currently return strings.
param_value = create_string_buffer(sz.value)
clGetPlatformInfo.call(platform, param_name, sz.value, param_value, None)
if sys.version_info[0] > 2:
return str(param_value.value, 'utf-8')
else:
return param_value.value
##################
# Device Objects #
##################
class cl_device(void_p):
"""
Represents an OpenCL Device belonging to some platform.
Should not be directly instantiated by users of PyCL.
Use :func:`clGetDeviceIDs` or the :attr:`devices` attribute of
some OpenCL objects to procure a cl_device instances.
"""
def __repr__(self):
try:
return "<cl_device '%s'>" % (self.name)
except:
return "<cl_device 0x%x>" % (self.value or 0)
# Devices have so many freaking properties that I'm not going
# to bother listing them all here. There's a for loop after the
# various type definitions that adds them all. The ones here
# take precedence.
@property
def driver_version(self):
# Defined here because it doesn't start with "CL_DEVICE_",
# so the for-loop can't handle it.
return clGetDeviceInfo(self, CL_DRIVER_VERSION)
@property
def extensions(self):
# Split extension list into an actual list.
return clGetDeviceInfo(self, CL_DEVICE_EXTENSIONS).split()
# Laziness on my part. There are a *lot* of cl_device_info constants
# representing possible inputs to clGetDeviceInfo. There should be
# convenience properties for each of these, but I don't want to type
# out all those property definitions. So we generate them.
for name, val in cl_device_info._by_name.items():
if name.startswith('CL_DEVICE_'):
propname = name[10:].lower()
if not hasattr(cl_device, propname):
setattr(cl_device, propname,
property(lambda self, val=val:
clGetDeviceInfo(self, val),
doc = "Same as calling :func:`clGetDeviceInfo` "
" with :const:`~cl_device_info.%s`" % name))
# cleanup
del name; del val; del propname
@_wrapdll(cl_platform, cl_device_type, cl_uint, P(cl_device), P(cl_uint))
def clGetDeviceIDs(platform=None,
device_type = cl_device_type.CL_DEVICE_TYPE_ALL):
"""
:param platform: The :class:`cl_platform` whose devices you are
interested in. If none is provided, the first platform on the
system is used.
:param device_type: A :class:`cl_device_type` bitfield indicating which
devices should be listed. By default, all are listed.
>>> clGetDeviceIDs() # doctest: +ELLIPSIS
(<cl_device '...'>...)
"""
num_devices = cl_uint()
if platform is None:
platform = clGetPlatformIDs()[0]
clGetDeviceIDs.call(platform, device_type, 0, None, byref(num_devices))
n = num_devices.value
if n > 0:
device_array = (cl_device*n)()
clGetDeviceIDs.call(platform, device_type, num_devices,
device_array, None)
return tuple(x for x in device_array)
else:
return ()
# clGetDeviceInfo has a lot of different possible return types.
# Anything not handled identified in one of these sets or in
# a special case in the wrapper function is assumed to return a cl_uint.
_device_info_sizes = frozenset((CL_DEVICE_MAX_WORK_GROUP_SIZE,
CL_DEVICE_IMAGE2D_MAX_WIDTH,
CL_DEVICE_IMAGE2D_MAX_HEIGHT,
CL_DEVICE_IMAGE3D_MAX_WIDTH,
CL_DEVICE_IMAGE3D_MAX_DEPTH,
CL_DEVICE_MAX_PARAMETER_SIZE,
CL_DEVICE_PROFILING_TIMER_RESOLUTION))
_device_info_ulongs = frozenset((CL_DEVICE_MAX_MEM_ALLOC_SIZE,
CL_DEVICE_GLOBAL_MEM_CACHE_SIZE,
CL_DEVICE_GLOBAL_MEM_SIZE,
CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE,
CL_DEVICE_LOCAL_MEM_SIZE))
_device_info_bools = frozenset((CL_DEVICE_IMAGE_SUPPORT,
CL_DEVICE_HOST_UNIFIED_MEMORY,
CL_DEVICE_ENDIAN_LITTLE,
CL_DEVICE_AVAILABLE,
CL_DEVICE_COMPILER_AVAILABLE))
_device_info_strings = frozenset((CL_DEVICE_NAME,
CL_DEVICE_VENDOR,
CL_DRIVER_VERSION,
CL_DEVICE_PROFILE,
CL_DEVICE_VERSION,
CL_DEVICE_EXTENSIONS))
@_wrapdll(cl_device, cl_device_info, size_t, void_p, P(size_t))
def clGetDeviceInfo(device, param_name):
"""
:param device: A :class:`cl_device`.
:param param_name: The :class:`cl_device_info` item to be queried.
:class:`cl_device` objects have attributes that will call this for
you, so you should probably use those instead of calling this directly.
>>> d = clGetDeviceIDs()[0]
>>> clGetDeviceInfo(d, CL_DEVICE_NAME) # doctest: +ELLIPSIS
'...'
>>> clGetDeviceInfo(d, CL_DEVICE_TYPE) # doctest: +ELLIPSIS
CL_DEVICE_TYPE_...
>>> d.available
True
>>> d.max_work_item_sizes # doctest: +ELLIPSIS
(...)
Note that :const:`~cl_device_info.CL_DEVICE_EXTENSIONS` returns a
string while the :attr:`extensions` attribute returns a list:
>>> clGetDeviceInfo(d, CL_DEVICE_EXTENSIONS) # doctest: +ELLIPSIS
'...'
>>> d.extensions # doctest: +ELLIPSIS
[...]
"""
if param_name == CL_DEVICE_TYPE:
param_value = cl_device_type()
clGetDeviceInfo.call(device, param_name, sizeof(param_value),
byref(param_value), None)
return param_value
elif param_name in _device_info_sizes:
param_value = size_t()
clGetDeviceInfo.call(device, param_name, sizeof(param_value),
byref(param_value), None)
return int(param_value.value)
elif param_name in _device_info_ulongs:
param_value = cl_ulong()
clGetDeviceInfo.call(device, param_name, sizeof(param_value),
byref(param_value), None)
return int(param_value.value)
elif param_name in _device_info_bools:
param_value = cl_bool()
clGetDeviceInfo.call(device, param_name, sizeof(param_value),
byref(param_value), None)
return bool(param_value.value)
elif param_name in _device_info_strings:
sz = size_t()
clGetDeviceInfo.call(device, param_name, 0, None, byref(sz))
param_value = create_string_buffer(sz.value)
clGetDeviceInfo.call(device, param_name, sz, param_value, None)
if sys.version_info[0] > 2:
return str(param_value.value, 'utf-8')
else:
return param_value.value
elif param_name == CL_DEVICE_SINGLE_FP_CONFIG:
param_value = cl_device_fp_config()
clGetDeviceInfo.call(device, param_name, sizeof(param_value),
byref(param_value), None)
return param_value
elif param_name == CL_DEVICE_GLOBAL_MEM_CACHE_TYPE:
param_value = cl_device_mem_cache_type()
clGetDeviceInfo.call(device, param_name, sizeof(param_value),
byref(param_value), None)
return param_value
elif param_name == CL_DEVICE_LOCAL_MEM_TYPE:
param_value = cl_device_local_mem_type()
clGetDeviceInfo.call(device, param_name, sizeof(param_value),
byref(param_value), None)
return param_value
elif param_name == CL_DEVICE_MAX_WORK_ITEM_SIZES:
sz = size_t()
clGetDeviceInfo.call(device, param_name, 0, None, byref(sz))
nd = sz.value // sizeof(size_t)
param_value = (size_t * nd)()
clGetDeviceInfo.call(device, param_name, sz,
byref(param_value), None)
return tuple(int(x) for x in param_value)
elif param_name == CL_DEVICE_EXECUTION_CAPABILITIES:
param_value = cl_device_exec_capabilities()
clGetDeviceInfo.call(device, param_name, sizeof(param_value),
byref(param_value), None)
return param_value
elif param_name == CL_DEVICE_QUEUE_PROPERTIES:
param_value = cl_command_queue_properties()
clGetDeviceInfo.call(device, param_name, sizeof(param_value),
byref(param_value), None)
return param_value
elif param_name == CL_DEVICE_PLATFORM:
param_value = cl_platform()
clGetDeviceInfo.call(device, param_name, sizeof(param_value),
byref(param_value), None)
return param_value
else:
param_value = cl_uint()
clGetDeviceInfo.call(device, param_name, sizeof(param_value),
byref(param_value), None)
return int(param_value.value)
###################
# Context Objects #
###################
class cl_context(void_p):
"""
Represents an OpenCL Context instance.
Use :func:`clCreateContext` or :func:`clCreateContextFromType` to
create a new context.
Participates in OpenCL's reference counting scheme.
"""
@property
def platform(self):
"""
Retrieve the platform this context was made using. (cl_platform)
"""
try: return self._platform
except AttributeError:
return clGetContextInfo(self, CL_CONTEXT_PLATFORM)
@property
def reference_count(self):
"""
Reference count for OpenCL's internal garbage collector. (int)
Using :func:`clReleaseContext` via pycl is an excellent way to
generate segmentation faults.
"""
return clGetContextInfo(self, CL_CONTEXT_REFERENCE_COUNT)
@property
def num_devices(self):
"""
Number of devices present in this particular context. (int)
"""
return clGetContextInfo(self, CL_CONTEXT_NUM_DEVICES)
@property
def devices(self):
"""
List of devices present in this particular context.
(list of :class:`cl_device`)
"""
try: return self._context
except AttributeError:
return clGetContextInfo(self, CL_CONTEXT_DEVICES)
@property
def properties(self):
"""
Low-level ctypes array that is probably not user-interpretable.
"""
return clGetContextInfo(self, CL_CONTEXT_PROPERTIES)
def __repr__(self):
try:
plat = self.platform.name
except:
plat = "Unknown"
nd = self.num_devices or 0
address = self.value or 0
return "<cl_context %s:%d 0x%x>" % (plat, nd, address)
def __del__(self):
try:
if self and self.reference_count > 0:
clReleaseContext(self)
except:
pass
@_wrapdll(P(cl_context_properties), cl_uint, P(cl_device),
void_p, void_p, P(cl_errnum),
res=cl_context, err=_lastarg_errcheck)
def clCreateContext(devices=None, platform=None, other_props=None):
"""
Create a context with the given devices and platform.
:param devices: A list of devices. If None, the first device from
the given platform is used.
:param platform: If no platform or devices are provided, the first
platform found will be used. If a device list is provided but no
platform, the platform will be recovered from the devices.
If you just need a context and don't care what you get, calling with
no arguments should hopefully get you something usable.
>>> clCreateContext() # doctest: +ELLIPSIS
<cl_context ...>
>>> one_device = clGetDeviceIDs()[0]
>>> clCreateContext(devices = [one_device]) # doctest: +ELLIPSIS
<cl_context ...>
"""
properties = dict()
if platform is None:
if devices:
platform = devices[0].platform
else:
platform = clGetPlatformIDs()[0]
properties[CL_CONTEXT_PLATFORM] = platform
if other_props:
properties.update(other_props)
if devices is None:
devices = platform.devices[:1]
props = (cl_context_properties*(2*len(properties)+1))()
for i, p in enumerate(properties):
props[2*i] = p.value
try:
props[2*i+1] = properties[p]
except TypeError:
props[2*i+1] = properties[p].value
props[2*len(properties)] = 0
if devices:
device_array = (cl_device * len(devices))()
else:
device_array = None
for i, d in enumerate(devices):
device_array[i] = d
ctx = clCreateContext.call(props, len(devices), device_array,
None, None, byref(cl_errnum()))
if clGetContextInfo(ctx, CL_CONTEXT_REFERENCE_COUNT) < 1:
raise ValueError("Unusable context")
return ctx
@_wrapdll(P(cl_context_properties), cl_device_type, void_p, void_p,
P(cl_errnum), res=cl_context, err=_lastarg_errcheck)
def clCreateContextFromType(device_type=cl_device_type.CL_DEVICE_TYPE_DEFAULT,
platform=None, other_props=None):
"""
Like :func:`clCreateContext`, but works by device type instead
of expecting you to list the desired devices. This can, for instance,
be used to create a context with GPU devices without the user having
to pick a platform and inspect its device list.
:param device_type: A :class:`cl_device_type` field indicating which
types of devices should be included.
:param platform: A :class:`cl_platform`. If no platform is provided,
each platform will be tried in turn until a context with the specified
device type can created.
If you just need a context and don't care what you get, calling with
no arguments should hopefully get you something usable.
>>> clCreateContextFromType(CL_DEVICE_TYPE_CPU | CL_DEVICE_TYPE_GPU) # doctest: +ELLIPSIS
<cl_context ...>
"""
properties = dict()
if platform is None:
all_plats = clGetPlatformIDs()
for plat in all_plats:
try:
ctx = clCreateContextFromType(device_type, plat, other_props)
except ValueError:
continue
return ctx
else:
raise ValueError("Could not create suitable context")
properties[CL_CONTEXT_PLATFORM] = platform
if other_props:
properties.update(other_props)
props = (cl_context_properties*(2*len(properties)+1))()
for i, p in enumerate(properties):
props[2*i] = p.value
try:
props[2*i+1] = properties[p]
except TypeError:
props[2*i+1] = properties[p].value
props[2*len(properties)] = 0
ctx = clCreateContextFromType.call(props, device_type,
None, None, byref(cl_errnum()))
if clGetContextInfo(ctx, CL_CONTEXT_REFERENCE_COUNT) < 1:
raise ValueError("Unusable context")
return ctx
@_wrapdll(cl_context, cl_context_info, size_t, void_p, P(size_t))
def clGetContextInfo(context, param_name):
"""
Retrieve context info.
:param context: :class:`cl_context`.
:param param_name: One of the :class:`cl_context_info` values.
:class:`cl_context` objects have attributes that will call this for
you, so you should probably use those instead of calling this directly.
>>> ctx = clCreateContext()
>>> clGetContextInfo(ctx, CL_CONTEXT_DEVICES) # doctest: +ELLIPSIS
(<cl_device ...>...)
>>> ctx.platform # doctest: +ELLIPSIS
<cl_platform ...>
>>> ctx.reference_count
1
>>> ctx.properties # doctest: +ELLIPSIS
<...cl_context_properties_Array...>
"""
if param_name == CL_CONTEXT_DEVICES:
try:
return context._devices
except AttributeError:
sz = size_t()
clGetContextInfo.call(context, param_name, 0, None, byref(sz))
num_dev = sz.value//sizeof(cl_device)
dev_array = (cl_device * num_dev)()
clGetContextInfo.call(context, param_name, sz, dev_array, None)
context._devices = tuple(x for x in dev_array)
return context._devices
elif param_name == CL_CONTEXT_PROPERTIES:
sz = size_t()
clGetContextInfo.call(context, param_name, 0, None, byref(sz))
num_props = sz.value//sizeof(cl_context_properties)
props = (cl_context_properties * num_props)()
clGetContextInfo.call(context, param_name, sz, props, None)
# TODO
# It's not entirely clear how we should present the result object
# to the user, since other than CL_CONTEXT_PLATFORM the possible
# values are all extension-dependent. For now, just return it.
return props
elif param_name == CL_CONTEXT_PLATFORM:
# Not actually a valid input, but it should probably be
# available in the properties list.
try:
return context._platform
except AttributeError:
props = clGetContextInfo(context, CL_CONTEXT_PROPERTIES)
for i in range(0, len(props)-1, 2):
if props[i].value == CL_CONTEXT_PLATFORM.value:
context._platform = cl_platform(props[i+1].value)
break
else:
context._platform = None
return context._platform
elif param_name == CL_CONTEXT_NUM_DEVICES:
# Sidestep bug in NVIDIA OpenCL driver by calculating
# this in another fashion.
sz = size_t()
clGetContextInfo.call(context, CL_CONTEXT_DEVICES, 0, None, byref(sz))
return sz.value//sizeof(cl_device)
else:
param_value = cl_uint()
clGetContextInfo.call(context, param_name, sizeof(param_value),
byref(param_value), None)
return int(param_value.value)
@_wrapdll(cl_context)
def clRetainContext(context):
# Not for end-user use
clRetainContext.call(context)
@_wrapdll(cl_context)
def clReleaseContext(context):
# Not for end-user use
clReleaseContext.call(context)
##################
# Command Queues #
##################
class cl_command_queue(void_p):
"""
Represents an OpenCL Command Queue instance.
Should not be directly instantiated by users of PyCL.
Use :func:`clCreateCommandQueue` to create a new queue.
"""
@property
def context(self):
"""
The context associated with the command queue. (:class:`cl_context`)
"""
return clGetCommandQueueInfo(self, CL_QUEUE_CONTEXT)
@property
def device(self):
"""
The device associated with the command queue. (:class:`cl_device`)
"""
return clGetCommandQueueInfo(self, CL_QUEUE_DEVICE)
@property
def properties(self):
"""
Command queue property bitfield. (:class:`cl_command_queue_properties`)
"""
return clGetCommandQueueInfo(self, CL_QUEUE_PROPERTIES)
@property
def reference_count(self):
"""
Reference count for OpenCL's garbage collector. (int)
"""
return clGetCommandQueueInfo(self, CL_QUEUE_REFERENCE_COUNT)
def __repr__(self):
try:
dev = self.device
return "<cl_command_queue '%s'>" % dev.name
except:
return "<cl_command_queue 0x%x>" % (self.value or 0)
def __del__(self):
try:
if self and self.reference_count > 0:
clReleaseCommandQueue(self)
except:
pass
@_wrapdll(cl_context, cl_device, cl_command_queue_properties, P(cl_errnum),
res=cl_command_queue, err=_lastarg_errcheck)
def clCreateCommandQueue(context=None, device=None, properties=None):
"""
:param context: :class:`cl_context`. If not provided, one will be
generated for you by calling :func:`clCreateContext` with no arguments.
(it can later be retrieved via the :attr:`context` attribute)
:param device: The :class:`cl_device` that will be fed by this queue.
If no device is provided, the first device in the context will be used.
:param properties: A :class:`cl_command_queue_properties` bitfield.
"""
if context is None:
context = clCreateContext()
if device is None:
device = context.devices[0]
if properties is None:
properties = cl_command_queue_properties.NONE
queue = clCreateCommandQueue.call(context, device, properties,
byref(cl_errnum()))
queue._context = context
return queue
@_wrapdll(cl_command_queue, cl_command_queue_info, size_t, void_p, P(size_t))
def clGetCommandQueueInfo(queue, param_name):
"""
:param queue: :class:`cl_command_queue`.
:param param_name: One of the :class:`cl_command_queue_info` values.
>>> q = clCreateCommandQueue()
>>> q.context # doctest: +ELLIPSIS
<cl_context ...>
>>> q.device # doctest: +ELLIPSIS
<cl_device ...>
>>> q.properties # doctest: +ELLIPSIS
NONE
>>> q.reference_count
1
"""
if param_name == CL_QUEUE_CONTEXT:
param_value = cl_context()
clGetCommandQueueInfo.call(queue, param_name, sizeof(param_value),
byref(param_value), None)
# Calling this doesn't increase the reference count,
# so we need to do that.
clRetainContext(param_value)
return param_value
elif param_name == CL_QUEUE_DEVICE:
param_value = cl_device()
clGetCommandQueueInfo.call(queue, param_name, sizeof(param_value),
byref(param_value), None)
return param_value
elif param_name == CL_QUEUE_PROPERTIES:
param_value = cl_command_queue_properties()
clGetCommandQueueInfo.call(queue, param_name, sizeof(param_value),
byref(param_value), None)
return param_value
elif param_name == CL_QUEUE_REFERENCE_COUNT:
param_value = cl_uint()
clGetCommandQueueInfo.call(queue, param_name, sizeof(param_value),
byref(param_value), None)
return int(param_value.value)
else:
raise ValueError("Unrecognized parameter %s" % param_name)
@_wrapdll(cl_command_queue)
def clRetainCommandQueue(queue):
# Not for end-user use
clRetainCommandQueue.call(queue)
@_wrapdll(cl_command_queue)
def clReleaseCommandQueue(queue):
# Not for end-user use
clReleaseCommandQueue.call(queue)
##################
# Memory Objects #
##################
class cl_mem(void_p):
"""
Represents an OpenCL memory object, typically a buffer or image.
Use :func:`clCreateBuffer` or similar to make them.
Memory objects are reference counted.
"""
@property
def size(self):
"""Memory size, in bytes."""
try: return self._size
except AttributeError:
return clGetMemObjectInfo(self, CL_MEM_SIZE)
@property
def offset(self):
"""Offset, in bytes, from origin (for sub-buffers)"""
try: return self._offset
except AttributeError:
return clGetMemObjectInfo(self, CL_MEM_OFFSET)
@property
def base(self):
"""Base memory object (for sub-buffers)"""
try: return self._base
except AttributeError:
return clGetMemObjectInfo(self, CL_MEM_ASSOCIATED_MEMOBJECT)
@property
def reference_count(self):
"""Reference count for OpenCL garbage collector."""
return clGetMemObjectInfo(self, CL_MEM_REFERENCE_COUNT)
@property
def map_count(self):
"""Number of memory maps currently active for this object."""
return clGetMemObjectInfo(self, CL_MEM_MAP_COUNT)
@property
def hostptr(self):
"""Pointer to host address associated with this memory
object at the time of creation. The meaning varies depending
on the flags. (type is :c:type:`void*`)"""
try: return self._hostptr
except AttributeError:
return clGetMemObjectInfo(self, CL_MEM_HOST_PTR)
@property
def flags(self):
"""The :class:`cl_mem_flags` the object was created with."""
return clGetMemObjectInfo(self, CL_MEM_FLAGS)
@property
def type(self):
"""The :class:`cl_mem_type` of the object."""
return clGetMemObjectInfo(self, CL_MEM_TYPE)
@property
def context(self):
"""The :class:`cl_context` the memory belongs to."""
try: return self._context
except AttributeError:
return clGetMemObjectInfo(self, CL_MEM_CONTEXT)
def __del__(self):
try:
if self:
clReleaseMemObject(self)
except:
pass
def empty_like_this(self):
"""Creates an empty read/write buffer of the same size
in the same context and returns it."""
return clCreateBuffer(self.context, self.size)
@_wrapdll(cl_context, cl_mem_flags, size_t, void_p, P(cl_errnum),
res=cl_mem, err=_lastarg_errcheck)
def clCreateBuffer(context, size, flags = cl_mem_flags.CL_MEM_READ_WRITE,
host_ptr = None):
"""
:param context: :class:`cl_context` that will own this memory.
:param size: Desired size (in bytes) of the memory.
:param flags: :class:`cl_mem_flags` to control the memory.
:param host_ptr: :c:type:`void*` to associated with this memory.
The meaning of the association depends on the flags. (An integer
representation of a pointer is fine).
See also :func:`buffer_from_ndarray`, :func:`buffer_from_pyarray`
"""
mem = clCreateBuffer.call(context, flags, size, host_ptr,
byref(cl_errnum()))
mem._size = size
mem._context = context
mem._base = host_ptr
mem._flags = flags
return mem
@_wrapdll(cl_mem)
def clRetainMemObject(mem):
clRetainMemObject.call(mem)
@_wrapdll(cl_mem)
def clReleaseMemObject(mem):
clReleaseMemObject.call(mem)
@_wrapdll(cl_command_queue, cl_mem, cl_bool, size_t, size_t,
void_p, cl_uint, P(cl_event), P(cl_event))
def clEnqueueReadBuffer(queue, mem, pointer, size=None,
blocking=True, offset=0, wait_for=None):
"""
Read from a :class:`cl_mem` buffer into host memory.
:param queue: :class:`cl_command_queue` to queue it on.
:param mem: :class:`cl_mem` to read from. Must be a buffer.
:param pointer: :c:type:`void*` pointer, the address to start
writing into. (An integer representation of the pointer is fine).
:param size: Number of bytes to read. If not specified, the entire
buffer is read out, which might be hazardous if the place you're
writing it to isn't big enough.
:param blocking: Wait for the transfer to complete. Default is True.
If False, you can use the returned event to check its status.
:param offset: Offset in the buffer at which to start reading. Default is 0.
:param wait_for: :class:`cl_event` (or a list of them) that must complete
before the memory transfer will commence.
:returns: :class:`cl_event`
See also :func:`buffer_to_ndarray` and :func:`buffer_to_pyarray`.
>>> ctx = clCreateContext()
>>> queue = clCreateCommandQueue(ctx)
>>> array1 = (cl_int * 8)() # 32 bytes
>>> for i in range(8): array1[i] = i
>>> m = clCreateBuffer(ctx, 32)
>>> clEnqueueWriteBuffer(queue, m, array1, 32) # doctest: +ELLIPSIS
<cl_event ...>
>>> array2 = (cl_int * 8)()
>>> clEnqueueReadBuffer(queue, m, array2, 32) # doctest: +ELLIPSIS
<cl_event ...>
>>> [x.value for x in array2]
[0, 1, 2, 3, 4, 5, 6, 7]
"""
if size is None:
size = clGetMemObjectInfo(mem, CL_MEM_SIZE)
nevents, wait_array = _make_event_array(wait_for)
out_event = cl_event()
clEnqueueReadBuffer.call(queue, mem, blocking, offset, size, pointer,
nevents, wait_array, byref(out_event))
return out_event
@_wrapdll(cl_command_queue, cl_mem, cl_bool, size_t, size_t,
void_p, cl_uint, P(cl_event), P(cl_event))
def clEnqueueWriteBuffer(queue, mem, pointer, size=None,
blocking=True, offset=0, wait_for=None):
"""
Write to a :class:`cl_mem` buffer from a location in host memory.
See :func:`clEnqueueReadBuffer` for the meanings of the parameters.
"""
if size is None:
size = clGetMemObjectInfo(mem, CL_MEM_SIZE)
nevents, wait_array = _make_event_array(wait_for)
out_event = cl_event()
clEnqueueWriteBuffer.call(queue, mem, blocking, offset, size, pointer,
nevents, wait_array, byref(out_event))
return out_event
@_wrapdll(cl_mem, cl_mem_info, size_t, void_p, P(size_t))
def clGetMemObjectInfo(mem, param_name):
"""
:param mem: :class:`cl_mem`
:param param_name: One of the :class:`cl_mem_info` values.
Memory objects have properties that will retrieve these
values for you, so you should probably use those.
"""
if param_name == CL_MEM_TYPE:
param_value = cl_mem_object_type()
clGetMemObjectInfo.call(mem, param_name, sizeof(param_value),
byref(param_value), None)
return param_value
elif param_name == CL_MEM_FLAGS:
param_value = cl_mem_flags()
clGetMemObjectInfo.call(mem, param_name, sizeof(param_value),
byref(param_value), None)
return param_value
elif param_name == CL_MEM_SIZE:
try: return mem._size
except AttributeError: pass
param_value = size_t()
clGetMemObjectInfo.call(mem, param_name, sizeof(param_value),
byref(param_value), None)
mem._size = int(param_value.value)
return mem._size
elif param_name == CL_MEM_OFFSET:
param_value = size_t()
clGetMemObjectInfo.call(mem, param_name, sizeof(param_value),
byref(param_value), None)
mem._offset = int(param_value.value)
return mem._offset
elif param_name in (CL_MEM_MAP_COUNT, CL_MEM_REFERENCE_COUNT):
param_value = cl_uint()
clGetMemObjectInfo.call(mem, param_name, sizeof(param_value),
byref(param_value), None)
return int(param_value.value)
elif param_name == CL_MEM_ASSOCIATED_MEMOBJECT:
try: return mem._base
except AttributeError: pass
param_value = cl_mem()
clGetMemObjectInfo.call(mem, param_name, sizeof(param_value),
byref(param_value), None)
if param_value: mem._base = param_value
else: mem._base = None
return mem._base
elif param_name == CL_MEM_CONTEXT:
try: return mem._context
except AttributeError: pass
param_value = cl_context()
clGetMemObjectInfo.call(mem, param_name, sizeof(param_value),
byref(param_value), None)
clRetainContext(param_value)
mem._context = param_value
return param_value
elif param_name == CL_MEM_HOST_PTR:
try: return mem._hostptr
except AttributeError: pass
param_value = void_p()
clGetMemObjectInfo.call(mem, param_name, sizeof(param_value),
byref(param_value), None)
if param_value: mem._hostptr = param_value
else: mem._hostptr = None
return mem._hostptr
else: # e.g., CL_MEM_D3D10_RESOURCE_KHR
param_value = void_p()
clGetMemObjectInfo.call(mem, param_name, sizeof(param_value),
byref(param_value), None)
return param_value or None
#################
# Image Objects #
#################
@_wrapdll(cl_context, cl_mem_flags, P(cl_image_format),
size_t, size_t, size_t, void_p, P(cl_errnum),
res = cl_mem, err = _lastarg_errcheck)
def clCreateImage2D(context, width, height,
imgformat = None, # If we can guess it
flags = cl_mem_flags.CL_MEM_READ_WRITE,
host_ptr = None, rowpitch = 0):
if isinstance(imgformat, (tuple, list)):
imgformat = cl_image_format(*imgformat)
assert imgformat is not None, "don't know how to guess this yet"
img = clCreateImage2D.call(context, flags, byref(imgformat),
width, height, rowpitch,
host_ptr, byref(cl_errnum()))
img._context = context
img._width = width
img._height = height
img._base = host_ptr
img._flags = flags
img._format = imgformat
return img
@_wrapdll(cl_context, cl_mem_flags, cl_mem_object_type, cl_uint, P(cl_image_format), P(cl_uint))
def clGetSupportedImageFormats(context=None,
type=cl_mem_object_type.CL_MEM_OBJECT_IMAGE2D,
flags=cl_mem_flags.CL_MEM_READ_WRITE):
if context is None:
context = clCreateContext()
num = cl_uint()
clGetSupportedImageFormats.call(context, flags, type, 0, None, byref(num))
formats = (cl_image_format * num.value)()
clGetSupportedImageFormats.call(context, flags, type, num, formats, None)
return [f for f in formats]
###################
# Program Objects #
###################
class cl_program(void_p):
"""
Represents an OpenCL program, a container for kernels.
Use :func:`clCreateProgramWithSource` or
:func:`clCreateProgramWithBinary` to make a program.
Remember to call :meth:`build` to compile source programs.
You can retrieve a kernel like so:
>>> my_kernel = my_program['my_kernel'] # doctest: +SKIP
Programs participate in reference counting.
"""
def build(self, *args, **kw):
"""
Calls :func:`clBuildProgram` on the program, passing
along any arguments you provide. The program itself will
be returned, so you can use this idiom:
>>> source = 'kernel void foo(float bar) {}'
>>> ctx = clCreateContext()
>>> prog = clCreateProgramWithSource(ctx, source).build()
"""
clBuildProgram(self, *args, **kw)
return self
def __getitem__(self, name):
if not hasattr(self, '_kernels'):
self._kernels = dict()
if name in self._kernels:
return self._kernels[name]
else:
return clCreateKernel(self, name)
@property
def context(self):
"""Returns the context the program exists within."""
try:
return self._context
except AttributeError:
return clGetProgramInfo(self, CL_PROGRAM_CONTEXT)
@property
def reference_count(self):
"""Reference count for OpenCL garbage collector."""
return clGetProgramInfo(self, CL_PROGRAM_REFERENCE_COUNT)
@property
def num_devices(self):
"""Number of devices the program exists on."""
return clGetProgramInfo(self, CL_PROGRAM_NUM_DEVICES)
@property
def devices(self):
"""Devices on which the program exists."""
return clGetProgramInfo(self, CL_PROGRAM_DEVICES)
@property
def source(self):
"""Program's source code, if available."""
return clGetProgramInfo(self, CL_PROGRAM_SOURCE)
@property
def binary_sizes(self):
"""Sizes, in bytes, of the binaries for each of the
devices the program is compiled for."""
return clGetProgramInfo(self, CL_PROGRAM_BINARY_SIZES)
@property
def binaries(self):
"""Acquires the binaries for each device."""
return clGetProgramInfo(self, CL_PROGRAM_BINARIES)
def build_status(self, device=None):
"""
Retrieves the :class:`cl_program_build_status` for one of more devices.
See also :func:`clGetProgramBuildInfo`
"""
return clGetProgramBuildInfo(self, CL_PROGRAM_BUILD_STATUS, device)
def build_options(self, device=None):
"""
Retrieves the build options, as a string, for one of more devices.
See also :func:`clGetProgramBuildInfo`.
"""
return clGetProgramBuildInfo(self, CL_PROGRAM_BUILD_OPTIONS, device)
def build_log(self, device=None):
"""
Returns the build log, as a string, for one or more devices.
Mostly useful for checking compiler errors.
See also :func:`clGetProgramBuildInfo`.
"""
return clGetProgramBuildInfo(self, CL_PROGRAM_BUILD_LOG, device)
def __del__(self):
try:
if self: clReleaseProgram(self)
except: pass
@_wrapdll(cl_program, cl_program_info, size_t, void_p, P(size_t))
def clGetProgramInfo(program, param_name):
"""
:param program: :class:`cl_program`
:param param_name: One of the :class:`cl_program_info` values.
"""
if param_name == CL_PROGRAM_CONTEXT:
try:
return program._context
except AttributeError:
param_value = cl_context()
clGetProgramInfo.call(program, param_name, sizeof(param_value),
param_value, None)
clRetainContext(param_value)
program._context = param_value
return param_value
elif param_name in (CL_PROGRAM_REFERENCE_COUNT,
CL_PROGRAM_NUM_DEVICES):
param_value = cl_uint()
clGetProgramInfo.call(program, param_name, sizeof(param_value),
byref(param_value), None)
return int(param_value.value)
elif param_name == CL_PROGRAM_DEVICES:
sz = size_t()
clGetProgramInfo.call(program, param_name, 0, None, byref(sz))
nd = sz.value // sizeof(cl_device)
param_value = (cl_device * nd)()
clGetProgramInfo.call(program, param_name, sz, param_value, None)
return [x for x in param_value]
elif param_name == CL_PROGRAM_SOURCE:
sz = size_t()
clGetProgramInfo.call(program, param_name, 0, None, byref(sz))
param_value = create_string_buffer(sz.value)
clGetProgramInfo.call(program, param_name, sz, param_value, None)
return param_value.value
elif param_name == CL_PROGRAM_BINARY_SIZES:
sz = size_t()
clGetProgramInfo.call(program, param_name, 0, None, byref(sz))
nd = sz.value // sizeof(size_t)
param_value = (size_t * nd)()
clGetProgramInfo.call(program, param_name, sz, param_value, None)
return [int(x) for x in param_value]
elif param_name == CL_PROGRAM_BINARIES:
sz = size_t()
clGetProgramInfo.call(program, param_name, 0, None, byref(sz))
nd = sz.value // sizeof(char_p)
param_value = (char_p * nd)()
binary_sizes = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES)
binaries = [None]*nd
for i, bsize in enumerate(binary_sizes):
binaries[i] = (ctypes.c_char * bsize)()
param_value[i] = cast(binaries[i], char_p)
clGetProgramInfo.call(program, param_name, sz, param_value, None)
return [x.value for x in binaries]
else:
raise ValueError("Unknown program info %s" % param_name)
@_wrapdll(cl_program, cl_device, cl_program_build_info,
size_t, void_p, P(size_t))
def clGetProgramBuildInfo(program, param_name, device=None):
"""
:param program: The :class:`cl_program` to check.
:param param_name: One of the :class:`cl_program_build_info` values.
:param device: A :class:`cl_device` instance, or list of them.
If a list of devices is provided, info will be returned
for each of them in a list.
If no device is specified, all devices associated with
the program will be used.
The :meth:`~cl_program.build_status`, :meth:`~cl_program.build_options`,
and :meth:`~cl_program.build_log` methods of program objects are
equivalent to using this, so they may be preferable.
"""
if device is None:
device = program.devices
if not isinstance(device, cl_device):
return [clGetProgramBuildInfo(program, param_name, each_device)
for each_device in program.devices]
if param_name == CL_PROGRAM_BUILD_STATUS:
param_value = cl_build_status()
clGetProgramBuildInfo.call(program, device, param_name,
sizeof(param_value),
byref(param_value), None)
return param_value
elif param_name in (CL_PROGRAM_BUILD_OPTIONS,
CL_PROGRAM_BUILD_LOG):
sz = size_t()
clGetProgramBuildInfo.call(program, device, param_name,
0, None, byref(sz))
param_value = create_string_buffer(sz.value)
clGetProgramBuildInfo.call(program, device, param_name,
sz, param_value, None)
if sys.version_info[0] > 2:
return str(param_value.value, 'utf-8')
else:
return param_value.value
else:
raise ValueError("Unknown program build info %s" % param_name)
@_wrapdll(cl_context, cl_uint, P(char_p), P(size_t), P(cl_errnum),
res = cl_program, err = _lastarg_errcheck)
def clCreateProgramWithSource(context, source):
"""
:param context: Context in which the program will exist
:param source: Source code, as a string.
Remember to call :meth:`~cl_program.build` on the program.
"""
if sys.version_info[0] > 2 and isinstance(source, str):
source = source.encode()
c_source = char_p(source)
p = pointer(c_source)
#import pdb; pdb.set_trace()
prg = clCreateProgramWithSource.call(context, 1, p,
None, byref(cl_errnum()))
prg._context = context
return prg
@_wrapdll(cl_program, cl_uint, P(cl_device), P(char_p), void_p, void_p)
def clBuildProgram(program, options=None, devices=None):
"""
Compiles a source program to run on one or more devices.
:param program: The :class:`cl_program` to build.
:param options: (optional) string with compiler options. See
your OpenCL spec and platform provider's docs for possible values.
:param devices: A list of devices to compile the program for. If not
provided, it will be built for all devices in the context.
If the build fails, it will raise a :exc:`ProgramBuildFailureError`
with details.
"""
if options is not None:
options = char_p(options)
if devices is not None:
num_devices = len(devices)
dev_array = (cl_device*num_devices)()
for i,dev in enumerate(devices):
dev_array[i] = dev
else:
devices = program.devices
num_devices = 0
dev_array = None
try:
clBuildProgram.call(program, num_devices, dev_array,
options, None, None)
except BuildProgramFailureError:
# Re-raise with appropriate message
for dev in devices:
if program.build_status(dev) == CL_BUILD_ERROR:
log = program.build_log(dev)
raise BuildProgramFailureError(log)
@_wrapdll(cl_program)
def clRetainProgram(program):
clRetainProgram.call(program)
@_wrapdll(cl_program)
def clReleaseProgram(program):
clReleaseProgram.call(program)
##################
# Kernel Objects #
##################
class cl_kernel(void_p):
"""
Represents an OpenCL kernel found in a :class:`cl_program`.
After compiling a program, the kernels will be accessible as
items whose keys are the kernel names.
Kernels are reference counted.
"""
def __del__(self):
try:
if self: clReleaseKernel(self)
except: pass
def __repr__(self):
try:
return "<cl_kernel %s %s>" % (self.name, self.argtypes)
except:
return "<cl_kernel 0x%x>" % (self.value or 0)
@property
def name(self):
"""Name of the kernel function."""
try:
return self._name
except AttributeError:
return clGetKernelInfo(self, CL_KERNEL_FUNCTION_NAME)
@property
def program(self):
"""The :class:`cl_program` this kernel lives in."""
try:
return self._program
except AttributeError:
return clGetKernelInfo(self, CL_KERNEL_PROGRAM)
@property
def context(self):
"""The :class:`cl_context` this kernel lives in."""
try:
return self._context
except AttributeError:
return clGetKernelInfo(self, CL_KERNEL_CONTEXT)
@property
def num_args(self):
"""Number of arguments required to call this kernel."""
try:
return self._num_args
except AttributeError:
return clGetKernelInfo(self, CL_KERNEL_NUM_ARGS)
@property
def reference_count(self):
"""Reference count for OpenCL garbage collector."""
return clGetKernelInfo(self, CL_KERNEL_REFERENCE_COUNT)
def __call__(self, *args, **kw):
"""
Equivalent to calling :meth:`setarg` for each of the
arguments provided. No size parameter can be provided
in this calling fashion, so be sure the datatypes are known
or easily guessed by :meth:`setarg`.
If the function takes a local memory argument, pass in
an instance of :class:`localmem` to indicate the desired size.
For example, to request that the third argument allocate 1KB of
local memory: ``mykernel(foo, bar, localmem(1024))``
The return value is the kernel itself, so that you can chain
it with further methods like :meth:`on`.
"""
for i, arg in enumerate(args):
self.setarg(i, args[i])
return self
def setarg(self, index, value=None, size=None):
"""
Sets one of the kernel's arguments.
:param index: 0-based argument number to set.
:param value: Value to set it to. Can be a :class:`cl_mem`,
a Python int or float, or a :class:`localmem` object to
indicate local memory allocation.
:param size: The size of the parameter, in bytes. PyCL will
attempt to guess if you don't tell it here or by setting
:attr:`argtypes`. Guessing is bad.
This does some extra work to try to ensure that the
data is in a form suitable for the lower-level :func:`clSetKernelArg`
call. The OpenCL API doesn't give us much help in determining
what type an argument should be, so if possible you should set
the elements of the kernel's :attr:`argtypes` field to a list of
types. The types should be either :class:`cl_mem`, :class:`localmem`,
a scalar type such as :class:`cl_int`, or a ctypes structure type.
"""
if value is None and size is None:
# Er, maybe the argument is a global pointer, and
# the user wants it set to NULL?
size = sizeof(cl_mem)
dtype = cl_mem
elif isinstance(value, localmem):
# Local memory arguments must have a null pointer
# and a size argument to indicate how many bytes
# should be allocated on the device. As a convenience,
# the user can pass in a localmem object, which serves
# as a marker and holds the desired size. This isn't
# necessary for this function, but is when using the
# kernel's __call__ method.
size = value.size
value = None
dtype = localmem
elif isinstance(value, cl_mem):
# Global memory object. Ask for its size if not specified.
if size is None:
size = sizeof(cl_mem)
dtype = cl_mem
else:
# Otherwise, consult our records to see
# what the appropriate c datatype should be.
dtype = self.argtypes[index]
if dtype is None:
# We'll try to guess it further down
pass
elif dtype is localmem:
# If the user placed localmem in the argtype,
# they can just call the kernel with an integer
# to indicate the desired size.
if size is None:
size = value
value = None
elif dtype is not cl_mem:
# Coerce the argument if necessary.
if not isinstance(value, dtype):
value = dtype(value)
size = sizeof(dtype)
# Guess scalar datatypes.
# OpenCL doesn't give us any means by which to query the type
# or size of a kernel argument. If the user gives us a ctypes
# value, we can assume they know what's up and use its type
# and size. Otherwise, if we were given an int or a float, we
# can infer the base type, but not the size... so we try each
# of the sizes that type could possibly be. clSetKernelArg will
# raise CL_INVALID_ARG_SIZE if we get it wrong, so we can choose
# based on that. Since it obviously knows the right size, it would
# be nice if it would just tell us, but at least this works.
if dtype is None:
warn('Type not specified for %s argument %d. Guessing...' % (
self.name, index), stacklevel=3)
if isinstance(value, ctypes._SimpleCData.__bases__[0]):
candidate_types = (value.__class__,)
if isinstance(value, float):
candidate_types = (cl_float, cl_double)
elif isinstance(value, str) and len(value) == 1:
candidate_types = cl_char
elif isinstance(value, int):
candidate_types = (cl_int, cl_long, cl_short, cl_char)
for t in candidate_types:
try:
scalar_value = t(value)
clSetKernelArg.call(self, index, sizeof(t),
byref(scalar_value))
# Hey, that worked. Record success.
dtype = t
value = scalar_value
size = sizeof(dtype)
self.argtypes[index] = dtype
break
except InvalidArgSizeError:
# Nope, not this one.
continue
else:
raise ValueError("Could not guess kernel datatype for arg %d. "
"Please set it in kernel.argtypes[%d]." %
(index, index))
if value is not None:
vref = byref(value)
else:
vref = None
clSetKernelArg.call(self, index, size, vref)
def _get_argtypes(self):
"""
Represents the data types of the kernel function arguments.
There is no way to ask OpenCL for this information, so short of
actually parsing the C code the only way to fill this in is to infer
it from the way the user tries to call the kernel.
Since this is error prone, we encourage you to fill in the list yourself.
"""
if not hasattr(self, '_argtypes'):
self._argtypes = [None]*self.num_args
return self._argtypes
def _set_argtypes(self, value):
if len(value) != self.num_args:
raise ValueError("Expected %d arguments." % self.num_args)
self._argtypes = value
argtypes = property(_get_argtypes, _set_argtypes,
doc=_get_argtypes.__doc__)
def on(self, queue, *args, **kw):
"""
Enqueue the kernel (hopefully after setting its arguments)
upon a command queue. This is essetially a shortcut for
:func:`clEnqueueNDRangeKernel`.
"""
return clEnqueueNDRangeKernel(queue, self, *args, **kw)
def work_group_size(self, device=None):
"""
The maximum size of workgroups for this kernel on the
specified device.
"""
return clGetKernelWorkGroupInfo(self,
CL_KERNEL_WORK_GROUP_SIZE, device)
def compile_work_group_size(self, device=None):
"""
The work group size specified by the kernel source, if any.
Otherwise, will return (0,0,0).
"""
return clGetKernelWorkGroupInfo(self,
CL_KERNEL_COMPILE_WORK_GROUP_SIZE, device)
def local_mem_size(self, device=None):
"""
The amount of local memory that would be used by this kernel
on the given device with its current argument set.
"""
return clGetKernelWorkGroupInfo(self,
CL_KERNEL_LOCAL_MEM_SIZE, device)
def preferred_work_group_size_multiple(self, device=None):
"""
Suggests a workgroup size multiplier for each dimension.
That is, if a multiple is 8, then workgroup sizes should preferably
be multiples of 8.
"""
return clGetKernelWorkGroupInfo(self,
CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, device)
def private_mem_size(self, device=None):
"""
Amount of private memory needed to execute each workitem on the device.
"""
return clGetKernelWorkGroupInfo(self,
CL_KERNEL_PRIVATE_MEM_SIZE, device)
@_wrapdll(cl_program, char_p, P(cl_errnum),
res = cl_kernel, err = _lastarg_errcheck)
def clCreateKernel(program, kernel_name):
"""
:param program: :class:`cl_program`
:param kernel_name: String naming a kernel function in the program.
Using the the ``program[kernel_name]`` syntax is preferable.
"""
if sys.version_info[0] > 2 and isinstance(kernel_name, str):
kernel_name = kernel_name.encode()
kernel = clCreateKernel.call(program, char_p(kernel_name),
byref(cl_errnum()))
kernel._program = program
kernel._context = program.context
if not hasattr(program, '_kernels'):
program._kernels = dict()
program._kernels[kernel_name] = kernel
return kernel
@_wrapdll(cl_kernel, cl_kernel_info, size_t, void_p, P(size_t))
def clGetKernelInfo(kernel, param_name):
"""
:param kernel: :class:`cl_kernel`
:param param_name: One of the :class:`cl_kernel_info` values.
Kernel objects have properties that call this function, so it
is probably preferable to use those instead.
"""
if param_name == CL_KERNEL_FUNCTION_NAME:
sz = size_t()
clGetKernelInfo.call(kernel, param_name, 0, None, byref(sz))
param_value = create_string_buffer(sz.value)
clGetKernelInfo.call(kernel, param_name, sz, param_value, None)
return param_value.value
elif param_name == CL_KERNEL_CONTEXT:
param_value = cl_context()
clGetKernelInfo.call(kernel, param_name, sizeof(param_value),
byref(param_value), None)
clRetainContext(param_value)
return param_value
elif param_name == CL_KERNEL_PROGRAM:
try:
return kernel._program
except AttributeError:
param_value = cl_program()
clGetKernelInfo.call(kernel, param_name, sizeof(param_value),
byref(param_value), None)
clRetainProgram(param_value)
kernel._program = program
return param_value
elif param_name == CL_KERNEL_CONTEXT:
try:
return kernel._context
except AttributeError:
param_value = cl_context()
clGetKernelInfo.call(kernel, param_name, sizeof(param_value),
byref(param_value), None)
clRetainContext(param_value)
kernel._context = context
return param_value
elif param_name == CL_KERNEL_NUM_ARGS:
try:
return kernel._num_args
except AttributeError:
param_value = cl_uint()
clGetKernelInfo.call(kernel, param_name, sizeof(param_value),
byref(param_value), None)
kernel._num_args = int(param_value.value)
return kernel._num_args
elif param_name == CL_KERNEL_REFERENCE_COUNT:
param_value = cl_uint()
clGetKernelInfo.call(kernel, param_name, sizeof(param_value),
byref(param_value), None)
return int(param_value.value)
else:
raise ValueError("Unknown kernel info type %s" % param_name)
@_wrapdll(cl_kernel, cl_device, cl_kernel_work_group_info,
size_t, void_p, P(size_t))
def clGetKernelWorkGroupInfo(kernel, param_name, device=None):
"""
:param kernel: :class:`cl_kernel`
:param param_name: One of the :class:`cl_kernel_work_group_info` values.
:param device: :class:`cl_device`. If no device is specified, the first
device in the kernel's context is queried.
Retrieves information about the kernel specific to a particular device
that it might be run on. This information is also available through
specific methods of kernel objects, which may be preferable to calling this.
"""
if device is None:
device = kernel.context.devices[0]
if param_name in (CL_KERNEL_WORK_GROUP_SIZE,
CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE):
param_value = size_t()
clGetKernelWorkGroupInfo.call(kernel, device, param_name,
sizeof(param_value),
byref(param_value), None)
return int(param_value.value)
elif param_name in (CL_KERNEL_LOCAL_MEM_SIZE,
CL_KERNEL_PRIVATE_MEM_SIZE):
param_value = cl_ulong()
clGetKernelWorkGroupInfo.call(kernel, device, param_name,
sizeof(param_value),
byref(param_value), None)
return int(param_value.value)
elif param_name == CL_KERNEL_COMPILE_WORK_GROUP_SIZE:
param_value = (size_t * 3)()
clGetKernelWorkGroupInfo.call(kernel, device, param_name,
sizeof(param_value),
byref(param_value), None)
return [int(x) for x in param_value]
else:
raise ValueError("Unknown param name %s" % param_name)
@_wrapdll(cl_kernel)
def clRetainKernel(kernel):
clRetainKernel.call(kernel)
@_wrapdll(cl_kernel)
def clReleaseKernel(kernel):
clReleaseKernel.call(kernel)
class localmem(object):
"""
When a kernel defines an argument to be in local memory,
no value can be passed in to that argument. Instead, the
size of the local memory is specified. While you could do
this directly with :func:`clSetKernelArg`, localmem allows
you to set this using the kernel call syntax. So if you had
a kernel whose third argument was a local memory pointer,
you could set the arguments like so:
>>> mykernel(x, y, localmem(1024)) # doctest: +SKIP
localmem is also accepted in :attr:`~cl_kernel.argtypes`, in
which case the kernel can be called using just the desired size:
>>> mykernel.argtypes = (cl_mem, cl_mem, localmem) # doctest: +SKIP
>>> mykernel(x, y, 1024) # doctest: +SKIP
"""
def __init__(self, size):
self.size = size
@_wrapdll(cl_kernel, cl_uint, size_t, void_p)
def clSetKernelArg(kernel, index, value = None, size = None):
"""
:param kernel: :class:`cl_kernel`
:param index: 0-based argument index to set.
:param value: Should be None or a pointer to a ctypes
scalar or a :class:`cl_mem` object. Does not accept :class:`localmem`.
:param size: Size in bytes of the referenced value. That is,
if the argument is a 32-bit integer, this should be 4. If
the argument is a :class:`cl_mem`, it should be ``sizeof(cl_mem)``.
Unlike most of the wrappers in PyCL, this one doesn't do
much to help you out. Use :meth:`cl_kernel.setarg` if you want
some help setting individual arguments. Calling the kernel
object itself with the desired argument sequence is more preferable
still. Set :attr:`cl_kernel.argtypes` if it can't guess the types properly.
"""
clSetKernelArg.call(kernel, index, size, value)
@_wrapdll(cl_command_queue, cl_kernel, cl_uint,
P(size_t), P(size_t), P(size_t),
cl_uint, P(cl_event), P(cl_event))
def clEnqueueNDRangeKernel(queue, kernel, gsize=(1,), lsize=None,
offset=None, wait_for=None):
"""
Enqueue a kernel for execution. The kernel's arguments should
be set already. For a more idiomatic calling syntax, set the
kernel arguments by calling it and use its :meth:`~cl_kernel.on`
method to queue it.
:param queue: :class:`cl_command_queue` to enqueue it upon.
:param kernel: The :class:`cl_kernel` object you want to run.
:param gsize: Global work size. A 1-, 2-, or 3-tuple of integers
indicating the dimensions of the work to be done.
A scalar is fine too. Default is a single work item.
:param lsize: Local work size. Should have the same dimension as
``gsize``. If None (the default), OpenCL will pick a size for you.
:param offset: Global work item offset. By default, the global id of
work items start at 0 in each dimension. Provide a tuple of the same
dimension as ``gsize`` to offset the ids.
:param wait_for: A :class:`cl_event` or list of them that should complete
prior to this kernel's execution.
:returns: :class:`cl_event` which will identify when the kernel has completed.
Note that the OpenCL :func:`clEnqueueTask` function is equivalent to calling
this function with the default gsize, lsize, and offset values, so we haven't
bothered to wrap it.
"""
if isinstance(gsize, int):
nd = 1
gsize = (gsize,)
else:
nd = len(gsize)
nd = 1
gsize_array = (size_t*nd)()
for i,s in enumerate(gsize):
gsize_array[i] = s
if lsize is None:
lsize_array = None
else:
if isinstance(lsize, int):
lsize = (lsize,)
lsize_array = (size_t*nd)()
for i,s in enumerate(lsize):
lsize_array[i] = s
if offset is None:
offset_array = None
else:
if isinstance(offset, int):
offset = (offset,)
offset_array = (size_t*nd)()
for i,s in enumerate(offset):
offset_array[i] = s
nevents, wait_array = _make_event_array(wait_for)
out_event = cl_event()
clEnqueueNDRangeKernel.call(queue, kernel, nd,
offset_array, gsize_array, lsize_array,
nevents, wait_array, byref(out_event))
return out_event
def buffer_from_ndarray(queue, ary, buf=None, **kw):
"""
Creates (or simply writes to) an OpenCL buffer using the contents
of a Numpy array.
:param queue: :class:`cl_command_queue` to enqueue the write to.
:param ary: :class:`numpy.ndarray` object, or other object implementing
the array interface. We haven't wrapped the rectangular read/write
functions yet, so if the array isn't contiguous, a copy will be made.
Note that the entirety of the provided array will be written, so be sure
to slice it down to just the part you want to write.
:param buf: :class:`cl_mem` buffer object. If not provided, one the size
of the array will be created. In any event, it should hopefully be large
enough to hold the provided array.
:returns: ``(buf, evt)``, where ``evt`` is the :class:`cl_event` returned
by the write operation.
Any additional provided keyword arguments are passed along to
:func:`clEnqueueWriteBuffer`.
"""
ary = np.ascontiguousarray(ary)
if buf is None:
buf = clCreateBuffer(queue.context, ary.nbytes)
if ary.__array_interface__['strides']:
raise ValueError("I don't know how to handle strided arrays yet.")
ptr = void_p(ary.__array_interface__['data'][0])
evt = clEnqueueWriteBuffer(queue, buf, ptr, ary.nbytes, **kw)
return (buf, evt)
def buffer_to_ndarray(queue, buf, out=None, like=None,
dtype='uint8', shape=None, **kw):
"""
Reads from an OpenCL buffer into an ndarray.
:param queue: The queue to put the read operation on.
:param buf: The :class:`cl_mem` buffer to read from
:param out: The :class:`numpy.ndarray` to read into. If not
provided, one will be created based on the following arguments.
Unlike :func:`buffer_from_array`, this must currently be an actual
contiguous :class:`~numpy.ndarray` object.
:param like: Only relevant if no out array is provided. The new array
will have the same shape and dtype as this value.
:param dtype: Only relevant if no out array or ``like`` parameter are provided.
A :class:`numpy.dtype` or anything that can pass for one. Defaults to ``'uint8'``.
:param shape: Only relevant if no out array or ``like`` parameter are provided.
Integer or tuple determining the array's shape. If no shape is given, the
array will be 1d and will have a number of elements based on the buffer's
size and the itemsize of the dtype.
:returns: ``(ary, evt)``, where ``evt`` is the :class:`cl_event` returned by the
read operation.
Any further keyword arguments are passed directly to
:func:`clEnqueueReadBuffer`.
"""
if out is None:
if like is not None:
out = np.empty_like(like)
else:
dtype = np.dtype(dtype)
if shape is None:
shape = buf.size // dtype.itemsize
out = np.empty(shape, dtype)
assert out.flags.contiguous, "Don't know how to write non-contiguous yet."
ptr = void_p(out.__array_interface__['data'][0])
evt = clEnqueueReadBuffer(queue, buf, ptr, out.nbytes, **kw)
return (out, evt)
def buffer_from_pyarray(queue, ary, buf=None, **kw):
"""
Essentially the same as :func:`buffer_from_ndarray`, except that
it accepts arrays from the :mod:`array` module in Python's standard library.
"""
(ptr, length) = ary.buffer_info()
nbytes = length*ary.itemsize
if buf is None:
buf = clCreateBuffer(queue.context, nbytes)
evt = clEnqueueWriteBuffer(queue, buf, ptr, nbytes, **kw)
return (buf, evt)
def buffer_to_pyarray(queue, buf, out=None, like=None,
typecode='B', length=None, **kw):
"""
Essentially the same as :func:`buffer_to_ndarray`, except that it
produces arrays from the :mod:`array` module in Python's standard library.
The ``dtype`` and ``shape`` parameters are replaced:
:param typecode: A character indicating the array typecode. See the
documentation_ for the mappings to C data types. The default is 'B',
for unsigned bytes.
:param length: The number of elements that should be in the array. If
not provided, it will be determined based on the buffer size and the
size of the selected typecode.
.. _documentation: http://docs.python.org/library/array.html
"""
if out is None:
if like is not None:
out = array(like.typecode, like)
else:
if length is None:
length = buf.size // array(typecode).itemsize
out = array(typecode, [0])*length
(ptr, length) = out.buffer_info()
nbytes = length*out.itemsize
evt = clEnqueueReadBuffer(queue, buf, ptr, nbytes)
return (out, evt)
### End OpenCL wrappers. ###
def _make_all():
g = globals()
__all__ = [name for name in g if
not(name.startswith('_')) and
(hasattr(g[name], '__module__') and
g[name].__module__ == __name__)]
g['__all__'] = __all__
_make_all()
if __name__ == '__main__':
import sys
if '--doctest' in sys.argv:
import doctest
doctest.testmod()
else:
print("Using %s" % _dll_filename)
platforms = clGetPlatformIDs()
for p in platforms:
print("Platform: %s" % p.name)
print(" Vendor: %s" % p.vendor)
print(" Version: %s" % p.version)
print(" Profile: %s" % p.profile)
print(" Extensions: %s" % ", ".join(p.extensions))
for d in clGetDeviceIDs(p):
print(" %s: %s" % (str(d.type)[15:], d.name))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment