Skip to content

Instantly share code, notes, and snippets.

@arghdos
Last active Oct 17, 2017
Embed
What would you like to do?
Demonstrating a preamble function operating on a temporary variable array
/* Shorthand: local id of the current work-item along dimension N, cast to int. */
#define lid(N) ((int) get_local_id(N))
/* Shorthand: id of the current work-group along dimension N, cast to int. */
#define gid(N) ((int) get_group_id(N))
/* For OpenCL C versions below 1.2, explicitly enable the cl_khr_fp64
 * extension; loopy_kernel below operates on double buffers. */
#if __OPENCL_C_VERSION__ < 120
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#endif
/*
 * Concatenated per-segment tables searched by indirect().  Segment k occupies
 * lookup[offsets[k]] .. lookup[offsets[k + 1] - 1].
 *
 * Both tables are declared ahead of indirect() because C requires a
 * file-scope identifier to be declared before its first use.
 */
__constant int const lookup[54] = { 0, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 5, 0, 1, 2, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 5, 6, 7, 8 };
/* Segment boundaries into lookup: 11 entries delimit 10 segments. */
__constant int const offsets[11] = { 0, 1, 6, 12, 15, 23, 27, 32, 40, 45, 54 };

/*
 * Search lookup[start] .. lookup[end - 1] for the first entry equal to match.
 *
 * Returns the position of that entry relative to start, or -1 when no entry
 * in the range matches (previously control fell off the end of the function,
 * leaving the result undefined).  Callers that use the result as an array
 * index must check for -1 first.
 */
int indirect(int start, int end, int match)
{
    for (int i = start; i < end; ++i)
    {
        if (lookup[i] == match)
            return i - start;
    }
    /* no entry in [start, end) equals match */
    return -1;
}
/*
 * Single work-item kernel (required work-group size 1x1x1).
 *
 * For each of the 10 segments delimited by consecutive entries of offsets,
 * looks up the position of the value 1 inside the segment via indirect() and
 * copies data[position] into out[segment].
 */
__kernel void __attribute__ ((reqd_work_group_size(1, 1, 1))) loopy_kernel(__global double *__restrict__ out, __global double const *__restrict__ data)
{
    for (int seg = 0; seg < 10; ++seg)
    {
        /* position of the entry equal to 1, relative to the segment start */
        int const pos = indirect(offsets[seg], offsets[seg + 1], 1);
        out[seg] = data[pos];
    }
}
import loopy as lp
import numpy as np
import pyopencl as cl
from loopy.kernel.data import temp_var_scope as scopes
class indirectmangler(object):
    """Function mangler describing the signature of a custom function.

    An instance is a callable that is registered on a kernel with
    ``lp.register_function_manglers``.  When called with the name it was
    created for, it validates the argument dtypes and returns a
    :class:`loopy.kernel.data.CallMangleInfo`; for any other name it returns
    ``None``.

    :arg name: name of the function this mangler handles
    :arg arg_dtypes: expected dtypes of the function's arguments, in order
    :arg result_dtypes: dtypes of the function's results, in order
    """

    def __init__(self, name, arg_dtypes, result_dtypes):
        self.name = name
        self.arg_dtypes = arg_dtypes
        self.result_dtypes = result_dtypes

    def __call__(self, kernel, name, arg_dtypes):
        """
        A function that will return a :class:`loopy.kernel.data.CallMangleInfo`
        to interface with the calling :class:`loopy.LoopKernel`

        :arg kernel: the calling kernel (not inspected here)
        :arg name: name of the function being looked up
        :arg arg_dtypes: dtypes of the arguments at the call site
        :returns: ``None`` if *name* is not this mangler's function, else a
            :class:`loopy.kernel.data.CallMangleInfo`
        :raises Exception: if the number of arguments or any argument dtype
            differs from what this mangler was constructed with
        """
        if name != self.name:
            return None

        # check the argument count first; this needs nothing from loopy
        if len(arg_dtypes) != len(self.arg_dtypes):
            raise Exception('Unexpected number of arguments provided to mangler {},'
                            ' expected {}, got {}'.format(self.name,
                                                          len(self.arg_dtypes),
                                                          len(arg_dtypes)))

        from loopy.types import to_loopy_type
        from loopy.kernel.data import CallMangleInfo

        def same_dtype(d1, d2):
            # compare dtypes ignoring atomic
            return to_loopy_type(d1, for_atomic=True) == \
                to_loopy_type(d2, for_atomic=True)

        # check types
        for i, (d1, d2) in enumerate(zip(self.arg_dtypes, arg_dtypes)):
            if not same_dtype(d1, d2):
                raise Exception('Argument at index {} for mangler {} does not match'
                                ' expected dtype. Expected {}, got {}'.format(
                                    i, self.name, str(d1), str(d2)))

        # get target for creation
        target = arg_dtypes[0].target
        return CallMangleInfo(
            target_name=self.name,
            result_dtypes=tuple(to_loopy_type(x, target=target) for x in
                                self.result_dtypes),
            arg_dtypes=arg_dtypes)
class indirect(object):
    """Preamble generator that supplies the OpenCL ``indirect`` helper.

    An instance is a callable registered on a kernel with
    ``lp.register_preamble_generators``; it emits the source of ``indirect``
    when that function is among the functions seen in the kernel with the
    expected argument dtypes.  The matching function mangler is available
    through :meth:`get_func_mangler`.
    """

    # OpenCL source of the helper.  It searches the ``lookup`` array, which is
    # not defined here and must be provided by the kernel.  Returns -1 when no
    # entry matches, so the result is never left undefined.
    _CODE = """
int indirect(int start, int end, int match)
{
    for (int i = start; i < end; ++i)
    {
        if (lookup[i] == match)
            return i - start;
    }
    // no entry in [start, end) equals match
    return -1;
}
"""

    def __init__(self):
        self.name = 'indirect'
        self.code = self._CODE
        self.arg_dtypes = (np.int32, np.int32, np.int32)
        self.result_dtypes = (np.int32,)
        self.func_mangler = indirectmangler(
            self.name, self.arg_dtypes, self.result_dtypes)

    def generate_code(self, preamble_info):
        """Return the OpenCL source of the helper (argument is unused)."""
        return self.code

    def get_descriptor(self, func_match):
        """Return the identifier under which the preamble is emitted."""
        return 'custom_funcs_indirect'

    def get_func_mangler(self):
        """Return the :class:`indirectmangler` built for this function."""
        return self.func_mangler

    def match(self, func_sig):
        """Return whether *func_sig* names the function handled here."""
        return func_sig.name == self.name

    def __call__(self, preamble_info):
        """Yield a single ``(descriptor, code)`` pair for the preamble.

        *code* is the helper's source when ``preamble_info.seen_functions``
        contains a function with this name and the expected argument dtypes,
        and the empty string otherwise.
        """
        # find a function matching this name
        func_match = next(
            (x for x in preamble_info.seen_functions if self.match(x)), None)
        desc = self.get_descriptor(func_match)
        code = ''
        if func_match is not None:
            from loopy.types import to_loopy_type
            # only emit the source when the seen signature is the expected one
            expected = tuple(to_loopy_type(x) for x in self.arg_dtypes)
            if expected == func_match.arg_dtypes:
                code = self.generate_code(func_match)
        # return code generator
        yield (desc, code)
n = 10
# for each entry come up with a random number of data points
num_data = np.random.randint(1, 10, size=n, dtype=np.int32)
# turn into offsets: entry i owns lookup[offsets[i]:offsets[i + 1]]
offsets = np.asarray(np.hstack(([0], np.cumsum(num_data))), dtype=np.int32)
# create lookup data: for each entry the values 0 .. count - 1, concatenated
# (built in one call instead of growing an array with repeated hstack)
lookup = np.concatenate([np.arange(count, dtype=np.int32) for count in num_data])
# and create data array
data = np.random.rand(num_data.size)
# make kernel
kernel = lp.make_kernel(
    '{[i]: 0 <= i < n}',
    """
    out[i] = data[indirect(offsets[i], offsets[i + 1], 1)]
    """,
    [lp.GlobalArg('out', shape=('n',)),
     lp.TemporaryVariable('lookup', shape=(lookup.size,), initializer=lookup,
                          scope=scopes.GLOBAL, read_only=True),
     lp.TemporaryVariable('offsets', shape=(offsets.size,), initializer=offsets,
                          scope=scopes.GLOBAL, read_only=True),
     lp.GlobalArg('data', shape=(data.size,), dtype=np.float64)],
)
kernel = lp.fix_parameters(kernel, n=n)
# register the preamble generator and the matching function mangler
pre = indirect()
kernel = lp.register_preamble_generators(kernel, [pre])
kernel = lp.register_function_manglers(kernel, [pre.get_func_mangler()])
print(lp.generate_code(kernel)[0])
# and call (functionality unimportant, more that it compiles)
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
out = kernel(queue, data=data.flatten('C'))[1][0]
# Each segment of lookup is 0 .. count - 1, so the value 1 sits at position 1
# of every segment holding at least two points and those outputs must equal
# data[1].  Segments with a single point contain no 1 and are not compared.
# (data is one-dimensional, so the former ``data[:, ...]`` indexing raised.)
has_match = num_data > 1
assert np.allclose(out[has_match], data[1])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment