Skip to content

Instantly share code, notes, and snippets.

@arghdos
Last active Oct 17, 2017
Embed
What would you like to do?
Preamble w/ codegen
#define lid(N) ((int) get_local_id(N))
#define gid(N) ((int) get_group_id(N))
#if __OPENCL_C_VERSION__ < 120
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#endif
/* Read-only lookup table searched by indirect().  Its 55 values form
 * consecutive runs that each count upward starting from 0. */
__constant int const lookup[55] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 0, 1, 2, 3, 4, 0, 1, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 7 };
/* Search lookup[] on the open index range (start, end) for `match`.
 *
 * Returns the largest index i with start < i < end and lookup[i] == match.
 * If no such index exists, `start` is returned; lookup[start] itself is
 * never examined. */
int indirect(int start, int end, int match)
{
/* default answer when nothing in (start, end) matches */
int result = start;
for (int i = start + 1; i < end; ++i)
{
/* no early exit: a later match overwrites an earlier one */
if (lookup[i] == match)
result = i;
}
return result;
}
/* Eleven ascending boundaries; the kernel passes each consecutive pair
 * (offsets[i], offsets[i + 1]) to indirect() as the index range to search. */
__constant int const offsets[11] = { 0, 9, 12, 17, 19, 26, 29, 33, 40, 47, 55 };
/* For i = 0..9: find the index returned by indirect() for value 1 inside
 * range i, then copy data[index] into out[i].  The kernel is declared with a
 * required work-group size of (1, 1, 1) and loops over all i itself. */
__kernel void __attribute__ ((reqd_work_group_size(1, 1, 1))) loopy_kernel(__global double *__restrict__ out, __global double const *__restrict__ data)
{
int ind;
for (int i = 0; i <= 9; ++i)
{
/* index of the entry equal to 1 within [offsets[i], offsets[i + 1]) */
ind = indirect(offsets[i], offsets[1 + i], 1);
out[i] = data[ind];
}
}
import loopy as lp
import numpy as np
import pyopencl as cl
from loopy.kernel.data import temp_var_scope as scopes
lp.set_caching_enabled(False)
class indirectmangler(object):
    """Function mangler describing one externally defined function to loopy.

    An instance is callable with the ``(kernel, name, arg_dtypes)`` signature
    and answers only for the function name it was constructed with.

    :param name: name of the function this mangler recognizes
    :param arg_dtypes: expected argument dtypes, in call order
    :param result_dtypes: dtypes of the value(s) the function returns
    """

    def __init__(self, name, arg_dtypes, result_dtypes):
        self.name = name
        self.arg_dtypes = arg_dtypes
        self.result_dtypes = result_dtypes

    def __call__(self, kernel, name, arg_dtypes):
        """
        A function that will return a :class:`loopy.kernel.data.CallMangleInfo`
        to interface with the calling :class:`loopy.LoopKernel`

        :param kernel: the kernel containing the call
        :param name: name of the function being called
        :param arg_dtypes: dtypes of the arguments at the call site
        :returns: ``None`` if *name* is not this mangler's function, otherwise
            a ``CallMangleInfo`` for the call
        :raises Exception: if the number of arguments or any argument dtype
            differs from what this mangler expects
        """
        if name != self.name:
            return None

        # The arity check needs nothing from loopy, so do it before the
        # (lazy) imports below.
        if len(arg_dtypes) != len(self.arg_dtypes):
            raise Exception('Unexpected number of arguments provided to mangler {},'
                            ' expected {}, got {}'.format(self.name,
                                                          len(self.arg_dtypes),
                                                          len(arg_dtypes)))

        from loopy.types import to_loopy_type
        from loopy.kernel.data import CallMangleInfo

        def same_dtype(d1, d2):
            # compare dtypes ignoring atomic
            return to_loopy_type(d1, for_atomic=True) == \
                to_loopy_type(d2, for_atomic=True)

        # check types
        for i, (d1, d2) in enumerate(zip(self.arg_dtypes, arg_dtypes)):
            if not same_dtype(d1, d2):
                raise Exception('Argument at index {} for mangler {} does not match '
                                'expected dtype. Expected {}, got {}'.format(
                                    i, self.name, str(d1), str(d2)))

        # get target for creation; a zero-argument function has no argument to
        # take it from, so fall back to the kernel's own target
        target = arg_dtypes[0].target if arg_dtypes else kernel.target
        return CallMangleInfo(
            target_name=self.name,
            result_dtypes=tuple(to_loopy_type(x, target=target) for x in
                                self.result_dtypes),
            arg_dtypes=arg_dtypes)
class indirect(object):
    """Preamble generator for the ``indirect`` C helper and its lookup table.

    Holds the lookup array as a read-only global temporary named ``lookup``,
    the C source of ``indirect``, and the function mangler for that name.
    Calling an instance with loopy's preamble info yields one
    ``(descriptor, code)`` pair.
    """

    def __init__(self, arr):
        """Store *arr* as the ``lookup`` table and set up the C source/mangler."""
        self.name = 'indirect'
        self.arg_dtypes = (np.int32, np.int32, np.int32)
        self.result_dtypes = (np.int32,)
        self.func_mangler = indirectmangler(
            self.name, self.arg_dtypes, self.result_dtypes)
        self.arr = lp.TemporaryVariable(
            'lookup',
            initializer=arr,
            dtype=arr.dtype,
            shape=arr.shape,
            scope=scopes.GLOBAL,
            read_only=True)
        # C source of the helper; it reads the ``lookup`` array declared by
        # generate_code() directly in front of it.
        self.code = """
int indirect(int start, int end, int match)
{
int result = start;
for (int i = start + 1; i < end; ++i)
{
if (lookup[i] == match)
result = i;
}
return result;
}
"""

    def generate_code(self, preamble_info):
        """Return the ``lookup`` declaration followed by the helper's source."""
        from cgen import Initializer
        from loopy.target.c import generate_array_literal

        kernel = preamble_info.kernel
        state = preamble_info.codegen_state.copy(is_generating_device_code=True)
        builder = state.ast_builder

        # exactly one declaration is expected for the lookup array
        (info,) = self.arr.decl_info(
            kernel.target, index_dtype=kernel.index_dtype)
        declaration = builder.wrap_global_constant(
            builder.get_temporary_decl(state, 1, self.arr, info))

        if self.arr.initializer is not None:
            literal = generate_array_literal(
                state, self.arr, self.arr.initializer)
            declaration = Initializer(declaration, literal)

        return '\n'.join((str(declaration), self.code))

    def get_descriptor(self, func_match):
        """Return the identifier under which the preamble is yielded."""
        return 'custom_funcs_indirect'

    def get_func_mangler(self):
        """Return the function mangler built in ``__init__``."""
        return self.func_mangler

    def match(self, func_sig):
        """Tell whether *func_sig* carries this helper's name."""
        return func_sig.name == self.name

    def __call__(self, preamble_info):
        """Yield ``(descriptor, code)`` for the preamble.

        ``code`` stays empty unless a seen function has this helper's name and
        its argument dtypes equal the expected ones.
        """
        # first seen function with our name, if any
        hit = None
        for sig in preamble_info.seen_functions:
            if self.match(sig):
                hit = sig
                break

        desc = self.get_descriptor(hit)
        code = ''
        if hit is not None:
            from loopy.types import to_loopy_type
            # only emit the helper when the call uses the expected types
            expected = tuple(to_loopy_type(x) for x in self.arg_dtypes)
            if expected == hit.arg_dtypes:
                code = self.generate_code(preamble_info)

        # return code generator
        yield (desc, code)
n = 10
# for each entry come up with a random number of data points; the lower bound
# of 2 guarantees every segment contains local index 1
num_data = np.random.randint(2, 10, size=n, dtype=np.int32)
# turn into offsets: segment i spans [offsets[i], offsets[i + 1])
offsets = np.asarray(np.hstack(([0], np.cumsum(num_data))), dtype=np.int32)
# create lookup data: each segment holds 0, 1, ..., count - 1
lookup = np.concatenate(
    [np.arange(count, dtype=np.int32) for count in num_data])
# and create data array with one value per lookup entry.  Every index used
# against `data` is a position inside `lookup`, so sizing it by the product of
# the segment lengths (up to 9**10 doubles) only wasted memory.
data = np.random.rand(lookup.size)
# make kernel: for every i, look up the index of value 1 inside segment i of
# the lookup table and copy the corresponding data entry to out[i]
instructions = """
for i
<>ind = indirect(offsets[i], offsets[i + 1], 1)
out[i] = data[ind]
end
"""
kernel_args = [
    lp.GlobalArg('out', shape=('n',)),
    lp.TemporaryVariable(
        'offsets',
        shape=(offsets.size,),
        initializer=offsets,
        scope=scopes.GLOBAL,
        read_only=True),
    lp.GlobalArg('data', shape=(data.size,), dtype=np.float64),
]
kernel = lp.make_kernel('{[i]: 0 <= i < n}', instructions, kernel_args)
kernel = lp.fix_parameters(kernel, n=n)

# hook the custom function into code generation: the preamble generator emits
# its source, the mangler describes its signature
pre = indirect(lookup)
kernel = lp.register_preamble_generators(kernel, [pre])
kernel = lp.register_function_manglers(kernel, [pre.get_func_mangler()])

generated = lp.generate_code(kernel)[0]
print(generated)

# and call (functionality unimportant, more that it compiles)
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
result = kernel(queue, data=data.flatten('C'))
# second element of the result holds the outputs; `out` is the first of them
computed = result[1][0]
expected = data[offsets[:-1] + 1]
assert np.allclose(computed, expected)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment