Skip to content

Instantly share code, notes, and snippets.

@inferrna
Created December 1, 2016 08:04
Show Gist options
  • Save inferrna/ccbcedec405c600970a5a097cf995571 to your computer and use it in GitHub Desktop.
Save inferrna/ccbcedec405c600970a5a097cf995571 to your computer and use it in GitHub Desktop.
$ OFFSET32_BIT=1 CLANG_HOME=/usr/lib/llvm-3.8 py.test -svx test/tf/test_tf.py
======================================================================== test session starts =========================================================================
platform linux -- Python 3.5.2, pytest-2.9.1, py-1.4.31, pluggy-0.3.1 -- /usr/bin/python3
cachedir: .cache
rootdir: /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl, inifile: pytest.ini
collected 2 items
test/tf/test_tf.py::test_cwise_sqrt context <pyopencl.Context at 0x222ece0 on <pyopencl.Device 'Pitcairn' on 'AMD Accelerated Parallel Processing' at 0x22b9f30>>
options []
opt_options []
iropencl_options []
running [/usr/lib/llvm-3.8/bin/opt -S test/tf/samples/cwise_op_gpu_sqrt-device-noopt.ll -o /tmp/test-opt.ll]
running [build/ir-to-opencl --inputfile /tmp/test-opt.ll --outputfile /tmp/test-device.cl --kernelname _ZN5Eigen8internal15EigenMetaKernelINS_15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_18TensorCwiseUnaryOpINS0_14scalar_sqrt_opIfEEKNS4_INS5_IKfLi1ELi1EiEELi16ES7_EEEEEENS_9GpuDeviceEEEiEEvT_T0_]
F name _ZN5Eigen8internal15EigenMetaKe
running generation on _ZN5Eigen8internal15EigenMetaKe
running generation on _ZN5Eigen15TensorEvaluatorIK0
running generation on _ZN5Eigen15TensorEvaluatorIK1
running generation on _ZN5Eigen15TensorEvaluatorIK1
running generation on _ZN5Eigen15TensorEvaluatorIN2
running generation on _ZN5Eigen15TensorEvaluatorIN9
running generation on _ZNK5Eigen15TensorEvaluatorI3
running generation on _ZNK5Eigen15TensorEvaluatorI8
running generation on _ZN5Eigen15TensorEvaluatorIK1
running generation on _ZN5Eigen8internal6pstoreIf64_gp
running generation on _ZNK5Eigen15TensorEvaluatorI11
running generation on _ZNK5Eigen15TensorEvaluatorI6
running generation on _ZNK5Eigen8internal14scalar_10
running generation on _ZNK5Eigen8internal14scalar_5
running generation on _ZN5Eigen8internal5psqrtI6fl7
creating program...
building kernel...
/usr/local/lib/python3.5/dist-packages/pyopencl/__init__.py:206: CompilerWarning: Non-empty compiler output encountered. Set the environment variable PYOPENCL_COMPILER_OUTPUT=1 to see more.
"to see more.", CompilerWarning)
running kernel...
FAILED
====================================================================== short test summary info =======================================================================
FAIL test/tf/test_tf.py::test_cwise_sqrt
============================================================================== FAILURES ==============================================================================
__________________________________________________________________________ test_cwise_sqrt ___________________________________________________________________________
context = <pyopencl.Context at 0x222ece0 on <pyopencl.Device 'Pitcairn' on 'AMD Accelerated Parallel Processing' at 0x22b9f30>>
q = <pyopencl.cffi_cl.CommandQueue object at 0x7f22d07aa668>
float_data = array([ 0.28847906, -0.46295407, -1.33800447, ..., 0.85320115,
-0.70543814, 0.55171764], dtype=float32)
float_data_gpu = <pyopencl.cffi_cl.Buffer object at 0x7f22d07aa630>
# NOTE(review): this is pytest's source echo captured from a terminal; the
# original indentation was stripped by the paste, so the lines below are flat.
@pytest.mark.skipif(os.environ.get('TRAVIS', None) == 'true', reason='fails on travis mac cpu, not looked into why yet')
# End-to-end test: lower a sample LLVM IR device kernel to OpenCL C using
# `opt` and `ir-to-opencl`, build the result with pyopencl, and launch the
# Eigen EigenMetaKernel. The fixtures `context`, `q`, `float_data`,
# `float_data_gpu` are presumably supplied by a pytest conftest -- not
# visible here; verify against the repository.
def test_cwise_sqrt(context, q, float_data, float_data_gpu):
options = test_common.cocl_options()
i = 0
opt_options = []
iropencl_options = []
# Split the combined cocl options into the ones destined for llvm `opt`
# ('--devicell-opt VALUE' pairs, re-prefixed as '-VALUE') and the flags
# forwarded verbatim to ir-to-opencl; anything else is a hard error.
while i < len(options):
if options[i] == '--devicell-opt':
opt_options.append('-' + options[i + 1])
i += 2
continue
if options[i] in ['--run_branching_transforms', '--branches_as_switch']:
iropencl_options.append(options[i])
i += 1
continue
raise Exception('unknown option ', options[i])
i += 1
print('opt_options', opt_options)
print('iropencl_options', iropencl_options)
# Run llvm `opt` over the unoptimized sample device IR, writing the
# optimized IR to /tmp/test-opt.ll.
myrun([
join(CLANG_HOME, 'bin/opt')
] + opt_options + [
'-S',
'test/tf/samples/cwise_op_gpu_sqrt-device-noopt.ll',
'-o', '/tmp/test-opt.ll'
])
# Translate the optimized IR into OpenCL C, extracting the single kernel
# selected by its full Itanium-mangled name.
myrun([
'build/ir-to-opencl'
] + iropencl_options + [
'--inputfile', '/tmp/test-opt.ll',
'--outputfile', '/tmp/test-device.cl',
'--kernelname', '_ZN5Eigen8internal15EigenMetaKernelINS_15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_18TensorCwiseUnaryOpINS0_14scalar_sqrt_opIfEEKNS4_INS5_IKfLi1ELi1EiEELi16ES7_EEEEEENS_9GpuDeviceEEEiEEvT_T0_'
])
with open('/tmp/test-device.cl', 'r') as f:
cl_sourcecode = f.read()
print('creating program...')
prog_unbuilt = cl.Program(context, cl_sourcecode)
print('building kernel...')
prog = prog_unbuilt.build()
N = 10
# global struct Eigen__TensorEvaluator_nopointers* eval_nopointers, global float* eval_ptr0, long eval_ptr0_offset, global float* eval_ptr1, long eval_ptr1_offset, int size, local int *scratch
# what we need:
# struct Eigen__TensorEvaluator_nopointers Note that none of the values we copy across are actually used, so we can just create a sufficiently large buffer...
# global float *eval_ptr0 => this will receive the result. just create a sufficiently large buffer
# ptr0_offset => 0
# eval_ptr1 => will contain the data we want to reduce
# eval_ptr1_offset=> 0
# size => eg 10, to reduce 10 values
# scratch => set to workgroupsize * sizeof(float)
eval_nopointers_gpu = cl.Buffer(context, cl.mem_flags.READ_WRITE, size=4096)
eval_ptr0 = np.zeros(1024, dtype=np.float32)
eval_ptr0_gpu = cl.Buffer(context, cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR, hostbuf=eval_ptr0)
eval_ptr0_offset = 0
eval_ptr1 = np.random.uniform(0, 1, size=(1024,)).astype(np.float32) + 1.0
eval_ptr1_gpu = cl.Buffer(context, cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR, hostbuf=eval_ptr1)
eval_ptr1_offset = 0
size = N
global_size = 256
workgroup_size = 256
scratch = workgroup_size * 4
print('running kernel...')
# The kernel is looked up by the first 31 characters of its mangled name --
# this matches "F name _ZN5Eigen8internal15EigenMetaKe" in the log above,
# so presumably the generator truncates long mangled names to 31 chars;
# TODO confirm against ir-to-opencl.
# NOTE(review): `offset_type` is defined elsewhere (not visible here) and
# presumably selects a 32- or 64-bit integer depending on OFFSET32_BIT.
# The INVALID_ARG_SIZE failure on argument #3 (eval_ptr0_offset, per the
# traceback below) suggests its size does not match the kernel's declared
# offset parameter (`long` in the signature comment above) -- verify.
prog.__getattr__('_ZN5Eigen8internal15EigenMetaKernelINS_15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_18TensorCwiseUnaryOpINS0_14scalar_sqrt_opIfEEKNS4_INS5_IKfLi1ELi1EiEELi16ES7_EEEEEENS_9GpuDeviceEEEiEEvT_T0_'[:31])(
q, (global_size,), (workgroup_size,),
eval_nopointers_gpu,
eval_ptr0_gpu, offset_type(eval_ptr0_offset),
eval_ptr1_gpu, offset_type(eval_ptr1_offset),
np.int32(size),
# NOTE(review): the leading '>' on the next line is pytest's failure
# marker from the captured output, not part of the source.
> cl.LocalMemory(scratch)
)
test/tf/test_tf.py:108:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/usr/local/lib/python3.5/dist-packages/pyopencl/__init__.py:995: in kernel_call
return self._enqueue(self, queue, global_size, local_size, *args, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyopencl.cffi_cl.Kernel object at 0x7f22d079aef0>, queue = <pyopencl.cffi_cl.CommandQueue object at 0x7f22d07aa668>, global_size = (256,), local_size = (256,)
arg0 = <pyopencl.cffi_cl.Buffer object at 0x7f22de3f1470>, arg1 = <pyopencl.cffi_cl.Buffer object at 0x7f22de3f1198>, arg2 = 0
arg3 = <pyopencl.cffi_cl.Buffer object at 0x7f22d079ae80>, arg4 = 0, arg5 = 10, arg6 = <pyopencl.cffi_cl.LocalMemory object at 0x7f22d07d2f88>, global_offset = None
g_times_l = None, wait_for = None
> ???
E pyopencl.cffi_cl.LogicError: when processing argument #3 (1-based): clsetkernelarg failed: INVALID_ARG_SIZE
<generated function enqueue_knl__ZN5Eigen8internal15EigenMetaKe>:138: LogicError
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Interrupted: stopping after 1 failures !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
====================================================================== 1 failed in 0.81 seconds ======================================================================
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment