Skip to content

Instantly share code, notes, and snippets.

@inferrna
Last active November 28, 2016 16:35
Show Gist options
  • Save inferrna/d1e608b4bdec84f7ace00993be608252 to your computer and use it in GitHub Desktop.
Save inferrna/d1e608b4bdec84f7ace00993be608252 to your computer and use it in GitHub Desktop.
$ OFFSET32_BIT=1 CLANG_HOME=/usr/lib/llvm-3.8 py.test -svx test/test_compile.py -k pointerpointer.cu-myte6kernel
======================================================================== test session starts =========================================================================
platform linux -- Python 3.5.2, pytest-2.9.1, py-1.4.31, pluggy-0.3.1 -- /usr/bin/python3
cachedir: .cache
rootdir: /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl, inifile: pytest.ini
collecting 0 itemsmarking xfail
marking xfail
marking xfail
marking xfail
collected 9 items
test/test_compile.py::test_compile[test/pointerpointer.cu-myte6kernel] context <pyopencl.Context at 0x1c2fc60 on <pyopencl.Device 'Pitcairn' on 'AMD Accelerated Parallel Processing' at 0x1cbcbd0>>
options []
bin/cocl -c /tmp/testprog.cu
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
1 warning generated.
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
2 warnings generated.
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include'
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include'
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
1 warning generated.
build/ir-to-opencl --inputfile /tmp/testprog-device.ll --outputfile /tmp/testprog-device.cl --kernelname --add_ir_to_cl
terminate called after throwing an instance of 'std::runtime_error'
what(): Couldnt find kernel
mangledname _Z11myte6kernelP16TensorEvaluator6PfP9GpuDeviceiii
options []
bin/cocl -c /tmp/testprog.cu
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
1 warning generated.
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
2 warnings generated.
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include'
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include'
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
1 warning generated.
build/ir-to-opencl --inputfile /tmp/testprog-device.ll --outputfile /tmp/testprog-device.cl --kernelname _Z11myte6kernelP16TensorEvaluator6PfP9GpuDeviceiii --add_ir_to_cl
FAILED
====================================================================== short test summary info =======================================================================
FAIL test/test_compile.py::test_compile[test/pointerpointer.cu-myte6kernel]
============================================================================== FAILURES ==============================================================================
__________________________________________________________ test_compile[test/pointerpointer.cu-myte6kernel] __________________________________________________________
context = <pyopencl.Context at 0x1c2fc60 on <pyopencl.Device 'Pitcairn' on 'AMD Accelerated Parallel Processing' at 0x1cbcbd0>>
cu_filepath = 'test/pointerpointer.cu', kernelname = 'myte6kernel'
@pytest.mark.parametrize("cu_filepath,kernelname", get_test_definitions())
def test_compile(context, cu_filepath, kernelname):
with open(cu_filepath, 'r') as f:
cu_code = f.read()
try:
cl_code = test_common.cu_to_cl(cu_code, '')
except:
pass
with open('/tmp/testprog-device.ll') as f:
ll_code = f.read()
for line in ll_code.split('\n'):
if line.startswith('define') and kernelname in line:
mangledname = line.split('@')[1].split('(')[0]
break
print('mangledname', mangledname)
cl_code = test_common.cu_to_cl(cu_code, mangledname)
> test_common.build_kernel(context, cl_code, mangledname)
test/test_compile.py:47:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
test/test_common.py:232: in build_kernel
prog = cl.Program(context, cl_sourcecode).build()
/usr/local/lib/python3.5/dist-packages/pyopencl/__init__.py:382: in build
options=options, source=self._source)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyopencl.Program object at 0x7f30a1f62c88>, build_func = <function Program.build.<locals>.<lambda> at 0x7f30b019fe18>
options = ['-I', '/usr/local/lib/python3.5/dist-packages/pyopencl/cl']
source = 'struct class_HalfImpl {\n short f0;\n};\nstruct class_GpuDevice {\n int f0;\n global struct class_StreamInte...itofp v8 */;\n v10 = (float)(v6 + 123);\n /* void v11 = store v10 data */;\n data[0] = v10;\n return;\n}\n'
def _build_and_catch_errors(self, build_func, options, source=None):
try:
return build_func()
except _cl.RuntimeError as e:
what = e.what
if options:
what = what + "\n(options: %s)" % " ".join(options)
if source is not None:
from tempfile import NamedTemporaryFile
srcfile = NamedTemporaryFile(mode="wt", delete=False, suffix=".cl")
try:
srcfile.write(source)
finally:
srcfile.close()
what = what + "\n(source saved as %s)" % srcfile.name
code = e.code
routine = e.routine
err = _cl.RuntimeError(
_ErrorRecord(
what=lambda: what,
code=lambda: code,
routine=lambda: routine))
# Python 3.2 outputs the whole list of currently active exceptions
# This serves to remove one (redundant) level from that nesting.
> raise err
E pyopencl.cffi_cl.RuntimeError: clbuildprogram failed: BUILD_PROGRAM_FAILURE -
E
E Build on <pyopencl.Device 'Pitcairn' on 'AMD Accelerated Parallel Processing' at 0x1cbcbd0>:
E
E "/tmp/OCL32651T1.cl", line 34: error: kernel arguments can't be declared with
E types
E bool/half/size_t/ptrdiff_t/intptr_t/uintptr_t/pointer-to-pointer
E kernel void _Z11myte6kernelP16TensorEvaluator6PfP9GpuDeviceiii(global struct class_TensorEvaluator6* structs, uint structs_offset, global float* data, uint data_offset, global struct class_GpuDevice* gpudevices, uint gpudevices_offset, int a, int b, int c, local int *scratch);
E ^
E
E "/tmp/OCL32651T1.cl", line 36: error: kernel arguments can't be declared with
E types
E bool/half/size_t/ptrdiff_t/intptr_t/uintptr_t/pointer-to-pointer
E kernel void _Z11myte6kernelP16TensorEvaluator6PfP9GpuDeviceiii(global struct class_TensorEvaluator6* structs, uint structs_offset, global float* data, uint data_offset, global struct class_GpuDevice* gpudevices, uint gpudevices_offset, int a, int b, int c, local int *scratch) {
E ^
E
E "/tmp/OCL32651T1.cl", line 46: warning: label "v1" was declared but never
E referenced
E v1:;
E ^
E
E 2 errors detected in the compilation of "/tmp/OCL32651T1.cl".
E Frontend phase failed compilation.
E
E (options: -I /usr/local/lib/python3.5/dist-packages/pyopencl/cl)
E (source saved as /tmp/tmpf8iufbw0.cl)
/usr/local/lib/python3.5/dist-packages/pyopencl/__init__.py:417: RuntimeError
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Interrupted: stopping after 1 failures !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
====================================================== 8 tests deselected by '-kpointerpointer.cu-myte6kernel' =======================================================
=============================================================== 1 failed, 8 deselected in 6.30 seconds ===============================================================
/* Struct layouts used by the kernel below. Members are named by position
   (f0, f1, ...). Pointer members carry the `global` address-space qualifier. */

/* Innermost wrapper: a single 16-bit value. */
struct class_HalfImpl {
short f0;
};
/* An int plus a global pointer to class_StreamInterface (only used through a
   pointer here, so its definition may follow). */
struct class_GpuDevice {
int f0;
global struct class_StreamInterface* f1;
};
/* One-byte struct; only referenced through the pointer in class_GpuDevice. */
struct class_StreamInterface {
char f0;
};
/* Wraps class_HalfImpl by value. */
struct class_HalfBase {
struct class_HalfImpl f0;
};
/* A global pointer to class_Half followed by an embedded class_GpuDevice. */
struct class_TensorEvaluator0 {
global struct class_Half* f0;
struct class_GpuDevice f1;
};
/* Wraps class_HalfBase by value, so the 16-bit payload is reached as f0.f0.f0. */
struct class_Half {
struct class_HalfBase f0;
};
/* Same member layout as class_TensorEvaluator0. */
struct class_TensorEvaluator2 {
global struct class_Half* f0;
struct class_GpuDevice f1;
};
/* A global pointer to class_Half followed by an embedded class_TensorEvaluator2. */
struct class_TensorEvaluator7 {
global struct class_Half* f0;
struct class_TensorEvaluator2 f1;
};
/* Element type of the kernel's `structs` argument.
   NOTE(review): this struct transitively contains global pointer members
   (f0.f0, f0.f1.f1, f1.f0, f1.f1.f0, f1.f1.f1.f1). That is presumably what
   the device compiler rejects in the build log ("kernel arguments can't be
   declared with types ... pointer-to-pointer") -- confirm against the
   OpenCL kernel-argument restrictions. */
struct class_TensorEvaluator6 {
struct class_TensorEvaluator0 f0;
struct class_TensorEvaluator7 f1;
};
/* Prototype followed by the definition of the kernel.

   Parameters: each pointer argument (structs, data, gpudevices) is followed
   by a uint *_offset that is added to the pointer on entry. a, b and c are
   plain ints; only `a` is read in the body. `scratch` (local int*) is not
   referenced in the body.

   Effect: data[0] = (float)(structs[a].f0.f0[a].f0.f0.f0 + 123), i.e. the
   16-bit payload of the a-th class_Half reached through the a-th
   class_TensorEvaluator6, plus 123, converted to float.

   NOTE(review): the build log reports "kernel arguments can't be declared
   with types ... pointer-to-pointer" on both the prototype and the
   definition. Presumably this is triggered by the `structs` / `gpudevices`
   arguments, whose struct types contain global pointer members -- confirm. */
kernel void _Z11myte6kernelP16TensorEvaluator6PfP9GpuDeviceiii(global struct class_TensorEvaluator6* structs, uint structs_offset, global float* data, uint data_offset, global struct class_GpuDevice* gpudevices, uint gpudevices_offset, int a, int b, int c, local int *scratch);
kernel void _Z11myte6kernelP16TensorEvaluator6PfP9GpuDeviceiii(global struct class_TensorEvaluator6* structs, uint structs_offset, global float* data, uint data_offset, global struct class_GpuDevice* gpudevices, uint gpudevices_offset, int a, int b, int c, local int *scratch) {
/* Apply the per-buffer element offsets passed alongside each pointer. */
gpudevices += gpudevices_offset;
data += data_offset;
structs += structs_offset;
/* Temporaries, declared up front; the vN names follow the numbering used in
   the instruction comments below. */
float v10;
global struct class_Half* v4;
long v2;
short v6;
/* Label for the single basic block; nothing jumps to it (the build log warns
   that it is never referenced). */
v1:;
/* The pre-existing comments below show the instruction each statement was
   derived from; <unk> marks operands that were not printed. */
/* long v2 = sext a */;
v2 = a;
/* struct class_Half** v3 = getelementptr structs v2 <unk> <unk> */;
/* struct class_Half* v4 = load v3 */;
/* Reads a global pointer out of global memory: structs[a].f0.f0. */
v4 = (&(structs[v2].f0.f0))[0];
/* short* v5 = getelementptr v4 v2 <unk> <unk> <unk> */;
/* short v6 = load v5 */;
/* Note the same index `a` is reused to index the class_Half array. */
v6 = (&(v4[v2].f0.f0.f0))[0];
/* int v7 = sext v6 */;
/* int v8 = add v7 <unk> */;
/* float v10 = sitofp v8 */;
/* The sext/add/sitofp steps are folded into one expression. */
v10 = (float)(v6 + 123);
/* void v11 = store v10 data */;
data[0] = v10;
return;
}
; ModuleID = '/tmp/testprog-device-noopt.ll'
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"
%struct.MyStruct = type { float, i32 }
%class.Half = type { %class.HalfBase }
%class.HalfBase = type { %class.HalfImpl }
%class.HalfImpl = type { i16 }
%class.TensorEvaluator6 = type { %class.TensorEvaluator0, %class.TensorEvaluator7 }
%class.TensorEvaluator0 = type { %class.Half*, %class.GpuDevice }
%class.GpuDevice = type { i32, %class.StreamInterface* }
%class.StreamInterface = type { i8 }
%class.TensorEvaluator7 = type { %class.Half*, %class.TensorEvaluator2 }
%class.TensorEvaluator2 = type { %class.Half*, %class.GpuDevice }
@.str = private unnamed_addr constant [5 x i8] c"NONE\00", align 1
@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 ()* @_ZL21__nvvm_reflect_anchorv to i8*)], section "llvm.metadata"
; Function Attrs: nounwind readnone
; Internal helper that calls @__nvvm_reflect with a pointer to the @.str
; constant ("NONE\00") and returns the result. Nothing in this module calls
; it; it is listed in @llvm.used above, which keeps it from being dropped.
define internal i32 @_ZL21__nvvm_reflect_anchorv() #0 {
%1 = tail call i32 @__nvvm_reflect(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0)) #3
ret i32 %1
}
; Function Attrs: nounwind readnone
declare i32 @__nvvm_reflect(i8*) #0
; Function Attrs: norecurse nounwind readonly
; sumStruct(MyStruct** p_structs, int N) -> float
;
; Returns the sum over i in [0, N) of
;   p_structs[i]->field0 + 3.5 * (float)p_structs[i]->field1
; where %struct.MyStruct is { float, i32 }. Returns 0.0 when N <= 0.
;
; The loop has been unrolled by two:
;   * .lr.ph.prol handles element 0 on its own when N is odd (N & 1 != 0);
;   * .lr.ph handles two elements per iteration and exits when i + 2 == N;
;   * when N == 1 the main loop is skipped entirely after the prologue.
define float @_Z9sumStructPP8MyStructi(%struct.MyStruct** nocapture readonly %p_structs, i32 %N) #1 {
; Entry: nothing to do for N <= 0.
%1 = icmp sgt i32 %N, 0
br i1 %1, label %.lr.ph.preheader, label %._crit_edge
.lr.ph.preheader: ; preds = %0
; Decide whether a one-element prologue is needed (odd N).
%xtraiter = and i32 %N, 1
%lcmp.mod = icmp eq i32 %xtraiter, 0
br i1 %lcmp.mod, label %.lr.ph.preheader.split, label %.lr.ph.prol
.lr.ph.prol: ; preds = %.lr.ph.preheader
; Prologue: contribution of p_structs[0].
%2 = load %struct.MyStruct*, %struct.MyStruct** %p_structs, align 8, !tbaa !9
%3 = getelementptr inbounds %struct.MyStruct, %struct.MyStruct* %2, i64 0, i32 0
%4 = load float, float* %3, align 4, !tbaa !13
%5 = getelementptr inbounds %struct.MyStruct, %struct.MyStruct* %2, i64 0, i32 1
%6 = load i32, i32* %5, align 4, !tbaa !17
%7 = sitofp i32 %6 to float
%8 = fmul float %7, 3.500000e+00
%9 = fadd float %4, %8
%10 = fadd float %9, 0.000000e+00
br label %.lr.ph.preheader.split
.lr.ph.preheader.split: ; preds = %.lr.ph.prol, %.lr.ph.preheader
; Merge: running sum and start index are (0.0, 0) without the prologue or
; (%10, 1) with it.
%.lcssa.unr = phi float [ undef, %.lr.ph.preheader ], [ %10, %.lr.ph.prol ]
%sum.02.unr = phi float [ 0.000000e+00, %.lr.ph.preheader ], [ %10, %.lr.ph.prol ]
%i.01.unr = phi i32 [ 0, %.lr.ph.preheader ], [ 1, %.lr.ph.prol ]
; N == 1: the prologue already covered the only element.
%11 = icmp eq i32 %N, 1
br i1 %11, label %._crit_edge.loopexit, label %.lr.ph.preheader.split.split
.lr.ph.preheader.split.split: ; preds = %.lr.ph.preheader.split
br label %.lr.ph
._crit_edge.loopexit.unr-lcssa: ; preds = %.lr.ph
%.lcssa3 = phi float [ %34, %.lr.ph ]
br label %._crit_edge.loopexit
._crit_edge.loopexit: ; preds = %._crit_edge.loopexit.unr-lcssa, %.lr.ph.preheader.split
%.lcssa = phi float [ %.lcssa.unr, %.lr.ph.preheader.split ], [ %.lcssa3, %._crit_edge.loopexit.unr-lcssa ]
br label %._crit_edge
._crit_edge: ; preds = %._crit_edge.loopexit, %0
; Single return point: 0.0 for N <= 0, otherwise the accumulated sum.
%sum.0.lcssa = phi float [ 0.000000e+00, %0 ], [ %.lcssa, %._crit_edge.loopexit ]
ret float %sum.0.lcssa
.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader.split.split
; Main loop body: two elements (i and i + 1) per iteration.
%sum.02 = phi float [ %sum.02.unr, %.lr.ph.preheader.split.split ], [ %34, %.lr.ph ]
%i.01 = phi i32 [ %i.01.unr, %.lr.ph.preheader.split.split ], [ %35, %.lr.ph ]
; Element i.
%12 = sext i32 %i.01 to i64
%13 = getelementptr inbounds %struct.MyStruct*, %struct.MyStruct** %p_structs, i64 %12
%14 = load %struct.MyStruct*, %struct.MyStruct** %13, align 8, !tbaa !9
%15 = getelementptr inbounds %struct.MyStruct, %struct.MyStruct* %14, i64 0, i32 0
%16 = load float, float* %15, align 4, !tbaa !13
%17 = getelementptr inbounds %struct.MyStruct, %struct.MyStruct* %14, i64 0, i32 1
%18 = load i32, i32* %17, align 4, !tbaa !17
%19 = sitofp i32 %18 to float
%20 = fmul float %19, 3.500000e+00
%21 = fadd float %16, %20
%22 = fadd float %sum.02, %21
; Element i + 1.
%23 = add nuw nsw i32 %i.01, 1
%24 = sext i32 %23 to i64
%25 = getelementptr inbounds %struct.MyStruct*, %struct.MyStruct** %p_structs, i64 %24
%26 = load %struct.MyStruct*, %struct.MyStruct** %25, align 8, !tbaa !9
%27 = getelementptr inbounds %struct.MyStruct, %struct.MyStruct* %26, i64 0, i32 0
%28 = load float, float* %27, align 4, !tbaa !13
%29 = getelementptr inbounds %struct.MyStruct, %struct.MyStruct* %26, i64 0, i32 1
%30 = load i32, i32* %29, align 4, !tbaa !17
%31 = sitofp i32 %30 to float
%32 = fmul float %31, 3.500000e+00
%33 = fadd float %28, %32
%34 = fadd float %22, %33
; Advance by two and stop once the index reaches N.
%35 = add nsw i32 %i.01, 2
%exitcond.1 = icmp eq i32 %35, %N
br i1 %exitcond.1, label %._crit_edge.loopexit.unr-lcssa, label %.lr.ph
}
; Function Attrs: norecurse nounwind
; mykernel(float* data, MyStruct* structs, int N); tagged as a "kernel" in
; !nvvm.annotations (!0).
;
; Three accumulation phases, each adding the loop-invariant value
;   v = structs[0].field0 + 3.5 * (float)structs[0].field1
; to a running sum a number of times and storing the sum into %data:
;   1. N times (0.0 if N <= 0)        -> data[0]
;   2. a fixed 123 times              -> data[3]
;   3. a fixed 12300 times            -> data[4]
; v is re-read from %structs before phases 2 and 3.
;
; NOTE(review): the block names containing "_Z9sumStructPP8MyStructi.exit"
; suggest these phases are three inlined calls to sumStruct -- confirm against
; the original .cu source, which is not part of this dump.
define void @_Z8mykernelPfP8MyStructi(float* nocapture %data, %struct.MyStruct* %structs, i32 %N) #2 {
; Phase 1 entry: skip the loop entirely when N <= 0.
%1 = icmp sgt i32 %N, 0
%2 = getelementptr inbounds %struct.MyStruct, %struct.MyStruct* %structs, i64 0, i32 0
br i1 %1, label %.lr.ph.i.preheader, label %._Z9sumStructPP8MyStructi.exit_crit_edge
._Z9sumStructPP8MyStructi.exit_crit_edge: ; preds = %0
; N <= 0 path still needs the address of structs[0].field1 for phases 2 and 3.
%.pre17 = getelementptr inbounds %struct.MyStruct, %struct.MyStruct* %structs, i64 0, i32 1
br label %_Z9sumStructPP8MyStructi.exit
.lr.ph.i.preheader: ; preds = %0
; Compute v (%8) once, then split N into a remainder (N & 7) handled by the
; prologue loop and a main loop unrolled by eight.
%3 = load float, float* %2, align 4
%4 = getelementptr inbounds %struct.MyStruct, %struct.MyStruct* %structs, i64 0, i32 1
%5 = load i32, i32* %4, align 4
%6 = sitofp i32 %5 to float
%7 = fmul float %6, 3.500000e+00
%8 = fadd float %3, %7
%9 = add i32 %N, -1
%xtraiter = and i32 %N, 7
%lcmp.mod = icmp eq i32 %xtraiter, 0
br i1 %lcmp.mod, label %.lr.ph.i.preheader.split, label %.lr.ph.i.prol.preheader
.lr.ph.i.prol.preheader: ; preds = %.lr.ph.i.preheader
br label %.lr.ph.i.prol
.lr.ph.i.prol: ; preds = %.lr.ph.i.prol, %.lr.ph.i.prol.preheader
; Prologue loop: one addition of v per iteration, (N & 7) iterations.
%sum.02.i.prol = phi float [ %10, %.lr.ph.i.prol ], [ 0.000000e+00, %.lr.ph.i.prol.preheader ]
%i.01.i.prol = phi i32 [ %11, %.lr.ph.i.prol ], [ 0, %.lr.ph.i.prol.preheader ]
%prol.iter = phi i32 [ %prol.iter.sub, %.lr.ph.i.prol ], [ %xtraiter, %.lr.ph.i.prol.preheader ]
%10 = fadd float %sum.02.i.prol, %8
%11 = add nuw nsw i32 %i.01.i.prol, 1
%prol.iter.sub = add i32 %prol.iter, -1
%prol.iter.cmp = icmp eq i32 %prol.iter.sub, 0
br i1 %prol.iter.cmp, label %.lr.ph.i.preheader.split.loopexit, label %.lr.ph.i.prol, !llvm.loop !18
.lr.ph.i.preheader.split.loopexit: ; preds = %.lr.ph.i.prol
%.lcssa28 = phi i32 [ %11, %.lr.ph.i.prol ]
%.lcssa27 = phi float [ %10, %.lr.ph.i.prol ]
br label %.lr.ph.i.preheader.split
.lr.ph.i.preheader.split: ; preds = %.lr.ph.i.preheader.split.loopexit, %.lr.ph.i.preheader
; Merge prologue / no-prologue state; skip the main loop when N - 1 < 7
; (unsigned), i.e. the prologue already covered every iteration.
%.lcssa24.unr = phi float [ undef, %.lr.ph.i.preheader ], [ %.lcssa27, %.lr.ph.i.preheader.split.loopexit ]
%sum.02.i.unr = phi float [ 0.000000e+00, %.lr.ph.i.preheader ], [ %.lcssa27, %.lr.ph.i.preheader.split.loopexit ]
%i.01.i.unr = phi i32 [ 0, %.lr.ph.i.preheader ], [ %.lcssa28, %.lr.ph.i.preheader.split.loopexit ]
%12 = icmp ult i32 %9, 7
br i1 %12, label %_Z9sumStructPP8MyStructi.exit.loopexit, label %.lr.ph.i.preheader.split.split
.lr.ph.i.preheader.split.split: ; preds = %.lr.ph.i.preheader.split
br label %.lr.ph.i
.lr.ph.i: ; preds = %.lr.ph.i, %.lr.ph.i.preheader.split.split
; Main loop of phase 1: eight additions of v per iteration, until i == N.
%sum.02.i = phi float [ %sum.02.i.unr, %.lr.ph.i.preheader.split.split ], [ %20, %.lr.ph.i ]
%i.01.i = phi i32 [ %i.01.i.unr, %.lr.ph.i.preheader.split.split ], [ %21, %.lr.ph.i ]
%13 = fadd float %sum.02.i, %8
%14 = fadd float %13, %8
%15 = fadd float %14, %8
%16 = fadd float %15, %8
%17 = fadd float %16, %8
%18 = fadd float %17, %8
%19 = fadd float %18, %8
%20 = fadd float %19, %8
%21 = add nsw i32 %i.01.i, 8
%exitcond.i.7 = icmp eq i32 %21, %N
br i1 %exitcond.i.7, label %_Z9sumStructPP8MyStructi.exit.loopexit.unr-lcssa, label %.lr.ph.i
_Z9sumStructPP8MyStructi.exit.loopexit.unr-lcssa: ; preds = %.lr.ph.i
%.lcssa26 = phi float [ %20, %.lr.ph.i ]
br label %_Z9sumStructPP8MyStructi.exit.loopexit
_Z9sumStructPP8MyStructi.exit.loopexit: ; preds = %_Z9sumStructPP8MyStructi.exit.loopexit.unr-lcssa, %.lr.ph.i.preheader.split
%.lcssa24 = phi float [ %.lcssa24.unr, %.lr.ph.i.preheader.split ], [ %.lcssa26, %_Z9sumStructPP8MyStructi.exit.loopexit.unr-lcssa ]
br label %_Z9sumStructPP8MyStructi.exit
_Z9sumStructPP8MyStructi.exit: ; preds = %_Z9sumStructPP8MyStructi.exit.loopexit, %._Z9sumStructPP8MyStructi.exit_crit_edge
; End of phase 1: store the sum to data[0], then recompute v (%26) for phase 2.
; %.pre-phi18 is the address of structs[0].field1 from whichever path ran.
%.pre-phi18 = phi i32* [ %.pre17, %._Z9sumStructPP8MyStructi.exit_crit_edge ], [ %4, %_Z9sumStructPP8MyStructi.exit.loopexit ]
%sum.0.lcssa.i = phi float [ 0.000000e+00, %._Z9sumStructPP8MyStructi.exit_crit_edge ], [ %.lcssa24, %_Z9sumStructPP8MyStructi.exit.loopexit ]
store float %sum.0.lcssa.i, float* %data, align 4, !tbaa !20
%22 = load float, float* %2, align 4
%23 = load i32, i32* %.pre-phi18, align 4
%24 = sitofp i32 %23 to float
%25 = fmul float %24, 3.500000e+00
%26 = fadd float %22, %25
br label %.lr.ph.i11
.lr.ph.i11: ; preds = %.lr.ph.i11, %_Z9sumStructPP8MyStructi.exit
; Phase 2 loop: three additions per iteration, counter steps by 3 up to 123.
%sum.02.i8 = phi float [ 0.000000e+00, %_Z9sumStructPP8MyStructi.exit ], [ %29, %.lr.ph.i11 ]
%i.01.i9 = phi i32 [ 0, %_Z9sumStructPP8MyStructi.exit ], [ %30, %.lr.ph.i11 ]
%27 = fadd float %sum.02.i8, %26
%28 = fadd float %27, %26
%29 = fadd float %28, %26
%30 = add nsw i32 %i.01.i9, 3
%exitcond.i10.2 = icmp eq i32 %30, 123
br i1 %exitcond.i10.2, label %_Z9sumStructPP8MyStructi.exit12, label %.lr.ph.i11
_Z9sumStructPP8MyStructi.exit12: ; preds = %.lr.ph.i11
; End of phase 2: store to data[3], then recompute v (%36) for phase 3.
%.lcssa25 = phi float [ %29, %.lr.ph.i11 ]
%31 = getelementptr inbounds float, float* %data, i64 3
store float %.lcssa25, float* %31, align 4, !tbaa !20
%32 = load float, float* %2, align 4
%33 = load i32, i32* %.pre-phi18, align 4
%34 = sitofp i32 %33 to float
%35 = fmul float %34, 3.500000e+00
%36 = fadd float %32, %35
br label %.lr.ph.i5
.lr.ph.i5: ; preds = %.lr.ph.i5, %_Z9sumStructPP8MyStructi.exit12
; Phase 3 loop: fifteen additions per iteration, counter steps by 15 up to 12300.
%sum.02.i2 = phi float [ 0.000000e+00, %_Z9sumStructPP8MyStructi.exit12 ], [ %51, %.lr.ph.i5 ]
%i.01.i3 = phi i32 [ 0, %_Z9sumStructPP8MyStructi.exit12 ], [ %52, %.lr.ph.i5 ]
%37 = fadd float %sum.02.i2, %36
%38 = fadd float %37, %36
%39 = fadd float %38, %36
%40 = fadd float %39, %36
%41 = fadd float %40, %36
%42 = fadd float %41, %36
%43 = fadd float %42, %36
%44 = fadd float %43, %36
%45 = fadd float %44, %36
%46 = fadd float %45, %36
%47 = fadd float %46, %36
%48 = fadd float %47, %36
%49 = fadd float %48, %36
%50 = fadd float %49, %36
%51 = fadd float %50, %36
%52 = add nsw i32 %i.01.i3, 15
%exitcond.i4.14 = icmp eq i32 %52, 12300
br i1 %exitcond.i4.14, label %_Z9sumStructPP8MyStructi.exit6, label %.lr.ph.i5
_Z9sumStructPP8MyStructi.exit6: ; preds = %.lr.ph.i5
; End of phase 3: store to data[4] and return.
%.lcssa = phi float [ %51, %.lr.ph.i5 ]
%53 = getelementptr inbounds float, float* %data, i64 4
store float %.lcssa, float* %53, align 4, !tbaa !20
ret void
}
; Function Attrs: norecurse nounwind readonly
; getHalfValue(Half* half_, int a) -> float
;
; Reads the i16 payload of half_[a] (reached through three nested
; single-member structs: Half -> HalfBase -> HalfImpl), sign-extends it to
; i32, adds 123 and returns the result converted to float.
define float @_Z12getHalfValueP4Halfi(%class.Half* nocapture readonly %half_, i32 %a) #1 {
; Index is widened to i64 for the address computation.
%1 = sext i32 %a to i64
%2 = getelementptr inbounds %class.Half, %class.Half* %half_, i64 %1, i32 0, i32 0, i32 0
%3 = load i16, i16* %2, align 2, !tbaa !21
%4 = sext i16 %3 to i32
%5 = add nsw i32 %4, 123
%6 = sitofp i32 %5 to float
ret float %6
}
; Function Attrs: norecurse nounwind
; myte6kernel(TensorEvaluator6* structs, float* data, GpuDevice* gpudevices,
;             int a, int b, int c); tagged as a "kernel" in !nvvm.annotations (!1).
;
; Loads the Half* stored as the first member of structs[a] (member 0 of its
; embedded TensorEvaluator0), reads the i16 payload of element a of that Half
; array, adds 123 and stores the result as a float to data[0].
; %gpudevices, %b and %c are not used in the body.
;
; The pointer load at %3 (a pointer read out of a struct that is itself
; reached through a kernel argument) is the access that the generated OpenCL
; kernel has to reproduce.
define void @_Z11myte6kernelP16TensorEvaluator6PfP9GpuDeviceiii(%class.TensorEvaluator6* nocapture readonly %structs, float* nocapture %data, %class.GpuDevice* nocapture readnone %gpudevices, i32 %a, i32 %b, i32 %c) #2 {
; Same index %a is used for both the evaluator array and the Half array.
%1 = sext i32 %a to i64
%2 = getelementptr inbounds %class.TensorEvaluator6, %class.TensorEvaluator6* %structs, i64 %1, i32 0, i32 0
%3 = load %class.Half*, %class.Half** %2, align 8, !tbaa !24
%4 = getelementptr inbounds %class.Half, %class.Half* %3, i64 %1, i32 0, i32 0, i32 0
%5 = load i16, i16* %4, align 2, !tbaa !21
%6 = sext i16 %5 to i32
%7 = add nsw i32 %6, 123
%8 = sitofp i32 %7 to float
store float %8, float* %data, align 4, !tbaa !20
ret void
}
attributes #0 = { nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_30" "target-features"="+ptx42" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { norecurse nounwind readonly "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_30" "target-features"="+ptx42" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { norecurse nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_30" "target-features"="+ptx42" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #3 = { nounwind readnone }
!nvvm.annotations = !{!0, !1, !2, !3, !2, !4, !4, !4, !4, !5, !5, !4}
!llvm.module.flags = !{!6}
!llvm.ident = !{!7}
!nvvm.internalize.after.link = !{}
!nvvmir.version = !{!8}
!0 = !{void (float*, %struct.MyStruct*, i32)* @_Z8mykernelPfP8MyStructi, !"kernel", i32 1}
!1 = !{void (%class.TensorEvaluator6*, float*, %class.GpuDevice*, i32, i32, i32)* @_Z11myte6kernelP16TensorEvaluator6PfP9GpuDeviceiii, !"kernel", i32 1}
!2 = !{null, !"align", i32 8}
!3 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080}
!4 = !{null, !"align", i32 16}
!5 = !{null, !"align", i32 16, !"align", i32 65552, !"align", i32 131088}
!6 = !{i32 1, !"PIC Level", i32 2}
!7 = !{!"clang version 3.8.0-2ubuntu4 (tags/RELEASE_380/final)"}
!8 = !{i32 1, i32 2}
!9 = !{!10, !10, i64 0}
!10 = !{!"any pointer", !11, i64 0}
!11 = !{!"omnipotent char", !12, i64 0}
!12 = !{!"Simple C/C++ TBAA"}
!13 = !{!14, !15, i64 0}
!14 = !{!"_ZTS8MyStruct", !15, i64 0, !16, i64 4}
!15 = !{!"float", !11, i64 0}
!16 = !{!"int", !11, i64 0}
!17 = !{!14, !16, i64 4}
!18 = distinct !{!18, !19}
!19 = !{!"llvm.loop.unroll.disable"}
!20 = !{!15, !15, i64 0}
!21 = !{!22, !23, i64 0}
!22 = !{!"_ZTS8HalfImpl", !23, i64 0}
!23 = !{!"short", !11, i64 0}
!24 = !{!25, !10, i64 0}
!25 = !{!"_ZTS16TensorEvaluator6", !26, i64 0, !28, i64 24}
!26 = !{!"_ZTS16TensorEvaluator0", !10, i64 0, !27, i64 8}
!27 = !{!"_ZTS9GpuDevice", !16, i64 0, !10, i64 8}
!28 = !{!"_ZTS16TensorEvaluator7", !10, i64 0, !29, i64 8}
!29 = !{!"_ZTS16TensorEvaluator2", !10, i64 0, !27, i64 8}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment