Skip to content

Instantly share code, notes, and snippets.

@inferrna
Created November 29, 2016 12:25
Show Gist options
  • Save inferrna/604e9f84ea69f3031f54a84e5edf641a to your computer and use it in GitHub Desktop.
Save inferrna/604e9f84ea69f3031f54a84e5edf641a to your computer and use it in GitHub Desktop.
$ make run-tests
[ 1%] Built target clew
[ 8%] Built target patch-hostside
[ 16%] Built target easycl
[ 56%] Built target clblast
[ 78%] Built target cocl
Scanning dependencies of target run-singlebuffer
Scanning dependencies of target run-teststream
Scanning dependencies of target run-testnullpointer
Scanning dependencies of target run-testpartialcopy
[ 78%] /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/bin/cocl test/cocl/singlebuffer.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/singlebuffer --add_ir_to_cl
[ 80%] /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/bin/cocl test/cocl/teststream.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/teststream --add_ir_to_cl
[ 81%] /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/bin/cocl test/cocl/testnullpointer.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testnullpointer --add_ir_to_cl
[ 81%] /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/bin/cocl test/cocl/testpartialcopy.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testpartialcopy --add_ir_to_cl
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-gpu-arch=sm_30 --cuda-device-only -emit-llvm -O0 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_deviceside.h -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include test/cocl/testnullpointer.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testnullpointer-device-noopt.ll
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-gpu-arch=sm_30 --cuda-device-only -emit-llvm -O0 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_deviceside.h -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include test/cocl/teststream.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/teststream-device-noopt.ll
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-gpu-arch=sm_30 --cuda-device-only -emit-llvm -O0 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_deviceside.h -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include test/cocl/testpartialcopy.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testpartialcopy-device-noopt.ll
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-gpu-arch=sm_30 --cuda-device-only -emit-llvm -O0 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_deviceside.h -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include test/cocl/singlebuffer.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/singlebuffer-device-noopt.ll
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
warning: unknown warning option warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
'-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
1 warning generated.
+ /usr/lib/llvm-3.8/bin/opt -S -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testpartialcopy-device.ll /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testpartialcopy-device-noopt.ll
1 warning generated.
+ + /usr/lib/llvm-3.8/bin/opt/usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-host-only -emit-llvm -O3 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -S -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testnullpointer-device.ll /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testnullpointer-device-noopt.ll
-Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_hostside.h test/cocl/testpartialcopy.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testpartialcopy-hostraw.ll
1 warning generated.
+ /usr/lib/llvm-3.8/bin/opt -S -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/singlebuffer-device.ll /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/singlebuffer-device-noopt.ll
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-host-only -emit-llvm -O3 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_hostside.h test/cocl/testnullpointer.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testnullpointer-hostraw.ll
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-host-only -emit-llvm -O3 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_hostside.h test/cocl/singlebuffer.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/singlebuffer-hostraw.ll
test/cocl/teststream.cu:71:21: warning: variable length arrays are a C99 feature [-Wvla-extension]
float hostFloats[N];
^
2 warnings generated.
+ /usr/lib/llvm-3.8/bin/opt -S -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/teststream-device.ll /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/teststream-device-noopt.ll
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-host-only -emit-llvm -O3 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_hostside.h test/cocl/teststream.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/teststream-hostraw.ll
warning: unknown warning option warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
'-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
2 warnings generated.
2 warnings generated.
+ /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build/patch-hostside --hostrawfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testnullpointer-hostraw.ll --devicellfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testnullpointer-device.ll --hostpatchedfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testnullpointer-hostpatched.ll
+ /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build/patch-hostside --hostrawfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testpartialcopy-hostraw.ll --devicellfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testpartialcopy-device.ll --hostpatchedfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testpartialcopy-hostpatched.ll
+ /usr/lib/llvm-3.8/bin/clang++ -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2+ /usr/lib/llvm-3.8/bin/clang++ -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -DUSE_CLEW -c -fexceptions /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testpartialcopy-hostpatched.ll -O3 -o -D_GNU_SOURCE /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testpartialcopy.o
-D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -DUSE_CLEW -c /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testnullpointer-hostpatched.ll -O3 -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testnullpointer.o
2 warnings generated.
+ /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build/patch-hostside --hostrawfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/singlebuffer-hostraw.ll --devicellfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/singlebuffer-device.ll --hostpatchedfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/singlebuffer-hostpatched.ll
+ /usr/lib/llvm-3.8/bin/clang++ -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -DUSE_CLEW -c /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/singlebuffer-hostpatched.ll -O3 -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/singlebuffer.o
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include'
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include'
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include'
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include'
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
1 warning generated.
+ [ ! ]
+ g++ -Wl,-rpath,/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -Wl,-rpath,24890ORIGIN -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testnullpointer /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testnullpointer.o -L/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -lcocl -lclblast -leasycl -lclew -lpthread -L/usr/lib/llvm-3.8/lib -lLLVMLTO -lLLVMObjCARCOpts -lLLVMSymbolize -lLLVMDebugInfoPDB -lLLVMDebugInfoDWARF -lLLVMXCoreDisassembler -lLLVMXCoreCodeGen -lLLVMXCoreDesc -lLLVMXCoreInfo -lLLVMXCoreAsmPrinter -lLLVMSystemZDisassembler -lLLVMSystemZCodeGen -lLLVMSystemZAsmParser -lLLVMSystemZDesc -lLLVMSystemZInfo -lLLVMSystemZAsmPrinter -lLLVMSparcDisassembler -lLLVMSparcCodeGen -lLLVMSparcAsmParser -lLLVMSparcDesc -lLLVMSparcInfo -lLLVMSparcAsmPrinter -lLLVMPowerPCDisassembler -lLLVMPowerPCCodeGen -lLLVMPowerPCAsmParser -lLLVMPowerPCDesc -lLLVMPowerPCInfo -lLLVMPowerPCAsmPrinter -lLLVMNVPTXCodeGen -lLLVMNVPTXDesc -lLLVMNVPTXInfo -lLLVMNVPTXAsmPrinter -lLLVMMSP430CodeGen -lLLVMMSP430Desc -lLLVMMSP430Info -lLLVMMSP430AsmPrinter -lLLVMMipsDisassembler -lLLVMMipsCodeGen -lLLVMMipsAsmParser -lLLVMMipsDesc -lLLVMMipsInfo -lLLVMMipsAsmPrinter -lLLVMHexagonDisassembler -lLLVMHexagonCodeGen -lLLVMHexagonAsmParser -lLLVMHexagonDesc -lLLVMHexagonInfo -lLLVMCppBackendCodeGen -lLLVMCppBackendInfo -lLLVMBPFCodeGen -lLLVMBPFDesc -lLLVMBPFInfo -lLLVMBPFAsmPrinter -lLLVMARMDisassembler -lLLVMARMCodeGen -lLLVMARMAsmParser -lLLVMARMDesc -lLLVMARMInfo -lLLVMARMAsmPrinter -lLLVMAMDGPUCodeGen -lLLVMAMDGPUAsmParser -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMAMDGPUInfo -lLLVMAMDGPUAsmPrinter -lLLVMAArch64Disassembler -lLLVMAArch64CodeGen -lLLVMAArch64AsmParser -lLLVMAArch64Desc -lLLVMAArch64Info -lLLVMAArch64AsmPrinter -lLLVMAArch64Utils -lLLVMMIRParser -lLLVMLibDriver -lLLVMOption -lLLVMTableGen -lLLVMLineEditor -lLLVMX86Disassembler -lLLVMX86AsmParser -lLLVMX86CodeGen -lLLVMSelectionDAG -lLLVMAsmPrinter -lLLVMX86Desc -lLLVMMCDisassembler -lLLVMX86Info -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCJIT -lLLVMPasses -lLLVMipo -lLLVMVectorize -lLLVMLinker -lLLVMIRReader -lLLVMAsmParser -lLLVMDebugInfoCodeView -lLLVMInterpreter -lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine -lLLVMInstrumentation -lLLVMProfileData -lLLVMBitWriter -lLLVMOrcJIT -lLLVMTransformUtils -lLLVMExecutionEngine -lLLVMTarget -lLLVMAnalysis -lLLVMRuntimeDyld -lLLVMObject -lLLVMMCParser -lLLVMBitReader -lLLVMMC -lLLVMCore -lLLVMSupport -lrt -ldl -ltinfo -lpthread -lz -lm
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include'
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include'
1 warning generated.
+ [ ! ]
+ g++ -Wl,-rpath,/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -Wl,-rpath,24911ORIGIN -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testpartialcopy /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testpartialcopy.o -L/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -lcocl -lclblast -leasycl -lclew -lpthread -L/usr/lib/llvm-3.8/lib -lLLVMLTO -lLLVMObjCARCOpts -lLLVMSymbolize -lLLVMDebugInfoPDB -lLLVMDebugInfoDWARF -lLLVMXCoreDisassembler -lLLVMXCoreCodeGen -lLLVMXCoreDesc -lLLVMXCoreInfo -lLLVMXCoreAsmPrinter -lLLVMSystemZDisassembler -lLLVMSystemZCodeGen -lLLVMSystemZAsmParser -lLLVMSystemZDesc -lLLVMSystemZInfo -lLLVMSystemZAsmPrinter -lLLVMSparcDisassembler -lLLVMSparcCodeGen -lLLVMSparcAsmParser -lLLVMSparcDesc -lLLVMSparcInfo -lLLVMSparcAsmPrinter -lLLVMPowerPCDisassembler -lLLVMPowerPCCodeGen -lLLVMPowerPCAsmParser -lLLVMPowerPCDesc -lLLVMPowerPCInfo -lLLVMPowerPCAsmPrinter -lLLVMNVPTXCodeGen -lLLVMNVPTXDesc -lLLVMNVPTXInfo -lLLVMNVPTXAsmPrinter -lLLVMMSP430CodeGen -lLLVMMSP430Desc -lLLVMMSP430Info -lLLVMMSP430AsmPrinter -lLLVMMipsDisassembler -lLLVMMipsCodeGen -lLLVMMipsAsmParser -lLLVMMipsDesc -lLLVMMipsInfo -lLLVMMipsAsmPrinter -lLLVMHexagonDisassembler -lLLVMHexagonCodeGen -lLLVMHexagonAsmParser -lLLVMHexagonDesc -lLLVMHexagonInfo -lLLVMCppBackendCodeGenwarning -lLLVMCppBackendInfo: -lLLVMBPFCodeGen -lLLVMBPFDesc -lLLVMBPFInfo -lLLVMBPFAsmPrinter -lLLVMARMDisassembler -lLLVMARMCodeGen -lLLVMARMAsmParser -lLLVMARMDescunknown -lLLVMARMInfo -lLLVMARMAsmPrinter -lLLVMAMDGPUCodeGen -lLLVMAMDGPUAsmParser -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMAMDGPUInfo -lLLVMAMDGPUAsmPrinter -lLLVMAArch64Disassembler -lLLVMAArch64CodeGen -lLLVMAArch64AsmParser -lLLVMAArch64Desc -lLLVMAArch64Info -lLLVMAArch64AsmPrinter -lLLVMAArch64Utils -lLLVMMIRParser -lLLVMLibDriver -lLLVMOption -lLLVMTableGen -lLLVMLineEditor -lLLVMX86Disassembler -lLLVMX86AsmParser -lLLVMX86CodeGen -lLLVMSelectionDAG -lLLVMAsmPrinter -lLLVMX86Desc -lLLVMMCDisassembler -lLLVMX86Info -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCJIT -lLLVMPasses -lLLVMipo -lLLVMVectorize -lLLVMLinker -lLLVMIRReader -lLLVMAsmParser -lLLVMDebugInfoCodeView -lLLVMInterpreter -lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine -lLLVMInstrumentation -lLLVMProfileData -lLLVMBitWriter -lLLVMOrcJIT -lLLVMTransformUtils -lLLVMExecutionEngine -lLLVMTarget -lLLVMAnalysis -lLLVMRuntimeDyld -lLLVMObject -lLLVMMCParser -lLLVMBitReader -lLLVMMC -lLLVMCore -lLLVMSupport -lrt -ldl -ltinfo -lpthread -lz -lm
warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
test/cocl/teststream.cu:71:21: warning: variable length arrays are a C99 feature [-Wvla-extension]
float hostFloats[N];
^
1 warning generated.
+ [ ! ]
+ g++ -Wl,-rpath,/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -Wl,-rpath,24872ORIGIN -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/singlebuffer /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/singlebuffer.o -L/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -lcocl -lclblast -leasycl -lclew -lpthread -L/usr/lib/llvm-3.8/lib -lLLVMLTO -lLLVMObjCARCOpts -lLLVMSymbolize -lLLVMDebugInfoPDB -lLLVMDebugInfoDWARF -lLLVMXCoreDisassembler -lLLVMXCoreCodeGen -lLLVMXCoreDesc -lLLVMXCoreInfo -lLLVMXCoreAsmPrinter -lLLVMSystemZDisassembler -lLLVMSystemZCodeGen -lLLVMSystemZAsmParser -lLLVMSystemZDesc -lLLVMSystemZInfo -lLLVMSystemZAsmPrinter -lLLVMSparcDisassembler -lLLVMSparcCodeGen -lLLVMSparcAsmParser -lLLVMSparcDesc -lLLVMSparcInfo -lLLVMSparcAsmPrinter -lLLVMPowerPCDisassembler -lLLVMPowerPCCodeGen -lLLVMPowerPCAsmParser -lLLVMPowerPCDesc -lLLVMPowerPCInfo -lLLVMPowerPCAsmPrinter -lLLVMNVPTXCodeGen -lLLVMNVPTXDesc -lLLVMNVPTXInfo -lLLVMNVPTXAsmPrinter -lLLVMMSP430CodeGen -lLLVMMSP430Desc -lLLVMMSP430Info -lLLVMMSP430AsmPrinter -lLLVMMipsDisassembler -lLLVMMipsCodeGen -lLLVMMipsAsmParser -lLLVMMipsDesc -lLLVMMipsInfo -lLLVMMipsAsmPrinter -lLLVMHexagonDisassembler -lLLVMHexagonCodeGen -lLLVMHexagonAsmParser -lLLVMHexagonDesc -lLLVMHexagonInfo -lLLVMCppBackendCodeGen -lLLVMCppBackendInfo -lLLVMBPFCodeGen -lLLVMBPFDesc -lLLVMBPFInfo -lLLVMBPFAsmPrinter -lLLVMARMDisassembler -lLLVMARMCodeGen -lLLVMARMAsmParser -lLLVMARMDesc -lLLVMARMInfo -lLLVMARMAsmPrinter -lLLVMAMDGPUCodeGen -lLLVMAMDGPUAsmParser -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMAMDGPUInfo -lLLVMAMDGPUAsmPrinter -lLLVMAArch64Disassembler -lLLVMAArch64CodeGen -lLLVMAArch64AsmParser -lLLVMAArch64Desc -lLLVMAArch64Info -lLLVMAArch64AsmPrinter -lLLVMAArch64Utils -lLLVMMIRParser -lLLVMLibDriver -lLLVMOption -lLLVMTableGen -lLLVMLineEditor -lLLVMX86Disassembler -lLLVMX86AsmParser -lLLVMX86CodeGen -lLLVMSelectionDAG -lLLVMAsmPrinter -lLLVMX86Desc -lLLVMMCDisassembler -lLLVMX86Info -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCJIT -lLLVMPasses -lLLVMipo -lLLVMVectorize -lLLVMLinker -lLLVMIRReader -lLLVMAsmParser -lLLVMDebugInfoCodeView -lLLVMInterpreter -lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine -lLLVMInstrumentation -lLLVMProfileData -lLLVMBitWriter -lLLVMOrcJIT -lLLVMTransformUtils -lLLVMExecutionEngine -lLLVMTarget -lLLVMAnalysis -lLLVMRuntimeDyld -lLLVMObject -lLLVMMCParser -lLLVMBitReader -lLLVMMC -lLLVMCore -lLLVMSupport -lrt -ldl -ltinfo -lpthread -lz -lm
3 warnings generated.
+ /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build/patch-hostside --hostrawfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/teststream-hostraw.ll --devicellfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/teststream-device.ll --hostpatchedfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/teststream-hostpatched.ll
+ /usr/lib/llvm-3.8/bin/clang++ -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -DUSE_CLEW -c /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/teststream-hostpatched.ll -O3 -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/teststream.o
cuStreamCreate redirected
cuStreamCreate current context=0
creating default context
Context() 0x10e2030
cuStreamCreate redirected
cuStreamCreate current context=0
creating default context
Context() 0x7d5030
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include'
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include'
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
1 warning generated.
+ [ ! ]
+ g++ -Wl,-rpath,/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -Wl,-rpath,24874ORIGIN -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/teststream /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/teststream.o -L/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -lcocl -lclblast -leasycl -lclew -lpthread -L/usr/lib/llvm-3.8/lib -lLLVMLTO -lLLVMObjCARCOpts -lLLVMSymbolize -lLLVMDebugInfoPDB -lLLVMDebugInfoDWARF -lLLVMXCoreDisassembler -lLLVMXCoreCodeGen -lLLVMXCoreDesc -lLLVMXCoreInfo -lLLVMXCoreAsmPrinter -lLLVMSystemZDisassembler -lLLVMSystemZCodeGen -lLLVMSystemZAsmParser -lLLVMSystemZDesc -lLLVMSystemZInfo -lLLVMSystemZAsmPrinter -lLLVMSparcDisassembler -lLLVMSparcCodeGen -lLLVMSparcAsmParser -lLLVMSparcDesc -lLLVMSparcInfo -lLLVMSparcAsmPrinter -lLLVMPowerPCDisassembler -lLLVMPowerPCCodeGen -lLLVMPowerPCAsmParser -lLLVMPowerPCDesc -lLLVMPowerPCInfo -lLLVMPowerPCAsmPrinter -lLLVMNVPTXCodeGen -lLLVMNVPTXDesc -lLLVMNVPTXInfo -lLLVMNVPTXAsmPrinter -lLLVMMSP430CodeGen -lLLVMMSP430Desc -lLLVMMSP430Info -lLLVMMSP430AsmPrinter -lLLVMMipsDisassembler -lLLVMMipsCodeGen -lLLVMMipsAsmParser -lLLVMMipsDesc -lLLVMMipsInfo -lLLVMMipsAsmPrinter -lLLVMHexagonDisassembler -lLLVMHexagonCodeGen -lLLVMHexagonAsmParser -lLLVMHexagonDesc -lLLVMHexagonInfo -lLLVMCppBackendCodeGen -lLLVMCppBackendInfo -lLLVMBPFCodeGen -lLLVMBPFDesc -lLLVMBPFInfo -lLLVMBPFAsmPrinter -lLLVMARMDisassembler -lLLVMARMCodeGen -lLLVMARMAsmParser -lLLVMARMDesc -lLLVMARMInfo -lLLVMARMAsmPrinter -lLLVMAMDGPUCodeGen -lLLVMAMDGPUAsmParser -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMAMDGPUInfo -lLLVMAMDGPUAsmPrinter -lLLVMAArch64Disassembler -lLLVMAArch64CodeGen -lLLVMAArch64AsmParser -lLLVMAArch64Desc -lLLVMAArch64Info -lLLVMAArch64AsmPrinter -lLLVMAArch64Utils -lLLVMMIRParser -lLLVMLibDriver -lLLVMOption -lLLVMTableGen -lLLVMLineEditor -lLLVMX86Disassembler -lLLVMX86AsmParser -lLLVMX86CodeGen -lLLVMSelectionDAG -lLLVMAsmPrinter -lLLVMX86Desc -lLLVMMCDisassembler -lLLVMX86Info -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCJIT -lLLVMPasses -lLLVMipo -lLLVMVectorize -lLLVMLinker -lLLVMIRReader -lLLVMAsmParser -lLLVMDebugInfoCodeView -lLLVMInterpreter -lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine -lLLVMInstrumentation -lLLVMProfileData -lLLVMBitWriter -lLLVMOrcJIT -lLLVMTransformUtils -lLLVMExecutionEngine -lLLVMTarget -lLLVMAnalysis -lLLVMRuntimeDyld -lLLVMObject -lLLVMMCParser -lLLVMBitReader -lLLVMMC -lLLVMCore -lLLVMSupport -lrt -ldl -ltinfo -lpthread -lz -lm
cuStreamCreate redirected
cuStreamCreate current context=0
creating default context
Context() 0x166e030
test1
cuStreamCreate redirected
cuStreamCreate current context=0
creating default context
Context() 0x1a18030
Using Advanced Micro Devices, Inc. , OpenCL platform: AMD Accelerated Parallel Processing
Using OpenCL device: Pitcairn
cuStreamCreate redirected new stream 0x18f2810
Memory::newDeviceAlloc context=0x10e2030 bytes=4096 memory=0x18f2de0 clmem=0x18f2c70
cuMemcpyHtoDAsync dst=384 src=0x18ff070 bytes=16
found memory: 0x18f2de0 fakepos=128 bytes=4096
cudaStreamSynchronize queue=0x18f2830
cuMemcpyDtoHAsync queue=0x18f2830 dst=0x18fee70 src=128 bytes=4096
found memory: 0x18f2de0 fakepos=128 bytes=4096
cuMemcpyDtoHAsync dst[0] 6.06749e-08
cudaStreamSynchronize queue=0x18f2830
123.456
444
321
111
found memory: 0x18f2de0 fakepos=128 bytes=4096
cuStreamDestroy_v2 redirected stream=0x18f2810
Using Advanced Micro Devices, Inc. , OpenCL platform: AMD Accelerated Parallel Processing
Using OpenCL device: Pitcairn
cuStreamCreate redirected new stream 0x1043280
Memory::newDeviceAlloc context=0x7d5030 bytes=4096 memory=0xbdd780 clmem=0xbdd610
cuMemcpyHtoDAsync dst=128 src=0x1047880 bytes=4096
found memory: 0xbdd780 fakepos=128 bytes=4096
cudaConfigureCall queue=0x10432a0
grid(1, 1, 1)
block(32, 1, 1)
configureKernel name=_Z8getValuePfS_
building kernel _Z8getValuePfS_
cocl dump cl set
cl: [
kernel void _Z8getValuePfS_(global float* outdata, uint outdata_offset, global float* indata, uint indata_offset, local int *scratch);
kernel void _Z8getValuePfS_(global float* outdata, uint outdata_offset, global float* indata, uint indata_offset, local int *scratch) {
indata += indata_offset;
outdata += outdata_offset;
float v4;
v1:;
/* bool v2 = icmp indata <unk> */;
/* float v4 = select v2 <unk> <unk> */;
v4 = indata == 0 ? 3.0f : 2.0f;
/* void v7 = store v4 outdata */;
outdata[0] = v4;
return;
}
]
[ 81%] Built target run-testpartialcopy
Using Advanced Micro Devices, Inc. , OpenCL platform: AMD Accelerated Parallel Processing
Using OpenCL device: Pitcairn
Using Advanced Micro Devices, Inc. , OpenCL platform: AMD Accelerated Parallel Processing
Using OpenCL device: Pitcairn
Scanning dependencies of target run-offsetkernelargs
cuStreamCreate redirected new stream 0x22343e0
got stream
Memory::newDeviceAlloc context=0x1a18030 bytes=409600 memory=0x2234a00 clmem=0x2234890
cudaConfigureCall queue=0x2234400
grid(3200, 1, 1)
block(32, 1, 1)
configureKernel name=_Z10longKernelPfif
building kernel _Z10longKernelPfif
[ 82%] /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/bin/cocl test/cocl/offsetkernelargs.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/offsetkernelargs --add_ir_to_cl
__internal__ build log:
"/tmp/OCL25092T1.cl", line 10: warning: label "v1" was declared but never
referenced
v1:;
^
... built
setKernelArgCharStar 0x80
found memory: 0xbdd780 fakepos=128 bytes=4096
setKernelArgCharStar 0
kernelGo queue=0x10432a0
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>>
workgroupSize=32
.. kernel queued
cuMemcpyDtoHAsync queue=0x10432a0 dst=0x1047880 src=128 bytes=4096
found memory: 0xbdd780 fakepos=128 bytes=4096
cuMemcpyDtoHAsync dst[0] 3
cudaStreamSynchronize queue=0x10432a0
3
found memory: 0xbdd780 fakepos=128 bytes=4096
cuStreamDestroy_v2 redirected stream=0x1043280
cuStreamCreate redirected
cuStreamCreate current context=0x7d5030
cocl dump cl set
cl: [
kernel void _Z10longKernelPfif(global float* data, uint data_offset, int N, float value, local int *scratch);
kernel void _Z10longKernelPfif(global float* data, uint data_offset, int N, float value, local int *scratch) {
data += data_offset;
float v24;
float v25;
float v36;
float v37;
float v42;
float v43;
float v49;
float v50;
float v55;
float v56;
global float* v23;
global float* v35;
global float* v41;
global float* v48;
global float* v54;
int v16;
int v19;
int v20;
int v21;
int v27;
int v29;
int v31;
int v33;
int v58;
v1:;
/* bool v12 = icmp N <unk> */;
/* if(v12) */
if (N > 0) {
goto v2;
} else {
goto v10;
}
v2:;
/* int v14 = add N <unk> */;
/* int v16 = and N <unk> */;
v16 = N & 3;
/* bool v18 = icmp v16 v13 */;
/* if(v18) */
if (v16 == 0) {
/* int v19 = phi v13 */
v19 = 0;
goto v6;
} else {
goto v3;
}
v3:;
/* int v20 = phi v13 */
v20 = 0;
/* int v21 = phi v16 */
v21 = v16;
goto v4;
v4:;
/* long v22 = sext v20 */;
/* float* v23 = getelementptr data v22 */;
v23 = (&(data[v20]));
/* float v24 = load v23 */;
v24 = v23[0];
/* float v25 = fadd v24 value */;
v25 = v24 + value;
/* void v26 = store v25 v23 */;
v23[0] = v25;
/* int v27 = add v20 <unk> */;
v27 = v20 + 1;
/* int v29 = add v21 v15 */;
v29 = v21 + -1;
/* bool v30 = icmp v29 v13 */;
/* if(v30) */
if (v29 == 0) {
/* int v31 = phi v27 */
v31 = v27;
goto v5;
} else {
/* int v20 = phi v27 */
v20 = v27;
/* int v21 = phi v29 */
v21 = v29;
goto v4;
}
v5:;
/* int v19 = phi v31 */
v19 = v31;
goto v6;
v6:;
/* bool v32 = icmp v14 v17 */;
/* if(v32) */
if (N + -1 < 3) {
goto v9;
} else {
goto v7;
}
v7:;
/* int v33 = phi v19 */
v33 = v19;
goto v11;
v8:;
goto v9;
v9:;
goto v10;
v10:;
return;
v11:;
/* long v34 = sext v33 */;
/* float* v35 = getelementptr data v34 */;
v35 = (&(data[v33]));
/* float v36 = load v35 */;
v36 = v35[0];
/* float v37 = fadd v36 value */;
v37 = v36 + value;
/* void v38 = store v37 v35 */;
v35[0] = v37;
/* int v39 = add v33 v28 */;
/* long v40 = sext v39 */;
/* float* v41 = getelementptr data v40 */;
v41 = (&(data[v33 + 1]));
/* float v42 = load v41 */;
v42 = v41[0];
/* float v43 = fadd v42 value */;
v43 = v42 + value;
/* void v44 = store v43 v41 */;
v41[0] = v43;
/* int v45 = add v33 <unk> */;
/* long v47 = sext v45 */;
/* float* v48 = getelementptr data v47 */;
v48 = (&(data[v33 + 2]));
/* float v49 = load v48 */;
v49 = v48[0];
/* float v50 = fadd v49 value */;
v50 = v49 + value;
/* void v51 = store v50 v48 */;
v48[0] = v50;
/* int v52 = add v33 v17 */;
/* long v53 = sext v52 */;
/* float* v54 = getelementptr data v53 */;
v54 = (&(data[v33 + 3]));
/* float v55 = load v54 */;
v55 = v54[0];
/* float v56 = fadd v55 value */;
v56 = v55 + value;
/* void v57 = store v56 v54 */;
v54[0] = v56;
/* int v58 = add v33 <unk> */;
v58 = v33 + 4;
/* bool v60 = icmp v58 N */;
/* if(v60) */
if (v58 == N) {
goto v8;
} else {
/* int v33 = phi v58 */
v33 = v58;
goto v11;
}
}
]
cuStreamCreate redirected new stream 0x1c39b20
Memory::newDeviceAlloc context=0x166e030 bytes=65536 memory=0x1c39200 clmem=0x1c39090
cuMemcpyHtoDAsync dst=640 src=0x1a5a690 bytes=512
found memory: 0x1c39200 fakepos=128 bytes=65536
cudaConfigureCall queue=0x1c38c50
grid(1, 1, 1)
block(32, 1, 1)
configureKernel name=_Z8getValuePfS_
building kernel _Z8getValuePfS_
cocl dump cl set
cl: [
kernel void _Z8getValuePfS_(global float* outdata, uint outdata_offset, global float* indata, uint indata_offset, local int *scratch);
kernel void _Z8getValuePfS_(global float* outdata, uint outdata_offset, global float* indata, uint indata_offset, local int *scratch) {
indata += indata_offset;
outdata += outdata_offset;
float v2;
float v3;
v1:;
/* float v2 = load indata */;
v2 = indata[0];
/* float v3 = fadd v2 <unk> */;
v3 = v2 + 3.0f;
/* void v5 = store v3 outdata */;
outdata[0] = v3;
return;
}
]
cuStreamCreate redirected new stream 0x1043280
Memory::newDeviceAlloc context=0x7d5030 bytes=4096 memory=0xad5c60 clmem=0x10456d0
cuMemcpyHtoDAsync dst=4224 src=0x1047880 bytes=4096
found memory: 0xad5c60 fakepos=4224 bytes=4096
cudaConfigureCall queue=0x10432a0
grid(1, 1, 1)
block(32, 1, 1)
configureKernel name=_Z16checkNullStructs8MyStruct
building kernel _Z16checkNullStructs8MyStruct
cocl dump cl set
cl: [struct MyStruct {
global float* f0;
global float* f1;
};
struct MyStruct_nopointers {
int f0;
};
kernel void _Z16checkNullStructs8MyStruct(global struct MyStruct_nopointers* mystruct_nopointers, global float* mystruct_ptr0, uint mystruct_ptr0_offset, global float* mystruct_ptr1, uint mystruct_ptr1_offset, local int *scratch);
kernel void _Z16checkNullStructs8MyStruct(global struct MyStruct_nopointers* mystruct_nopointers, global float* mystruct_ptr0, uint mystruct_ptr0_offset, global float* mystruct_ptr1, uint mystruct_ptr1_offset, local int *scratch) {
mystruct_ptr1 += mystruct_ptr1_offset;
mystruct_ptr0 += mystruct_ptr0_offset;
struct MyStruct mystruct[1];
mystruct[0].f0 = 0;
mystruct[0].f1 = 0;
mystruct[0].f0 = mystruct_ptr0;
mystruct[0].f1 = mystruct_ptr1;
float v7;
global float* v11;
global float* v4;
v1:;
/* float** v2 = getelementptr mystruct <unk> <unk> */;
/* float* v4 = load v2 */;
v4 = (&(mystruct[0].f1))[0];
/* bool v5 = icmp v4 <unk> */;
/* float v7 = select v5 <unk> <unk> */;
v7 = v4 == 0 ? 9.0f : 8.0f;
/* float** v10 = getelementptr mystruct v3 <unk> */;
/* float* v11 = load v10 */;
v11 = (&(mystruct[0].f0))[0];
/* void v12 = store v7 v11 */;
v11[0] = v7;
return;
}
]
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-gpu-arch=sm_30 --cuda-device-only -emit-llvm -O0 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_deviceside.h -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include test/cocl/offsetkernelargs.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/offsetkernelargs-device-noopt.ll
__internal__ build log:
"/tmp/OCL25099T1.cl", line 11: warning: label "v1" was declared but never
referenced
v1:;
^
... built
setKernelArgCharStar 0x480
found memory: 0x1c39200 fakepos=128 bytes=65536
setKernelArgCharStar 0x280
found memory: 0x1c39200 fakepos=128 bytes=65536
kernelGo queue=0x1c38c50
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>>
workgroupSize=32
.. kernel queued
cuMemcpyDtoHAsync queue=0x1c38c50 dst=0x1a5a890 src=1152 bytes=512
found memory: 0x1c39200 fakepos=128 bytes=65536
cuMemcpyDtoHAsync dst[0] 126.456
cudaStreamSynchronize queue=0x1c38c50
126.456
found memory: 0x1c39200 fakepos=128 bytes=65536
cuStreamDestroy_v2 redirected stream=0x1c39b20
__internal__ build log:
"/tmp/OCL25092T2.cl", line 24: warning: label "v1" was declared but never
referenced
v1:;
^
... built
setKernelArgStruct structsize=4
setKernelArgCharStar 0x1080
found memory: 0xad5c60 fakepos=4224 bytes=4096
setKernelArgCharStar 0
kernelGo queue=0x10432a0
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>>
workgroupSize=32
.. kernel queued
cuMemcpyDtoHAsync queue=0x10432a0 dst=0x1047880 src=4224 bytes=4096
found memory: 0xad5c60 fakepos=4224 bytes=4096
cuMemcpyDtoHAsync dst[0] 9
cudaStreamSynchronize queue=0x10432a0
9
found memory: 0xad5c60 fakepos=4224 bytes=4096
cuStreamDestroy_v2 redirected stream=0x1043280
cuStreamCreate redirected
cuStreamCreate current context=0x166e030
cuStreamCreate redirected new stream 0x1c39b20
Memory::newDeviceAlloc context=0x166e030 bytes=65536 memory=0x1e862b0 clmem=0x19798d0
cuMemcpyHtoDAsync dst=66176 src=0x1a5a690 bytes=128
found memory: 0x1e862b0 fakepos=65664 bytes=65536
cudaConfigureCall queue=0x1c38c50
grid(1, 1, 1)
block(32, 1, 1)
configureKernel name=_Z12getValueCharPcS_
building kernel _Z12getValueCharPcS_
cocl dump cl set
cl: [
kernel void _Z12getValueCharPcS_(global char* outdata, uint outdata_offset, global char* indata, uint indata_offset, local int *scratch);
kernel void _Z12getValueCharPcS_(global char* outdata, uint outdata_offset, global char* indata, uint indata_offset, local int *scratch) {
indata += indata_offset;
outdata += outdata_offset;
char v2;
char v6;
v1:;
/* char v2 = load indata */;
v2 = indata[0];
/* int v3 = zext v2 */;
/* int v4 = add v3 <unk> */;
/* char v6 = trunc v4 */;
v6 = (char)(v2 + 3);
/* void v7 = store v6 outdata */;
outdata[0] = v6;
return;
}
]
__internal__ build log:
"/tmp/OCL25106T1.cl", line 36: warning: goto statement may cause irreducible
control flow
goto v2;
^
"/tmp/OCL25106T1.cl", line 38: warning: goto statement may cause irreducible
control flow
goto v10;
^
"/tmp/OCL25106T1.cl", line 49: warning: goto statement may cause irreducible
control flow
goto v6;
^
"/tmp/OCL25106T1.cl", line 51: warning: goto statement may cause irreducible
control flow
goto v3;
^
"/tmp/OCL25106T1.cl", line 58: warning: goto statement may cause irreducible
control flow
goto v4;
^
"/tmp/OCL25106T1.cl", line 78: warning: goto statement may cause irreducible
control flow
goto v5;
^
"/tmp/OCL25106T1.cl", line 84: warning: goto statement may cause irreducible
control flow
goto v4;
^
"/tmp/OCL25106T1.cl", line 89: warning: goto statement may cause irreducible
control flow
goto v6;
^
"/tmp/OCL25106T1.cl", line 94: warning: goto statement may cause irreducible
control flow
goto v9;
^
"/tmp/OCL25106T1.cl", line 96: warning: goto statement may cause irreducible
control flow
goto v7;
^
"/tmp/OCL25106T1.cl", line 101: warning: goto statement may cause irreducible
control flow
goto v11;
^
"/tmp/OCL25106T1.cl", line 103: warning: goto statement may cause irreducible
control flow
goto v9;
^
"/tmp/OCL25106T1.cl", line 105: warning: goto statement may cause irreducible
control flow
goto v10;
^
"/tmp/OCL25106T1.cl", line 153: warning: goto statement may cause irreducible
control flow
goto v8;
^
"/tmp/OCL25106T1.cl", line 157: warning: goto statement may cause irreducible
control flow
goto v11;
^
"/tmp/OCL25106T1.cl", line 32: warning: label "v1" was declared but never
[ 82%] Built target run-testnullpointer
referenced
v1:;
^
... built
setKernelArgCharStar 0x80
found memory: 0x2234a00 fakepos=128 bytes=409600
setKernelArgInt32 102400
setKernelArgFloat 3
kernelGo queue=0x2234400
<<< global=dim3(102400,1,1,), workgroupsize=dim3(32,1,1,)>>>
workgroupSize=32
.. kernel queued
Scanning dependencies of target run-testmath
__internal__ build log:
"/tmp/OCL25099T2.cl", line 11: warning: label "v1" was declared but never
referenced
v1:;
^
... built
setKernelArgCharStar 0x10480
found memory: 0x1e862b0 fakepos=65664 bytes=65536
setKernelArgCharStar 0x10280
found memory: 0x1e862b0 fakepos=65664 bytes=65536
kernelGo queue=0x1c38c50
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>>
workgroupSize=32
.. kernel queued
[ 84%] /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/bin/cocl test/cocl/testmath.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testmath --add_ir_to_cl
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-gpu-arch=sm_30 --cuda-device-only -emit-llvm -O0 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_deviceside.h -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include test/cocl/testmath.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testmath-device-noopt.ll
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
queued kernel 1
Event()
cuEventCreate redirected flags=1 new event=0x1dac360
cuEventRecord redirected event=0x1dac360 queue=0x2234400
cuStreamWaitEvent redirected queue=0x2234400 event=0x1dac360 flags=0
cudaConfigureCall queue=0x2234400
grid(3200, 1, 1)
block(32, 1, 1)
configureKernel name=_Z10longKernelPfif
setKernelArgCharStar 0x80
found memory: 0x2234a00 fakepos=128 bytes=409600
setKernelArgInt32 102400
setKernelArgFloat 3
kernelGo queue=0x2234400
<<< global=dim3(102400,1,1,), workgroupsize=dim3(32,1,1,)>>>
cuMemcpyDtoHAsync queue=0x1c38c50 dst=0x1a5a890 src=66688 bytes=128
workgroupSize=32
found memory: 0x1e862b0 fakepos=65664 bytes=65536
.. kernel queued
cuMemcpyDtoHAsync dst[0] -nan
cudaStreamSynchronize queue=0x1c38c50
F
found memory: 0x1e862b0 fakepos=65664 bytes=65536
cuStreamDestroy_v2 redirected stream=0x1c39b20
queued kernel 2
cudaStreamSynchronize queue=0x2234400
finished
cuEventDestroy redirected event=0x1dac360
~Event()
found memory: 0x2234a00 fakepos=128 bytes=409600
cuStreamDestroy_v2 redirected stream=0x22343e0
test2
cuStreamCreate redirected
cuStreamCreate current context=0x1a18030
cuStreamCreate redirected new stream 0x22343e0
call cumemalloc
Memory::newDeviceAlloc context=0x1a18030 bytes=409600 memory=0x2234a00 clmem=0x2234870
cumemalloc done
123 123 123 123 123 123 123 123 123 123
calling cuMemcpyHtoDAsync
cuMemcpyHtoDAsync dst=409728 src=0x7ffd1f0a3980 bytes=409600
found memory: 0x2234a00 fakepos=409728 bytes=409600
cuMemcpyHtoDAsync done
cudaConfigureCall queue=0x2234400
grid(3200, 1, 1)
block(32, 1, 1)
configureKernel name=_Z10longKernelPfif
setKernelArgCharStar 0x64080
found memory: 0x2234a00 fakepos=409728 bytes=409600
setKernelArgInt32 102400
setKernelArgFloat 3
kernelGo queue=0x2234400
<<< global=dim3(102400,1,1,), workgroupsize=dim3(32,1,1,)>>>
workgroupSize=32
.. kernel queued
queued kernel
cuMemcpyDtoHAsync queue=0x2234400 dst=0x7ffd1f0a3980 src=409728 bytes=409600
found memory: 0x2234a00 fakepos=409728 bytes=409600
cuMemcpyDtoHAsync dst[0] 822
queued async copy
cudaStreamSynchronize queue=0x2234400
822 1305 1305 1467 1482 1755 1755 1839 2070 2667
found memory: 0x2234a00 fakepos=409728 bytes=409600
cuStreamDestroy_v2 redirected stream=0x22343e0
[ 84%] Built target run-singlebuffer
1 warning generated.
[ 84%] Built target run-teststream
+ /usr/lib/llvm-3.8/bin/opt -S -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/offsetkernelargs-device.ll /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/offsetkernelargs-device-noopt.ll
Scanning dependencies of target run-testevents
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-host-only -emit-llvm -O3 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_hostside.h test/cocl/offsetkernelargs.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/offsetkernelargs-hostraw.ll
Scanning dependencies of target run-testshfl
[ 85%] /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/bin/cocl test/cocl/testevents.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testevents --add_ir_to_cl
[ 86%] /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/bin/cocl test/cocl/testshfl.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testshfl --add_ir_to_cl
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-gpu-arch=sm_30 --cuda-device-only -emit-llvm -O0 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_deviceside.h -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include test/cocl/testevents.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testevents-device-noopt.ll
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-gpu-arch=sm_30 --cuda-device-only -emit-llvm -O0 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_deviceside.h -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include test/cocl/testshfl.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testshfl-device-noopt.ll
test/cocl/testmath.cu:55:11: warning: unused variable 'diff' [-Wunused-variable]
float diff = std::abs(hostFloats1[0] - 140.296);
^
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
2 warnings generated.
+ /usr/lib/llvm-3.8/bin/opt -S -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testmath-device.ll /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testmath-device-noopt.ll
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-host-only -emit-llvm -O3 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_hostside.h test/cocl/testmath.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testmath-hostraw.ll
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
1 warning generated.
+ /usr/lib/llvm-3.8/bin/opt -S -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testevents-device.ll /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testevents-device-noopt.ll
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-host-only -emit-llvm -O3 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_hostside.h test/cocl/testevents.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testevents-hostraw.ll
2 warnings generated.
+ /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build/patch-hostside --hostrawfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/offsetkernelargs-hostraw.ll --devicellfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/offsetkernelargs-device.ll --hostpatchedfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/offsetkernelargs-hostpatched.ll
+ /usr/lib/llvm-3.8/bin/clang++ -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -DUSE_CLEW -c /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/offsetkernelargs-hostpatched.ll -O3 -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/offsetkernelargs.o
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include'
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include'
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
1 warning generated.
+ [ ! ]
+ g++ -Wl,-rpath,/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -Wl,-rpath,25132ORIGIN -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/offsetkernelargs /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/offsetkernelargs.o -L/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -lcocl -lclblast -leasycl -lclew -lpthread -L/usr/lib/llvm-3.8/lib -lLLVMLTO -lLLVMObjCARCOpts -lLLVMSymbolize -lLLVMDebugInfoPDB -lLLVMDebugInfoDWARF -lLLVMXCoreDisassembler -lLLVMXCoreCodeGen -lLLVMXCoreDesc -lLLVMXCoreInfo -lLLVMXCoreAsmPrinter -lLLVMSystemZDisassembler -lLLVMSystemZCodeGen -lLLVMSystemZAsmParser -lLLVMSystemZDesc -lLLVMSystemZInfo -lLLVMSystemZAsmPrinter -lLLVMSparcDisassembler -lLLVMSparcCodeGen -lLLVMSparcAsmParser -lLLVMSparcDesc -lLLVMSparcInfo -lLLVMSparcAsmPrinter -lLLVMPowerPCDisassembler -lLLVMPowerPCCodeGen -lLLVMPowerPCAsmParser -lLLVMPowerPCDesc -lLLVMPowerPCInfo -lLLVMPowerPCAsmPrinter -lLLVMNVPTXCodeGen -lLLVMNVPTXDesc -lLLVMNVPTXInfo -lLLVMNVPTXAsmPrinter -lLLVMMSP430CodeGen -lLLVMMSP430Desc -lLLVMMSP430Info -lLLVMMSP430AsmPrinter -lLLVMMipsDisassembler -lLLVMMipsCodeGen -lLLVMMipsAsmParser -lLLVMMipsDesc -lLLVMMipsInfo -lLLVMMipsAsmPrinter -lLLVMHexagonDisassembler -lLLVMHexagonCodeGen -lLLVMHexagonAsmParser -lLLVMHexagonDesc -lLLVMHexagonInfo -lLLVMCppBackendCodeGen -lLLVMCppBackendInfo -lLLVMBPFCodeGen -lLLVMBPFDesc -lLLVMBPFInfo -lLLVMBPFAsmPrinter -lLLVMARMDisassembler -lLLVMARMCodeGen -lLLVMARMAsmParser -lLLVMARMDesc -lLLVMARMInfo -lLLVMARMAsmPrinter -lLLVMAMDGPUCodeGen -lLLVMAMDGPUAsmParser -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMAMDGPUInfo -lLLVMAMDGPUAsmPrinter -lLLVMAArch64Disassembler -lLLVMAArch64CodeGen -lLLVMAArch64AsmParser -lLLVMAArch64Desc -lLLVMAArch64Info -lLLVMAArch64AsmPrinter -lLLVMAArch64Utils -lLLVMMIRParser -lLLVMLibDriver -lLLVMOption -lLLVMTableGen -lLLVMLineEditor -lLLVMX86Disassembler -lLLVMX86AsmParser -lLLVMX86CodeGen -lLLVMSelectionDAG -lLLVMAsmPrinter -lLLVMX86Desc -lLLVMMCDisassembler -lLLVMX86Info -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCJIT -lLLVMPasses -lLLVMipo -lLLVMVectorize -lLLVMLinker -lLLVMIRReader -lLLVMAsmParser -lLLVMDebugInfoCodeView -lLLVMInterpreter -lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine -lLLVMInstrumentation -lLLVMProfileData -lLLVMBitWriter -lLLVMOrcJIT -lLLVMTransformUtils -lLLVMExecutionEngine -lLLVMTarget -lLLVMAnalysis -lLLVMRuntimeDyld -lLLVMObject -lLLVMMCParser -lLLVMBitReader -lLLVMMC -lLLVMCore -lLLVMSupport -lrt -ldl -ltinfo -lpthread -lz -lm
2 warnings generated.
+ /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build/patch-hostside --hostrawfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testmath-hostraw.ll --devicellfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testmath-device.ll --hostpatchedfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testmath-hostpatched.ll
+ /usr/lib/llvm-3.8/bin/clang++ -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -DUSE_CLEW -c /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testmath-hostpatched.ll -O3 -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testmath.o
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include'
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include'
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
1 warning generated.
+ [ ! ]
+ g++ -Wl,-rpath,/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -Wl,-rpath,25189ORIGIN -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testmath /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testmath.o -L/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -lcocl -lclblast -leasycl -lclew -lpthread -L/usr/lib/llvm-3.8/lib -lLLVMLTO -lLLVMObjCARCOpts -lLLVMSymbolize -lLLVMDebugInfoPDB -lLLVMDebugInfoDWARF -lLLVMXCoreDisassembler -lLLVMXCoreCodeGen -lLLVMXCoreDesc -lLLVMXCoreInfo -lLLVMXCoreAsmPrinter -lLLVMSystemZDisassembler -lLLVMSystemZCodeGen -lLLVMSystemZAsmParser -lLLVMSystemZDesc -lLLVMSystemZInfo -lLLVMSystemZAsmPrinter -lLLVMSparcDisassembler -lLLVMSparcCodeGen -lLLVMSparcAsmParser -lLLVMSparcDesc -lLLVMSparcInfo -lLLVMSparcAsmPrinter -lLLVMPowerPCDisassembler -lLLVMPowerPCCodeGen -lLLVMPowerPCAsmParser -lLLVMPowerPCDesc -lLLVMPowerPCInfo -lLLVMPowerPCAsmPrinter -lLLVMNVPTXCodeGen -lLLVMNVPTXDesc -lLLVMNVPTXInfo -lLLVMNVPTXAsmPrinter -lLLVMMSP430CodeGen -lLLVMMSP430Desc -lLLVMMSP430Info -lLLVMMSP430AsmPrinter -lLLVMMipsDisassembler -lLLVMMipsCodeGen -lLLVMMipsAsmParser -lLLVMMipsDesc -lLLVMMipsInfo -lLLVMMipsAsmPrinter -lLLVMHexagonDisassembler -lLLVMHexagonCodeGen -lLLVMHexagonAsmParser -lLLVMHexagonDesc -lLLVMHexagonInfo -lLLVMCppBackendCodeGen -lLLVMCppBackendInfo -lLLVMBPFCodeGen -lLLVMBPFDesc -lLLVMBPFInfo -lLLVMBPFAsmPrinter -lLLVMARMDisassembler -lLLVMARMCodeGen -lLLVMARMAsmParser -lLLVMARMDesc -lLLVMARMInfo -lLLVMARMAsmPrinter -lLLVMAMDGPUCodeGen -lLLVMAMDGPUAsmParser -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMAMDGPUInfo -lLLVMAMDGPUAsmPrinter -lLLVMAArch64Disassembler -lLLVMAArch64CodeGen -lLLVMAArch64AsmParser -lLLVMAArch64Desc -lLLVMAArch64Info -lLLVMAArch64AsmPrinter -lLLVMAArch64Utils -lLLVMMIRParser -lLLVMLibDriver -lLLVMOption -lLLVMTableGen -lLLVMLineEditor -lLLVMX86Disassembler -lLLVMX86AsmParser -lLLVMX86CodeGen -lLLVMSelectionDAG -lLLVMAsmPrinter -lLLVMX86Desc -lLLVMMCDisassembler -lLLVMX86Info -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCJIT -lLLVMPasses -lLLVMipo -lLLVMVectorize -lLLVMLinker -lLLVMIRReader -lLLVMAsmParser -lLLVMDebugInfoCodeView -lLLVMInterpreter -lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine -lLLVMInstrumentation -lLLVMProfileData -lLLVMBitWriter -lLLVMOrcJIT -lLLVMTransformUtils -lLLVMExecutionEngine -lLLVMTarget -lLLVMAnalysis -lLLVMRuntimeDyld -lLLVMObject -lLLVMMCParser -lLLVMBitReader -lLLVMMC -lLLVMCore -lLLVMSupport -lrt -ldl -ltinfo -lpthread -lz -lm
test/cocl/testshfl.cu:13:9: warning: unused variable 'warpid' [-Wunused-variable]
int warpid = tid % 32; // assume warpsize 32. Anyway, CUDA code uses warpsize 32.
^
2 warnings generated.
+ /usr/lib/llvm-3.8/bin/opt -S -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testshfl-device.ll /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testshfl-device-noopt.ll
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-host-only -emit-llvm -O3 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_hostside.h test/cocl/testshfl.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testshfl-hostraw.ll
cuStreamCreate redirected
cuStreamCreate current context=0
creating default context
Context() 0x24ec030
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
cuStreamCreate redirected
cuStreamCreate current context=0
creating default context
Context() 0x8dc030
2 warnings generated.
+ /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build/patch-hostside --hostrawfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testevents-hostraw.ll --devicellfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testevents-device.ll --hostpatchedfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testevents-hostpatched.ll
+ /usr/lib/llvm-3.8/bin/clang++ -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -DUSE_CLEW -c /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testevents-hostpatched.ll -O3 -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testevents.o
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include'
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include'
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
1 warning generated.
+ [ ! ]
+ g++ -Wl,-rpath,/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -Wl,-rpath,25254ORIGIN -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testevents /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testevents.o -L/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -lcocl -lclblast -leasycl -lclew -lpthread -L/usr/lib/llvm-3.8/lib -lLLVMLTO -lLLVMObjCARCOpts -lLLVMSymbolize -lLLVMDebugInfoPDB -lLLVMDebugInfoDWARF -lLLVMXCoreDisassembler -lLLVMXCoreCodeGen -lLLVMXCoreDesc -lLLVMXCoreInfo -lLLVMXCoreAsmPrinter -lLLVMSystemZDisassembler -lLLVMSystemZCodeGen -lLLVMSystemZAsmParser -lLLVMSystemZDesc -lLLVMSystemZInfo -lLLVMSystemZAsmPrinter -lLLVMSparcDisassembler -lLLVMSparcCodeGen -lLLVMSparcAsmParser -lLLVMSparcDesc -lLLVMSparcInfo -lLLVMSparcAsmPrinter -lLLVMPowerPCDisassembler -lLLVMPowerPCCodeGen -lLLVMPowerPCAsmParser -lLLVMPowerPCDesc -lLLVMPowerPCInfo -lLLVMPowerPCAsmPrinter -lLLVMNVPTXCodeGen -lLLVMNVPTXDesc -lLLVMNVPTXInfo -lLLVMNVPTXAsmPrinter -lLLVMMSP430CodeGen -lLLVMMSP430Desc -lLLVMMSP430Info -lLLVMMSP430AsmPrinter -lLLVMMipsDisassembler -lLLVMMipsCodeGen -lLLVMMipsAsmParser -lLLVMMipsDesc -lLLVMMipsInfo -lLLVMMipsAsmPrinter -lLLVMHexagonDisassembler -lLLVMHexagonCodeGen -lLLVMHexagonAsmParser -lLLVMHexagonDesc -lLLVMHexagonInfo -lLLVMCppBackendCodeGen -lLLVMCppBackendInfo -lLLVMBPFCodeGen -lLLVMBPFDesc -lLLVMBPFInfo -lLLVMBPFAsmPrinter -lLLVMARMDisassembler -lLLVMARMCodeGen -lLLVMARMAsmParser -lLLVMARMDesc -lLLVMARMInfo -lLLVMARMAsmPrinter -lLLVMAMDGPUCodeGen -lLLVMAMDGPUAsmParser -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMAMDGPUInfo -lLLVMAMDGPUAsmPrinter -lLLVMAArch64Disassembler -lLLVMAArch64CodeGen -lLLVMAArch64AsmParser -lLLVMAArch64Desc -lLLVMAArch64Info -lLLVMAArch64AsmPrinter -lLLVMAArch64Utils -lLLVMMIRParser -lLLVMLibDriver -lLLVMOption -lLLVMTableGen -lLLVMLineEditor -lLLVMX86Disassembler -lLLVMX86AsmParser -lLLVMX86CodeGen -lLLVMSelectionDAG -lLLVMAsmPrinter -lLLVMX86Desc -lLLVMMCDisassembler -lLLVMX86Info -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCJIT -lLLVMPasses -lLLVMipo -lLLVMVectorize -lLLVMLinker -lLLVMIRReader -lLLVMAsmParser -lLLVMDebugInfoCodeView -lLLVMInterpreter -lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine -lLLVMInstrumentation -lLLVMProfileData -lLLVMBitWriter -lLLVMOrcJIT -lLLVMTransformUtils -lLLVMExecutionEngine -lLLVMTarget -lLLVMAnalysis -lLLVMRuntimeDyld -lLLVMObject -lLLVMMCParser -lLLVMBitReader -lLLVMMC -lLLVMCore -lLLVMSupport -lrt -ldl -ltinfo -lpthread -lz -lm
Using Advanced Micro Devices, Inc. , OpenCL platform: AMD Accelerated Parallel Processing
Using OpenCL device: Pitcairn
cuStreamCreate redirected new stream 0x28a7230
Memory::newDeviceAlloc context=0x24ec030 bytes=4096 memory=0x28a7800 clmem=0x28a7690
Memory::newDeviceAlloc context=0x24ec030 bytes=4096 memory=0x28a79e0 clmem=0x28a7830
cuMemcpyHtoDAsync dst=128 src=0x27f75a0 bytes=4096
found memory: 0x28a7800 fakepos=128 bytes=4096
cudaConfigureCall queue=0x28a7250
grid(1, 1, 1)
block(32, 1, 1)
configureKernel name=_Z8getValuePfS_
building kernel _Z8getValuePfS_
cocl dump cl set
cl: [
kernel void _Z8getValuePfS_(global float* outdata, uint outdata_offset, global float* indata, uint indata_offset, local int *scratch);
kernel void _Z8getValuePfS_(global float* outdata, uint outdata_offset, global float* indata, uint indata_offset, local int *scratch) {
indata += indata_offset;
outdata += outdata_offset;
float v2;
float v3;
v1:;
/* float v2 = load indata */;
v2 = indata[0];
/* float v3 = fadd v2 <unk> */;
v3 = v2 + 3.0f;
/* void v5 = store v3 outdata */;
outdata[0] = v3;
return;
}
]
Using Advanced Micro Devices, Inc. , OpenCL platform: AMD Accelerated Parallel Processing
Using OpenCL device: Pitcairn
cuStreamCreate redirected new stream 0xc99090
Memory::newDeviceAlloc context=0x8dc030 bytes=4096 memory=0xe2b780 clmem=0xe2b610
cuMemcpyHtoDAsync dst=128 src=0xe2a5f0 bytes=4096
found memory: 0xe2b780 fakepos=128 bytes=4096
cudaConfigureCall queue=0xc990b0
grid(1, 1, 1)
block(32, 1, 1)
configureKernel name=_Z8getValuePf
building kernel _Z8getValuePf
__internal__ build log:
"/tmp/OCL25377T1.cl", line 11: warning: label "v1" was declared but never
referenced
v1:;
^
... built
setKernelArgCharStar 0x1180
found memory: 0x28a79e0 fakepos=4224 bytes=4096
setKernelArgCharStar 0x280
found memory: 0x28a7800 fakepos=128 bytes=4096
kernelGo queue=0x28a7250
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>>
workgroupSize=32
cocl dump cl set
cl: [
kernel void _Z8getValuePf(global float* data, uint data_offset, local int *scratch);
kernel void _Z8getValuePf(global float* data, uint data_offset, local int *scratch) {
data += data_offset;
float v10;
float v11;
float v12;
float v16;
float v17;
float v18;
float v22;
float v23;
float v24;
float v4;
float v7;
float v8;
global float* v13;
global float* v19;
global float* v25;
global float* v2;
global float* v5;
v1:;
/* float* v2 = getelementptr data <unk> */;
v2 = (&(data[1]));
/* float v4 = load v2 */;
v4 = v2[0];
/* float* v5 = getelementptr data <unk> */;
v5 = (&(data[2]));
/* float v7 = load v5 */;
v7 = v5[0];
/* float v8 = call v4 v7 <unk> */;
v8 = pow(v4, v7);
/* void v9 = store v8 data */;
data[0] = v8;
/* float v10 = load v2 */;
v10 = v2[0];
/* float v11 = load v5 */;
v11 = v5[0];
/* float v12 = call v10 v11 <unk> */;
v12 = fmin(v10, v11);
/* float* v13 = getelementptr data <unk> */;
v13 = (&(data[4]));
/* void v15 = store v12 v13 */;
v13[0] = v12;
/* float v16 = load v2 */;
v16 = v2[0];
/* float v17 = load v5 */;
v17 = v5[0];
/* float v18 = call v16 v17 <unk> */;
v18 = fmax(v16, v17);
/* float* v19 = getelementptr data <unk> */;
v19 = (&(data[5]));
/* void v21 = store v18 v19 */;
v19[0] = v18;
/* float v22 = load v2 */;
v22 = v2[0];
/* float v23 = load v5 */;
v23 = v5[0];
/* float v24 = call v22 v23 <unk> */;
v24 = fmax(v22, v23);
/* float* v25 = getelementptr data <unk> */;
v25 = (&(data[6]));
/* void v27 = store v24 v25 */;
v25[0] = v24;
return;
}
]
.. kernel queued
cuMemcpyDtoHAsync queue=0x28a7250 dst=0x2874750 src=4224 bytes=4096
found memory: 0x28a79e0 fakepos=4224 bytes=4096
cuMemcpyDtoHAsync dst[0] 0.0484016
cudaStreamSynchronize queue=0x28a7250
126.456
found memory: 0x28a7800 fakepos=128 bytes=4096
found memory: 0x28a79e0 fakepos=4224 bytes=4096
cuStreamDestroy_v2 redirected stream=0x28a7230
[ 86%] Built target run-offsetkernelargs
Scanning dependencies of target run-multigpu
[ 88%] /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/bin/cocl test/cocl/multigpu.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multigpu --add_ir_to_cl
__internal__ build log:
"/tmp/OCL25381T1.cl", line 25: warning: label "v1" was declared but never
referenced
v1:;
^
... built
setKernelArgCharStar 0x80
found memory: 0xe2b780 fakepos=128 bytes=4096
kernelGo queue=0xc990b0
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>>
workgroupSize=32
.. kernel queued
cuMemcpyDtoHAsync queue=0xc990b0 dst=0xe2a5f0 src=128 bytes=4096
found memory: 0xe2b780 fakepos=128 bytes=4096
cuMemcpyDtoHAsync dst[0] 140.296
cudaStreamSynchronize queue=0xc990b0
140.296
3
4.5
3
4.5
found memory: 0xe2b780 fakepos=128 bytes=4096
cuStreamDestroy_v2 redirected stream=0xc99090
[ 88%] Built target run-testmath
cuStreamCreate redirected
cuStreamCreate current context=0
creating default context
Context() 0xa9d030
Scanning dependencies of target run-properties
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-gpu-arch=sm_30 --cuda-device-only -emit-llvm -O0 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_deviceside.h -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include test/cocl/multigpu.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multigpu-device-noopt.ll
[ 89%] /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/bin/cocl test/cocl/properties.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/properties --add_ir_to_cl
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-gpu-arch=sm_30 --cuda-device-only -emit-llvm -O0 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_deviceside.h -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include test/cocl/properties.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/properties-device-noopt.ll
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
test/cocl/testshfl.cu:13:9: warning: unused variable 'warpid' [-Wunused-variable]
int warpid = tid % 32; // assume warpsize 32. Anyway, CUDA code uses warpsize 32.
^
3 warnings generated.
+ /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build/patch-hostside --hostrawfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testshfl-hostraw.ll --devicellfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testshfl-device.ll --hostpatchedfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testshfl-hostpatched.ll
+ /usr/lib/llvm-3.8/bin/clang++ -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -DUSE_CLEW -c /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testshfl-hostpatched.ll -O3 -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testshfl.o
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include'
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include'
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
1 warning generated.
+ [ ! ]
+ g++ -Wl,-rpath,/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -Wl,-rpath,25301ORIGIN -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testshfl /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testshfl.o -L/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -lcocl -lclblast -leasycl -lclew -lpthread -L/usr/lib/llvm-3.8/lib -lLLVMLTO -lLLVMObjCARCOpts -lLLVMSymbolize -lLLVMDebugInfoPDB -lLLVMDebugInfoDWARF -lLLVMXCoreDisassembler -lLLVMXCoreCodeGen -lLLVMXCoreDesc -lLLVMXCoreInfo -lLLVMXCoreAsmPrinter -lLLVMSystemZDisassembler -lLLVMSystemZCodeGen -lLLVMSystemZAsmParser -lLLVMSystemZDesc -lLLVMSystemZInfo -lLLVMSystemZAsmPrinter -lLLVMSparcDisassembler -lLLVMSparcCodeGen -lLLVMSparcAsmParser -lLLVMSparcDesc -lLLVMSparcInfo -lLLVMSparcAsmPrinter -lLLVMPowerPCDisassembler -lLLVMPowerPCCodeGen -lLLVMPowerPCAsmParser -lLLVMPowerPCDesc -lLLVMPowerPCInfo -lLLVMPowerPCAsmPrinter -lLLVMNVPTXCodeGen -lLLVMNVPTXDesc -lLLVMNVPTXInfo -lLLVMNVPTXAsmPrinter -lLLVMMSP430CodeGen -lLLVMMSP430Desc -lLLVMMSP430Info -lLLVMMSP430AsmPrinter -lLLVMMipsDisassembler -lLLVMMipsCodeGen -lLLVMMipsAsmParser -lLLVMMipsDesc -lLLVMMipsInfo -lLLVMMipsAsmPrinter -lLLVMHexagonDisassembler -lLLVMHexagonCodeGen -lLLVMHexagonAsmParser -lLLVMHexagonDesc -lLLVMHexagonInfo -lLLVMCppBackendCodeGen -lLLVMCppBackendInfo -lLLVMBPFCodeGen -lLLVMBPFDesc -lLLVMBPFInfo -lLLVMBPFAsmPrinter -lLLVMARMDisassembler -lLLVMARMCodeGen -lLLVMARMAsmParser -lLLVMARMDesc -lLLVMARMInfo -lLLVMARMAsmPrinter -lLLVMAMDGPUCodeGen -lLLVMAMDGPUAsmParser -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMAMDGPUInfo -lLLVMAMDGPUAsmPrinter -lLLVMAArch64Disassembler -lLLVMAArch64CodeGen -lLLVMAArch64AsmParser -lLLVMAArch64Desc -lLLVMAArch64Info -lLLVMAArch64AsmPrinter -lLLVMAArch64Utils -lLLVMMIRParser -lLLVMLibDriver -lLLVMOption -lLLVMTableGen -lLLVMLineEditor -lLLVMX86Disassembler -lLLVMX86AsmParser -lLLVMX86CodeGen -lLLVMSelectionDAG -lLLVMAsmPrinter -lLLVMX86Desc -lLLVMMCDisassembler -lLLVMX86Info -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCJIT -lLLVMPasses -lLLVMipo -lLLVMVectorize -lLLVMLinker -lLLVMIRReader -lLLVMAsmParser -lLLVMDebugInfoCodeView -lLLVMInterpreter -lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine -lLLVMInstrumentation -lLLVMProfileData -lLLVMBitWriter -lLLVMOrcJIT -lLLVMTransformUtils -lLLVMExecutionEngine -lLLVMTarget -lLLVMAnalysis -lLLVMRuntimeDyld -lLLVMObject -lLLVMMCParser -lLLVMBitReader -lLLVMMC -lLLVMCore -lLLVMSupport -lrt -ldl -ltinfo -lpthread -lz -lm
Using Advanced Micro Devices, Inc. , OpenCL platform: AMD Accelerated Parallel Processing
Using OpenCL device: Pitcairn
cuStreamCreate redirected new stream 0xe86280
Memory::newDeviceAlloc context=0xa9d030 bytes=409600 memory=0xe868a0 clmem=0xe86730
cuMemcpyHtoDAsync dst=128 src=0xdad910 bytes=409600
found memory: 0xe868a0 fakepos=128 bytes=409600
cudaConfigureCall using default_queue
cudaConfigureCall queue=0x12b16c0
grid(3200, 1, 1)
block(32, 1, 1)
configureKernel name=_Z10longKernelPfif
building kernel _Z10longKernelPfif
cocl dump cl set
cl: [
kernel void _Z10longKernelPfif(global float* data, uint data_offset, int N, float value, local int *scratch);
kernel void _Z10longKernelPfif(global float* data, uint data_offset, int N, float value, local int *scratch) {
data += data_offset;
float v24;
float v25;
float v36;
float v37;
float v42;
float v43;
float v49;
float v50;
float v55;
float v56;
global float* v23;
global float* v35;
global float* v41;
global float* v48;
global float* v54;
int v16;
int v19;
int v20;
int v21;
int v27;
int v29;
int v31;
int v33;
int v58;
v1:;
/* bool v12 = icmp N <unk> */;
/* if(v12) */
if (N > 0) {
goto v2;
} else {
goto v10;
}
v2:;
/* int v14 = add N <unk> */;
/* int v16 = and N <unk> */;
v16 = N & 3;
/* bool v18 = icmp v16 v13 */;
/* if(v18) */
if (v16 == 0) {
/* int v19 = phi v13 */
v19 = 0;
goto v6;
} else {
goto v3;
}
v3:;
/* int v20 = phi v13 */
v20 = 0;
/* int v21 = phi v16 */
v21 = v16;
goto v4;
v4:;
/* long v22 = sext v20 */;
/* float* v23 = getelementptr data v22 */;
v23 = (&(data[v20]));
/* float v24 = load v23 */;
v24 = v23[0];
/* float v25 = fadd v24 value */;
v25 = v24 + value;
/* void v26 = store v25 v23 */;
v23[0] = v25;
/* int v27 = add v20 <unk> */;
v27 = v20 + 1;
/* int v29 = add v21 v15 */;
v29 = v21 + -1;
/* bool v30 = icmp v29 v13 */;
/* if(v30) */
if (v29 == 0) {
/* int v31 = phi v27 */
v31 = v27;
goto v5;
} else {
/* int v20 = phi v27 */
v20 = v27;
/* int v21 = phi v29 */
v21 = v29;
goto v4;
}
v5:;
/* int v19 = phi v31 */
v19 = v31;
goto v6;
v6:;
/* bool v32 = icmp v14 v17 */;
/* if(v32) */
if (N + -1 < 3) {
goto v9;
} else {
goto v7;
}
v7:;
/* int v33 = phi v19 */
v33 = v19;
goto v11;
v8:;
goto v9;
v9:;
goto v10;
v10:;
return;
v11:;
/* long v34 = sext v33 */;
/* float* v35 = getelementptr data v34 */;
v35 = (&(data[v33]));
/* float v36 = load v35 */;
v36 = v35[0];
/* float v37 = fadd v36 value */;
v37 = v36 + value;
/* void v38 = store v37 v35 */;
v35[0] = v37;
/* int v39 = add v33 v28 */;
/* long v40 = sext v39 */;
/* float* v41 = getelementptr data v40 */;
v41 = (&(data[v33 + 1]));
/* float v42 = load v41 */;
v42 = v41[0];
/* float v43 = fadd v42 value */;
v43 = v42 + value;
/* void v44 = store v43 v41 */;
v41[0] = v43;
/* int v45 = add v33 <unk> */;
/* long v47 = sext v45 */;
/* float* v48 = getelementptr data v47 */;
v48 = (&(data[v33 + 2]));
/* float v49 = load v48 */;
v49 = v48[0];
/* float v50 = fadd v49 value */;
v50 = v49 + value;
/* void v51 = store v50 v48 */;
v48[0] = v50;
/* int v52 = add v33 v17 */;
/* long v53 = sext v52 */;
/* float* v54 = getelementptr data v53 */;
v54 = (&(data[v33 + 3]));
/* float v55 = load v54 */;
v55 = v54[0];
/* float v56 = fadd v55 value */;
v56 = v55 + value;
/* void v57 = store v56 v54 */;
v54[0] = v56;
/* int v58 = add v33 <unk> */;
v58 = v33 + 4;
/* bool v60 = icmp v58 N */;
/* if(v60) */
if (v58 == N) {
goto v8;
} else {
/* int v33 = phi v58 */
v33 = v58;
goto v11;
}
}
]
__internal__ build log:
"/tmp/OCL25444T1.cl", line 36: warning: goto statement may cause irreducible
control flow
goto v2;
^
"/tmp/OCL25444T1.cl", line 38: warning: goto statement may cause irreducible
control flow
goto v10;
^
"/tmp/OCL25444T1.cl", line 49: warning: goto statement may cause irreducible
control flow
goto v6;
^
"/tmp/OCL25444T1.cl", line 51: warning: goto statement may cause irreducible
control flow
goto v3;
^
"/tmp/OCL25444T1.cl", line 58: warning: goto statement may cause irreducible
control flow
goto v4;
^
"/tmp/OCL25444T1.cl", line 78: warning: goto statement may cause irreducible
control flow
goto v5;
^
"/tmp/OCL25444T1.cl", line 84: warning: goto statement may cause irreducible
control flow
goto v4;
^
"/tmp/OCL25444T1.cl", line 89: warning: goto statement may cause irreducible
control flow
goto v6;
^
"/tmp/OCL25444T1.cl", line 94: warning: goto statement may cause irreducible
control flow
goto v9;
^
"/tmp/OCL25444T1.cl", line 96: warning: goto statement may cause irreducible
control flow
goto v7;
^
"/tmp/OCL25444T1.cl", line 101: warning: goto statement may cause irreducible
control flow
goto v11;
^
"/tmp/OCL25444T1.cl", line 103: warning: goto statement may cause irreducible
control flow
goto v9;
^
"/tmp/OCL25444T1.cl", line 105: warning: goto statement may cause irreducible
control flow
goto v10;
^
"/tmp/OCL25444T1.cl", line 153: warning: goto statement may cause irreducible
control flow
goto v8;
^
"/tmp/OCL25444T1.cl", line 157: warning: goto statement may cause irreducible
control flow
goto v11;
^
"/tmp/OCL25444T1.cl", line 32: warning: label "v1" was declared but never
referenced
v1:;
^
... built
setKernelArgCharStar 0x80
found memory: 0xe868a0 fakepos=128 bytes=409600
setKernelArgInt32 102400
setKernelArgFloat 3
kernelGo queue=0x12b16c0
<<< global=dim3(102400,1,1,), workgroupsize=dim3(32,1,1,)>>>
workgroupSize=32
.. kernel queued
cuStreamCreate redirected
cuStreamCreate current context=0
creating default context
Context() 0xb24030
queued kernel x
cuCtxSynchronize redirected
finished
found memory: 0xe868a0 fakepos=128 bytes=409600
cuStreamDestroy_v2 redirected stream=0xe86280
[ 89%] Built target run-testevents
Scanning dependencies of target run-test_bitcast
[ 90%] /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/bin/cocl test/cocl/test_bitcast.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/test_bitcast --add_ir_to_cl
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-gpu-arch=sm_30 --cuda-device-only -emit-llvm -O0 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_deviceside.h -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include test/cocl/test_bitcast.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/test_bitcast-device-noopt.ll
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
test/cocl/multigpu.cu:86:22: warning: variable length arrays are a C99 feature [-Wvla-extension]
pthread_t threads[ deviceCount ];
^
2 warnings generated.
+ /usr/lib/llvm-3.8/bin/opt -S -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multigpu-device.ll /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multigpu-device-noopt.ll
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-host-only -emit-llvm -O3 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_hostside.h test/cocl/multigpu.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multigpu-hostraw.ll
test/cocl/properties.cu:12:11: warning: unused variable 'N' [-Wunused-const-variable]
const int N = 1024;
^
2 warnings generated.
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
+ /usr/lib/llvm-3.8/bin/opt -S -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/properties-device.ll /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/properties-device-noopt.ll
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-host-only -emit-llvm -O3 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_hostside.h test/cocl/properties.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/properties-hostraw.ll
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
Using Advanced Micro Devices, Inc. , OpenCL platform: AMD Accelerated Parallel Processing
Using OpenCL device: Pitcairn
cuStreamCreate redirected new stream 0x10e7e60
Memory::newDeviceAlloc context=0xb24030 bytes=4096 memory=0x10e8480 clmem=0x10e8310
cuMemcpyHtoDAsync dst=128 src=0x10e97a0 bytes=4096
found memory: 0x10e8480 fakepos=128 bytes=4096
cudaConfigureCall queue=0x10e7e80
grid(4, 1, 1)
block(128, 1, 1)
configureKernel name=_Z8getValuePf
building kernel _Z8getValuePf
cocl dump cl set
cl: [
inline float __shfl_down_3(local int *scratch, float v0, int v1, int v2) {
// local float mem[1024];
local float *mem = (local float *)scratch;
int tid = get_local_id(0);
int warpid = tid % 32;
int warpstart = tid - warpid;
mem[tid] = v0;
//barrier(CLK_LOCAL_MEM_FENCE);
int warpsrc = warpid + v1;
warpsrc = warpsrc >= 32 ? warpid : warpsrc;
return mem[warpstart + warpsrc];
}
kernel void _Z8getValuePf(global float* data, uint data_offset, local int *scratch);
kernel void _Z8getValuePf(global float* data, uint data_offset, local int *scratch) {
data += data_offset;
float v5;
float v6;
global float* v4;
int v2;
v1:;
/* int v2 = call <unk> */;
v2 = get_local_id(0);
/* long v3 = sext v2 */;
/* float* v4 = getelementptr data v3 */;
v4 = (&(data[v2]));
/* float v5 = load v4 */;
v5 = v4[0];
/* float v6 = call v5 <unk> <unk> <unk> */;
v6 = __shfl_down_3(scratch, v5, 1, 32);
/* void v9 = store v6 v4 */;
v4[0] = v6;
return;
}
]
__internal__ build log:
"/tmp/OCL25521T1.cl", line 26: warning: label "v1" was declared but never
referenced
v1:;
^
... built
setKernelArgCharStar 0x80
found memory: 0x10e8480 fakepos=128 bytes=4096
kernelGo queue=0x10e7e80
<<< global=dim3(512,1,1,), workgroupsize=dim3(128,1,1,)>>>
workgroupSize=128
.. kernel queued
cuMemcpyDtoHAsync queue=0x10e7e80 dst=0x10e97a0 src=128 bytes=4096
found memory: 0x10e8480 fakepos=128 bytes=4096
cuMemcpyDtoHAsync dst[0] 1001
cudaStreamSynchronize queue=0x10e7e80
1001
1002
1003
1005
1006
found memory: 0x10e8480 fakepos=128 bytes=4096
cuStreamDestroy_v2 redirected stream=0x10e7e60
[ 90%] Built target run-testshfl
Scanning dependencies of target run-testblas
[ 92%] /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/bin/cocl test/cocl/testblas.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testblas --add_ir_to_cl
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-gpu-arch=sm_30 --cuda-device-only -emit-llvm -O0 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_deviceside.h -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include test/cocl/testblas.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testblas-device-noopt.ll
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
1 warning generated.
+ /usr/lib/llvm-3.8/bin/opt -S -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/test_bitcast-device.ll /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/test_bitcast-device-noopt.ll
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-host-only -emit-llvm -O3 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_hostside.h test/cocl/test_bitcast.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/test_bitcast-hostraw.ll
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
test/cocl/multigpu.cu:86:22: warning: variable length arrays are a C99 feature [-Wvla-extension]
pthread_t threads[ deviceCount ];
^
3 warnings generated.
+ /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build/patch-hostside --hostrawfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multigpu-hostraw.ll --devicellfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multigpu-device.ll --hostpatchedfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multigpu-hostpatched.ll
+ /usr/lib/llvm-3.8/bin/clang++ -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -DUSE_CLEW -c /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multigpu-hostpatched.ll -O3 -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multigpu.o
test/cocl/properties.cu:12:11: warning: unused variable 'N' [-Wunused-const-variable]
const int N = 1024;
^
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include'
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include'
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
3 warnings generated.
+ /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build/patch-hostside --hostrawfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/properties-hostraw.ll --devicellfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/properties-device.ll --hostpatchedfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/properties-hostpatched.ll
+ /usr/lib/llvm-3.8/bin/clang++ -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -DUSE_CLEW -c /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/properties-hostpatched.ll -O3 -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/properties.o
1 warning generated.
+ [ ! ]
+ g++ -Wl,-rpath,/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -Wl,-rpath,25406ORIGIN -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multigpu /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multigpu.o -L/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -lcocl -lclblast -leasycl -lclew -lpthread -L/usr/lib/llvm-3.8/lib -lLLVMLTO -lLLVMObjCARCOpts -lLLVMSymbolize -lLLVMDebugInfoPDB -lLLVMDebugInfoDWARF -lLLVMXCoreDisassembler -lLLVMXCoreCodeGen -lLLVMXCoreDesc -lLLVMXCoreInfo -lLLVMXCoreAsmPrinter -lLLVMSystemZDisassembler -lLLVMSystemZCodeGen -lLLVMSystemZAsmParser -lLLVMSystemZDesc -lLLVMSystemZInfo -lLLVMSystemZAsmPrinter -lLLVMSparcDisassembler -lLLVMSparcCodeGen -lLLVMSparcAsmParser -lLLVMSparcDesc -lLLVMSparcInfo -lLLVMSparcAsmPrinter -lLLVMPowerPCDisassembler -lLLVMPowerPCCodeGen -lLLVMPowerPCAsmParser -lLLVMPowerPCDesc -lLLVMPowerPCInfo -lLLVMPowerPCAsmPrinter -lLLVMNVPTXCodeGen -lLLVMNVPTXDesc -lLLVMNVPTXInfo -lLLVMNVPTXAsmPrinter -lLLVMMSP430CodeGen -lLLVMMSP430Desc -lLLVMMSP430Info -lLLVMMSP430AsmPrinter -lLLVMMipsDisassembler -lLLVMMipsCodeGen -lLLVMMipsAsmParser -lLLVMMipsDesc -lLLVMMipsInfo -lLLVMMipsAsmPrinter -lLLVMHexagonDisassembler -lLLVMHexagonCodeGen -lLLVMHexagonAsmParser -lLLVMHexagonDesc -lLLVMHexagonInfo -lLLVMCppBackendCodeGen -lLLVMCppBackendInfo -lLLVMBPFCodeGen -lLLVMBPFDesc -lLLVMBPFInfo -lLLVMBPFAsmPrinter -lLLVMARMDisassembler -lLLVMARMCodeGen -lLLVMARMAsmParser -lLLVMARMDesc -lLLVMARMInfo -lLLVMARMAsmPrinter -lLLVMAMDGPUCodeGen -lLLVMAMDGPUAsmParser -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMAMDGPUInfo -lLLVMAMDGPUAsmPrinter -lLLVMAArch64Disassembler -lLLVMAArch64CodeGen -lLLVMAArch64AsmParser -lLLVMAArch64Desc -lLLVMAArch64Info -lLLVMAArch64AsmPrinter -lLLVMAArch64Utils -lLLVMMIRParser -lLLVMLibDriver -lLLVMOption -lLLVMTableGen -lLLVMLineEditor -lLLVMX86Disassembler -lLLVMX86AsmParser -lLLVMX86CodeGen -lLLVMSelectionDAG -lLLVMAsmPrinter -lLLVMX86Desc -lLLVMMCDisassembler -lLLVMX86Info -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCJIT -lLLVMPasses -lLLVMipo -lLLVMVectorize -lLLVMLinker -lLLVMIRReader -lLLVMAsmParser -lLLVMDebugInfoCodeView -lLLVMInterpreter -lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine -lLLVMInstrumentation -lLLVMProfileData -lLLVMBitWriter -lLLVMOrcJIT -lLLVMTransformUtils -lLLVMExecutionEngine -lLLVMTarget -lLLVMAnalysis -lLLVMRuntimeDyld -lLLVMObject -lLLVMMCParser -lLLVMBitReader -lLLVMMC -lLLVMCore -lLLVMSupport -lrt -ldl -ltinfo -lpthread -lz -lm
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include'
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include'
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
1 warning generated.
+ [ ! ]
+ g++ -Wl,-rpath,/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -Wl,-rpath,25462ORIGIN -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/properties /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/properties.o -L/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -lcocl -lclblast -leasycl -lclew -lpthread -L/usr/lib/llvm-3.8/lib -lLLVMLTO -lLLVMObjCARCOpts -lLLVMSymbolize -lLLVMDebugInfoPDB -lLLVMDebugInfoDWARF -lLLVMXCoreDisassembler -lLLVMXCoreCodeGen -lLLVMXCoreDesc -lLLVMXCoreInfo -lLLVMXCoreAsmPrinter -lLLVMSystemZDisassembler -lLLVMSystemZCodeGen -lLLVMSystemZAsmParser -lLLVMSystemZDesc -lLLVMSystemZInfo -lLLVMSystemZAsmPrinter -lLLVMSparcDisassembler -lLLVMSparcCodeGen -lLLVMSparcAsmParser -lLLVMSparcDesc -lLLVMSparcInfo -lLLVMSparcAsmPrinter -lLLVMPowerPCDisassembler -lLLVMPowerPCCodeGen -lLLVMPowerPCAsmParser -lLLVMPowerPCDesc -lLLVMPowerPCInfo -lLLVMPowerPCAsmPrinter -lLLVMNVPTXCodeGen -lLLVMNVPTXDesc -lLLVMNVPTXInfo -lLLVMNVPTXAsmPrinter -lLLVMMSP430CodeGen -lLLVMMSP430Desc -lLLVMMSP430Info -lLLVMMSP430AsmPrinter -lLLVMMipsDisassembler -lLLVMMipsCodeGen -lLLVMMipsAsmParser -lLLVMMipsDesc -lLLVMMipsInfo -lLLVMMipsAsmPrinter -lLLVMHexagonDisassembler -lLLVMHexagonCodeGen -lLLVMHexagonAsmParser -lLLVMHexagonDesc -lLLVMHexagonInfo -lLLVMCppBackendCodeGen -lLLVMCppBackendInfo -lLLVMBPFCodeGen -lLLVMBPFDesc -lLLVMBPFInfo -lLLVMBPFAsmPrinter -lLLVMARMDisassembler -lLLVMARMCodeGen -lLLVMARMAsmParser -lLLVMARMDesc -lLLVMARMInfo -lLLVMARMAsmPrinter -lLLVMAMDGPUCodeGen -lLLVMAMDGPUAsmParser -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMAMDGPUInfo -lLLVMAMDGPUAsmPrinter -lLLVMAArch64Disassembler -lLLVMAArch64CodeGen -lLLVMAArch64AsmParser -lLLVMAArch64Desc -lLLVMAArch64Info -lLLVMAArch64AsmPrinter -lLLVMAArch64Utils -lLLVMMIRParser -lLLVMLibDriver -lLLVMOption -lLLVMTableGen -lLLVMLineEditor -lLLVMX86Disassembler -lLLVMX86AsmParser -lLLVMX86CodeGen -lLLVMSelectionDAG -lLLVMAsmPrinter -lLLVMX86Desc -lLLVMMCDisassembler -lLLVMX86Info -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCJIT -lLLVMPasses -lLLVMipo -lLLVMVectorize -lLLVMLinker -lLLVMIRReader -lLLVMAsmParser -lLLVMDebugInfoCodeView -lLLVMInterpreter -lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine -lLLVMInstrumentation -lLLVMProfileData -lLLVMBitWriter -lLLVMOrcJIT -lLLVMTransformUtils -lLLVMExecutionEngine -lLLVMTarget -lLLVMAnalysis -lLLVMRuntimeDyld -lLLVMObject -lLLVMMCParser -lLLVMBitReader -lLLVMMC -lLLVMCore -lLLVMSupport -lrt -ldl -ltinfo -lpthread -lz -lm
cudaGetDeviceCount
cudaGetDeviceProperties stub device=0
test/cocl/testblas.cu:12:9: warning: unused variable 'newrows' [-Wunused-variable]
int newrows = cols;
^
2 warnings generated.
+ /usr/lib/llvm-3.8/bin/opt -S -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testblas-device.ll /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testblas-device-noopt.ll
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-host-only -emit-llvm -O3 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_hostside.h test/cocl/testblas.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testblas-hostraw.ll
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
2 warnings generated.
+ /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build/patch-hostside --hostrawfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/test_bitcast-hostraw.ll --devicellfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/test_bitcast-device.ll --hostpatchedfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/test_bitcast-hostpatched.ll
+ /usr/lib/llvm-3.8/bin/clang++ -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -DUSE_CLEW -c /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/test_bitcast-hostpatched.ll -O3 -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/test_bitcast.o
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include'
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include'
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
num platforms 1
checking platform id 0x7fcc76176a18
num devices 2
devices: 2
cuCtxCreate_v2 redirected device=0 flags=0
Context() 0x29f0020
Using Advanced Micro Devices, Inc. , OpenCL platform: AMD Accelerated Parallel Processing
Using OpenCL device: Pitcairn
cuCtxCreate_v2 new context=0x29f0020
created context 0x29f0020
cuCtxCreate_v2 redirected device=1 flags=0
Context() 0x29eef00
terminate called after throwing an instance of 'std::runtime_error'
what(): Not enough OpenCL-enabled GPUs found to satisfy gpu index: 1
1 warning generated.
+ [ ! ]
+ g++ -Wl,-rpath,/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -Wl,-rpath,25532ORIGIN -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/test_bitcast /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/test_bitcast.o -L/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -lcocl -lclblast -leasycl -lclew -lpthread -L/usr/lib/llvm-3.8/lib -lLLVMLTO -lLLVMObjCARCOpts -lLLVMSymbolize -lLLVMDebugInfoPDB -lLLVMDebugInfoDWARF -lLLVMXCoreDisassembler -lLLVMXCoreCodeGen -lLLVMXCoreDesc -lLLVMXCoreInfo -lLLVMXCoreAsmPrinter -lLLVMSystemZDisassembler -lLLVMSystemZCodeGen -lLLVMSystemZAsmParser -lLLVMSystemZDesc -lLLVMSystemZInfo -lLLVMSystemZAsmPrinter -lLLVMSparcDisassembler -lLLVMSparcCodeGen -lLLVMSparcAsmParser -lLLVMSparcDesc -lLLVMSparcInfo -lLLVMSparcAsmPrinter -lLLVMPowerPCDisassembler -lLLVMPowerPCCodeGen -lLLVMPowerPCAsmParser -lLLVMPowerPCDesc -lLLVMPowerPCInfo -lLLVMPowerPCAsmPrinter -lLLVMNVPTXCodeGen -lLLVMNVPTXDesc -lLLVMNVPTXInfo -lLLVMNVPTXAsmPrinter -lLLVMMSP430CodeGen -lLLVMMSP430Desc -lLLVMMSP430Info -lLLVMMSP430AsmPrinter -lLLVMMipsDisassembler -lLLVMMipsCodeGen -lLLVMMipsAsmParser -lLLVMMipsDesc -lLLVMMipsInfo -lLLVMMipsAsmPrinter -lLLVMHexagonDisassembler -lLLVMHexagonCodeGen -lLLVMHexagonAsmParser -lLLVMHexagonDesc -lLLVMHexagonInfo -lLLVMCppBackendCodeGen -lLLVMCppBackendInfo -lLLVMBPFCodeGen -lLLVMBPFDesc -lLLVMBPFInfo -lLLVMBPFAsmPrinter -lLLVMARMDisassembler -lLLVMARMCodeGen -lLLVMARMAsmParser -lLLVMARMDesc -lLLVMARMInfo -lLLVMARMAsmPrinter -lLLVMAMDGPUCodeGen -lLLVMAMDGPUAsmParser -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMAMDGPUInfo -lLLVMAMDGPUAsmPrinter -lLLVMAArch64Disassembler -lLLVMAArch64CodeGen -lLLVMAArch64AsmParser -lLLVMAArch64Desc -lLLVMAArch64Info -lLLVMAArch64AsmPrinter -lLLVMAArch64Utils -lLLVMMIRParser -lLLVMLibDriver -lLLVMOption -lLLVMTableGen -lLLVMLineEditor -lLLVMX86Disassembler -lLLVMX86AsmParser -lLLVMX86CodeGen -lLLVMSelectionDAG -lLLVMAsmPrinter -lLLVMX86Desc -lLLVMMCDisassembler -lLLVMX86Info -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCJIT -lLLVMPasses -lLLVMipo -lLLVMVectorize -lLLVMLinker -lLLVMIRReader -lLLVMAsmParser -lLLVMDebugInfoCodeView -lLLVMInterpreter -lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine -lLLVMInstrumentation -lLLVMProfileData -lLLVMBitWriter -lLLVMOrcJIT -lLLVMTransformUtils -lLLVMExecutionEngine -lLLVMTarget -lLLVMAnalysis -lLLVMRuntimeDyld -lLLVMObject -lLLVMMCParser -lLLVMBitReader -lLLVMMC -lLLVMCore -lLLVMSupport -lrt -ldl -ltinfo -lpthread -lz -lm
maxworkgroupsize 256
cuMemGetInfo redirected
free 1395373056 total 2103008640
[ 92%] Built target run-properties
Scanning dependencies of target run-multithreading
[ 92%] /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/bin/cocl test/cocl/multithreading.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multithreading --add_ir_to_cl
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-gpu-arch=sm_30 --cuda-device-only -emit-llvm -O0 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_deviceside.h -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include test/cocl/multithreading.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multithreading-device-noopt.ll
Aborted (core dumped)
CMakeFiles/run-multigpu.dir/build.make:57: recipe for target 'CMakeFiles/run-multigpu' failed
make[3]: *** [CMakeFiles/run-multigpu] Error 134
CMakeFiles/Makefile2:998: recipe for target 'CMakeFiles/run-multigpu.dir/all' failed
make[2]: *** [CMakeFiles/run-multigpu.dir/all] Error 2
make[2]: *** Waiting for unfinished jobs....
creating default context
Context() 0x244b030
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
Using Advanced Micro Devices, Inc. , OpenCL platform: AMD Accelerated Parallel Processing
Using OpenCL device: Pitcairn
Memory::newDeviceAlloc context=0x244b030 bytes=128 memory=0x2857a10 clmem=0x2cbe960
Memory::newDeviceAlloc context=0x244b030 bytes=128 memory=0x2cbec90 clmem=0x2cbeae0
Memory::newDeviceAlloc context=0x244b030 bytes=128 memory=0x2cbb490 clmem=0x2805a70
cudamempcy using opencl cudaMemcpyKind 222 count=128
found memory: 0x2857a10 fakepos=128 bytes=128
cudaConfigureCall using default_queue
cudaConfigureCall queue=0x28579f0
grid(1, 1, 1)
block(32, 1, 1)
configureKernel name=_Z8mykernelPiPfS_
building kernel _Z8mykernelPiPfS_
cocl dump cl set
cl: [
kernel void _Z8mykernelPiPfS_(global int* int1, uint int1_offset, global float* f1, uint f1_offset, global int* int2, uint int2_offset, local int *scratch);
kernel void _Z8mykernelPiPfS_(global int* int1, uint int1_offset, global float* f1, uint f1_offset, global int* int2, uint int2_offset, local int *scratch) {
int2 += int2_offset;
f1 += f1_offset;
int1 += int1_offset;
global int* v3;
int v2;
v1:;
/* int v2 = load int1 */;
v2 = int1[0];
/* int* v3 = bitcast f1 */;
v3 = (global int*)f1;
/* void v4 = store v2 v3 */;
v3[0] = v2;
/* void v5 = store v2 int2 */;
int2[0] = v2;
return;
}
]
__internal__ build log:
"/tmp/OCL25733T1.cl", line 12: warning: label "v1" was declared but never
referenced
v1:;
^
... built
setKernelArgCharStar 0x80
found memory: 0x2857a10 fakepos=128 bytes=128
setKernelArgCharStar 0x100
found memory: 0x2cbec90 fakepos=256 bytes=128
setKernelArgCharStar 0x180
found memory: 0x2cbb490 fakepos=384 bytes=128
kernelGo queue=0x28579f0
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>>
workgroupSize=32
.. kernel queued
cudamempcy using opencl cudaMemcpyKind 111 count=128
cudamemcpy device to host
found memory: 0x2cbec90 fakepos=256 bytes=128
cudamempcy using opencl cudaMemcpyKind 111 count=128
cudamemcpy device to host
found memory: 0x2cbb490 fakepos=384 bytes=128
this should NOT be 123, should be some weird float value, not even slightly close to 123 :
f1[0] 1.7236e-43
this SHOULD be 123 :
int2[0] 123
cudamempcy using opencl cudaMemcpyKind 222 count=128
found memory: 0x2857a10 fakepos=128 bytes=128
after copy to device
cudaConfigureCall using default_queue
cudaConfigureCall queue=0x28579f0
grid(1, 1, 1)
block(32, 1, 1)
configureKernel name=_Z10inttofloatPfPi
building kernel _Z10inttofloatPfPi
cocl dump cl set
cl: [
kernel void _Z10inttofloatPfPi(global float* out, uint out_offset, global int* in, uint in_offset, local int *scratch);
kernel void _Z10inttofloatPfPi(global float* out, uint out_offset, global int* in, uint in_offset, local int *scratch) {
in += in_offset;
out += out_offset;
global int* v3;
int v2;
v1:;
/* int v2 = load in */;
v2 = in[0];
/* int* v3 = bitcast out */;
v3 = (global int*)out;
/* void v4 = store v2 v3 */;
v3[0] = v2;
return;
}
]
__internal__ build log:
"/tmp/OCL25733T2.cl", line 11: warning: label "v1" was declared but never
referenced
v1:;
^
... built
setKernelArgCharStar 0x100
found memory: 0x2cbec90 fakepos=256 bytes=128
setKernelArgCharStar 0x80
found memory: 0x2857a10 fakepos=128 bytes=128
kernelGo queue=0x28579f0
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>>
workgroupSize=32
.. kernel queued
after kernel call 2
cudamempcy using opencl cudaMemcpyKind 111 count=128
cudamemcpy device to host
found memory: 0x2cbec90 fakepos=256 bytes=128
f1[0]1.08881e-42
cudaConfigureCall using default_queue
cudaConfigureCall queue=0x28579f0
grid(1, 1, 1)
block(32, 1, 1)
configureKernel name=_Z10floattointPiPf
building kernel _Z10floattointPiPf
cocl dump cl set
cl: [
kernel void _Z10floattointPiPf(global int* out, uint out_offset, global float* in, uint in_offset, local int *scratch);
kernel void _Z10floattointPiPf(global int* out, uint out_offset, global float* in, uint in_offset, local int *scratch) {
in += in_offset;
out += out_offset;
int v3;
v1:;
/* int* v2 = bitcast in */;
/* int v3 = load v2 */;
v3 = ((global int*)in)[0];
/* void v4 = store v3 out */;
out[0] = v3;
return;
}
]
test/cocl/testblas.cu:12:9: warning: unused variable 'newrows' [-Wunused-variable]
int newrows = cols;
^
__internal__ build log:
"/tmp/OCL25733T3.cl", line 10: warning: label "v1" was declared but never
referenced
v1:;
^
... built
setKernelArgCharStar 0x180
found memory: 0x2cbb490 fakepos=384 bytes=128
setKernelArgCharStar 0x100
found memory: 0x2cbec90 fakepos=256 bytes=128
kernelGo queue=0x28579f0
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>>
workgroupSize=32
.. kernel queued
after kernel call 3
cudamempcy using opencl cudaMemcpyKind 111 count=128
cudamemcpy device to host
found memory: 0x2cbb490 fakepos=384 bytes=128
int2[0]777
3 warnings generated.
+ /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build/patch-hostside --hostrawfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testblas-hostraw.ll --devicellfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testblas-device.ll --hostpatchedfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testblas-hostpatched.ll
+ /usr/lib/llvm-3.8/bin/clang++ -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -DUSE_CLEW -c /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testblas-hostpatched.ll -O3 -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testblas.o
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include'
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include'
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
[ 92%] Built target run-test_bitcast
1 warning generated.
+ [ ! ]
+ g++ -Wl,-rpath,/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -Wl,-rpath,25597ORIGIN -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testblas /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testblas.o -L/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -lcocl -lclblast -leasycl -lclew -lpthread -L/usr/lib/llvm-3.8/lib -lLLVMLTO -lLLVMObjCARCOpts -lLLVMSymbolize -lLLVMDebugInfoPDB -lLLVMDebugInfoDWARF -lLLVMXCoreDisassembler -lLLVMXCoreCodeGen -lLLVMXCoreDesc -lLLVMXCoreInfo -lLLVMXCoreAsmPrinter -lLLVMSystemZDisassembler -lLLVMSystemZCodeGen -lLLVMSystemZAsmParser -lLLVMSystemZDesc -lLLVMSystemZInfo -lLLVMSystemZAsmPrinter -lLLVMSparcDisassembler -lLLVMSparcCodeGen -lLLVMSparcAsmParser -lLLVMSparcDesc -lLLVMSparcInfo -lLLVMSparcAsmPrinter -lLLVMPowerPCDisassembler -lLLVMPowerPCCodeGen -lLLVMPowerPCAsmParser -lLLVMPowerPCDesc -lLLVMPowerPCInfo -lLLVMPowerPCAsmPrinter -lLLVMNVPTXCodeGen -lLLVMNVPTXDesc -lLLVMNVPTXInfo -lLLVMNVPTXAsmPrinter -lLLVMMSP430CodeGen -lLLVMMSP430Desc -lLLVMMSP430Info -lLLVMMSP430AsmPrinter -lLLVMMipsDisassembler -lLLVMMipsCodeGen -lLLVMMipsAsmParser -lLLVMMipsDesc -lLLVMMipsInfo -lLLVMMipsAsmPrinter -lLLVMHexagonDisassembler -lLLVMHexagonCodeGen -lLLVMHexagonAsmParser -lLLVMHexagonDesc -lLLVMHexagonInfo -lLLVMCppBackendCodeGen -lLLVMCppBackendInfo -lLLVMBPFCodeGen -lLLVMBPFDesc -lLLVMBPFInfo -lLLVMBPFAsmPrinter -lLLVMARMDisassembler -lLLVMARMCodeGen -lLLVMARMAsmParser -lLLVMARMDesc -lLLVMARMInfo -lLLVMARMAsmPrinter -lLLVMAMDGPUCodeGen -lLLVMAMDGPUAsmParser -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMAMDGPUInfo -lLLVMAMDGPUAsmPrinter -lLLVMAArch64Disassembler -lLLVMAArch64CodeGen -lLLVMAArch64AsmParser -lLLVMAArch64Desc -lLLVMAArch64Info -lLLVMAArch64AsmPrinter -lLLVMAArch64Utils -lLLVMMIRParser -lLLVMLibDriver -lLLVMOption -lLLVMTableGen -lLLVMLineEditor -lLLVMX86Disassembler -lLLVMX86AsmParser -lLLVMX86CodeGen -lLLVMSelectionDAG -lLLVMAsmPrinter -lLLVMX86Desc -lLLVMMCDisassembler -lLLVMX86Info -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCJIT -lLLVMPasses -lLLVMipo -lLLVMVectorize -lLLVMLinker -lLLVMIRReader -lLLVMAsmParser -lLLVMDebugInfoCodeView -lLLVMInterpreter -lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine -lLLVMInstrumentation -lLLVMProfileData -lLLVMBitWriter -lLLVMOrcJIT -lLLVMTransformUtils -lLLVMExecutionEngine -lLLVMTarget -lLLVMAnalysis -lLLVMRuntimeDyld -lLLVMObject -lLLVMMCParser -lLLVMBitReader -lLLVMMC -lLLVMCore -lLLVMSupport -lrt -ldl -ltinfo -lpthread -lz -lm
1 warning generated.
+ /usr/lib/llvm-3.8/bin/opt -S -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multithreading-device.ll /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multithreading-device-noopt.ll
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-host-only -emit-llvm -O3 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_hostside.h test/cocl/multithreading.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multithreading-hostraw.ll
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
cuStreamCreate redirected
cuStreamCreate current context=0
creating default context
Context() 0xb89030
Using Advanced Micro Devices, Inc. , OpenCL platform: AMD Accelerated Parallel Processing
Using OpenCL device: Pitcairn
cuStreamCreate redirected new stream 0xe96f50
A:
3 5
5 8
2 -1
B:
3 5 4 1
5 8 5 7
ATrans:
3 5 2
5 8 -1
BTrans:
3 5
5 8
4 5
1 7
Memory::newDeviceAlloc context=0xb89030 bytes=4120 memory=0x10d7c50 clmem=0x10d7ae0
Memory::newDeviceAlloc context=0xb89030 bytes=4128 memory=0x10d7df0 clmem=0x10d7c80
Memory::newDeviceAlloc context=0xb89030 bytes=4144 memory=0x10d8010 clmem=0x10d7e60
cuMemcpyHtoDAsync dst=128 src=0x7ffed72eac10 bytes=24
found memory: 0x10d7c50 fakepos=128 bytes=4120
cuMemcpyHtoDAsync dst=4352 src=0x7ffed72eabf0 bytes=32
found memory: 0x10d7df0 fakepos=4352 bytes=4128
found memory: 0x10d7c50 fakepos=128 bytes=4120
found memory: 0x10d7df0 fakepos=4352 bytes=4128
found memory: 0x10d8010 fakepos=8576 bytes=4144
cuMemcpyDtoHAsync queue=0xe96f70 dst=0x7ffed72eab90 src=8576 bytes=48
found memory: 0x10d8010 fakepos=8576 bytes=4144
cuMemcpyDtoHAsync dst[0] 34
cudaStreamSynchronize queue=0xe96f70
C trans:
34 55 1
55 89 2
37 60 3
38 61 -5
C:
34 55 37 38
55 89 60 61
1 2 3 -5
C check:
34 55 37 38
55 89 60 61
1 2 3 -5
found memory: 0x10d7c50 fakepos=128 bytes=4120
found memory: 0x10d7df0 fakepos=4352 bytes=4128
found memory: 0x10d8010 fakepos=8576 bytes=4144
cuStreamDestroy_v2 redirected stream=0xe96f50
finished testblas
2 warnings generated.
+ /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build/patch-hostside --hostrawfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multithreading-hostraw.ll --devicellfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multithreading-device.ll --hostpatchedfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multithreading-hostpatched.ll
+ /usr/lib/llvm-3.8/bin/clang++ -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -DUSE_CLEW -c /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multithreading-hostpatched.ll -O3 -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multithreading.o
[ 92%] Built target run-testblas
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include'
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include'
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option]
1 warning generated.
+ [ ! ]
+ g++ -Wl,-rpath,/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -Wl,-rpath,25687ORIGIN -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multithreading /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multithreading.o -L/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -lcocl -lclblast -leasycl -lclew -lpthread -L/usr/lib/llvm-3.8/lib -lLLVMLTO -lLLVMObjCARCOpts -lLLVMSymbolize -lLLVMDebugInfoPDB -lLLVMDebugInfoDWARF -lLLVMXCoreDisassembler -lLLVMXCoreCodeGen -lLLVMXCoreDesc -lLLVMXCoreInfo -lLLVMXCoreAsmPrinter -lLLVMSystemZDisassembler -lLLVMSystemZCodeGen -lLLVMSystemZAsmParser -lLLVMSystemZDesc -lLLVMSystemZInfo -lLLVMSystemZAsmPrinter -lLLVMSparcDisassembler -lLLVMSparcCodeGen -lLLVMSparcAsmParser -lLLVMSparcDesc -lLLVMSparcInfo -lLLVMSparcAsmPrinter -lLLVMPowerPCDisassembler -lLLVMPowerPCCodeGen -lLLVMPowerPCAsmParser -lLLVMPowerPCDesc -lLLVMPowerPCInfo -lLLVMPowerPCAsmPrinter -lLLVMNVPTXCodeGen -lLLVMNVPTXDesc -lLLVMNVPTXInfo -lLLVMNVPTXAsmPrinter -lLLVMMSP430CodeGen -lLLVMMSP430Desc -lLLVMMSP430Info -lLLVMMSP430AsmPrinter -lLLVMMipsDisassembler -lLLVMMipsCodeGen -lLLVMMipsAsmParser -lLLVMMipsDesc -lLLVMMipsInfo -lLLVMMipsAsmPrinter -lLLVMHexagonDisassembler -lLLVMHexagonCodeGen -lLLVMHexagonAsmParser -lLLVMHexagonDesc -lLLVMHexagonInfo -lLLVMCppBackendCodeGen -lLLVMCppBackendInfo -lLLVMBPFCodeGen -lLLVMBPFDesc -lLLVMBPFInfo -lLLVMBPFAsmPrinter -lLLVMARMDisassembler -lLLVMARMCodeGen -lLLVMARMAsmParser -lLLVMARMDesc -lLLVMARMInfo -lLLVMARMAsmPrinter -lLLVMAMDGPUCodeGen -lLLVMAMDGPUAsmParser -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMAMDGPUInfo -lLLVMAMDGPUAsmPrinter -lLLVMAArch64Disassembler -lLLVMAArch64CodeGen -lLLVMAArch64AsmParser -lLLVMAArch64Desc -lLLVMAArch64Info -lLLVMAArch64AsmPrinter -lLLVMAArch64Utils -lLLVMMIRParser -lLLVMLibDriver -lLLVMOption -lLLVMTableGen -lLLVMLineEditor -lLLVMX86Disassembler -lLLVMX86AsmParser -lLLVMX86CodeGen -lLLVMSelectionDAG -lLLVMAsmPrinter -lLLVMX86Desc -lLLVMMCDisassembler -lLLVMX86Info -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCJIT -lLLVMPasses -lLLVMipo -lLLVMVectorize -lLLVMLinker -lLLVMIRReader -lLLVMAsmParser -lLLVMDebugInfoCodeView -lLLVMInterpreter -lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine -lLLVMInstrumentation -lLLVMProfileData -lLLVMBitWriter -lLLVMOrcJIT -lLLVMTransformUtils -lLLVMExecutionEngine -lLLVMTarget -lLLVMAnalysis -lLLVMRuntimeDyld -lLLVMObject -lLLVMMCParser -lLLVMBitReader -lLLVMMC -lLLVMCore -lLLVMSupport -lrt -ldl -ltinfo -lpthread -lz -lm
creaed threads
thread 0
cuStreamCreate redirected
cuStreamCreate current context=0
creating default context
Context() 0x7f859c0008e0
thread 1
cuStreamCreate redirected
cuStreamCreate current context=0
creating default context
Context() 0x7f85900008e0
thread 2
cuStreamCreate redirected
cuStreamCreate current context=0
creating default context
Context() 0x7f85880008e0
thread 3
cuStreamCreate redirected
cuStreamCreate current context=0
creating default context
Context() 0x7f85800008e0
Using Advanced Micro Devices, Inc. , OpenCL platform: AMD Accelerated Parallel Processing
Using OpenCL device: Pitcairn
Using Advanced Micro Devices, Inc. , OpenCL platform: AMD Accelerated Parallel Processing
Using OpenCL device: Pitcairn
Using Advanced Micro Devices, Inc. , OpenCL platform: AMD Accelerated Parallel Processing
Using OpenCL device: Pitcairn
cuStreamCreate redirected new stream 0x7f859c2b86c0
Memory::newDeviceAlloc context=0x7f859c0008e0 bytes=4096 memory=0x7f859c2b8ce0 clmem=0x7f859c2b8b70
cudaConfigureCall queue=0x7f859c2b86e0
grid(1, 1, 1)
block(32, 1, 1)
configureKernel name=_Z8getValuePfS_
building kernel _Z8getValuePfS_
cocl dump cl set
cl: [
kernel void _Z8getValuePfS_(global float* outdata, uint outdata_offset, global float* indata, uint indata_offset, local int *scratch);
kernel void _Z8getValuePfS_(global float* outdata, uint outdata_offset, global float* indata, uint indata_offset, local int *scratch) {
indata += indata_offset;
outdata += outdata_offset;
float v4;
v1:;
/* bool v2 = icmp indata <unk> */;
/* float v4 = select v2 <unk> <unk> */;
v4 = indata == 0 ? 3.0f : 2.0f;
/* void v7 = store v4 outdata */;
outdata[0] = v4;
return;
}
]
Using Advanced Micro Devices, Inc. , OpenCL platform: AMD Accelerated Parallel Processing
Using OpenCL device: Pitcairn
cuStreamCreate redirected new stream 0x7f8590001810
Memory::newDeviceAlloc context=0x7f85900008e0 bytes=4096 memory=0x7f8590001290 clmem=0x7f8590002c80
cuStreamCreate redirected new stream 0x7f8588001810
Memory::newDeviceAlloc context=0x7f85880008e0 bytes=4096 memory=0x7f8588001290 clmem=0x7f8588002c80
cuStreamCreate redirected new stream 0x7f8580001810
Memory::newDeviceAlloc context=0x7f85800008e0 bytes=4096 memory=0x7f8580001290 clmem=0x7f8580002c80
__internal__ build log:
"/tmp/OCL25771T1.cl", line 10: warning: label "v1" was declared but never
referenced
v1:;
^
... built
setKernelArgCharStar 0x80
found memory: 0x7f859c2b8ce0 fakepos=128 bytes=4096
setKernelArgCharStar 0
kernelGo queue=0x7f859c2b86e0
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>>
workgroupSize=32
.. kernel queued
cudaConfigureCall queue=0x7f859c2b86e0
grid(1, 1, 1)
block(32, 1, 1)
configureKernel name=_Z8getValuePfS_
setKernelArgCharStar 0x80
found memory: 0x7f859c2b8ce0 fakepos=128 bytes=4096
setKernelArgCharStar 0
kernelGo queue=0x7f859c2b86e0
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>>
workgroupSize=32
.. kernel queued
cudaConfigureCall queue=0x7f859c2b86e0
grid(1, 1, 1)
block(32, 1, 1)
configureKernel name=_Z8getValuePfS_
setKernelArgCharStar 0x80
found memory: 0x7f859c2b8ce0 fakepos=128 bytes=4096
setKernelArgCharStar 0
kernelGo queue=0x7f859c2b86e0
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>>
workgroupSize=32
.. kernel queued
cudaConfigureCall queue=0x7f859c2b86e0
grid(1, 1, 1)
block(32, 1, 1)
configureKernel name=_Z8getValuePfS_
setKernelArgCharStar 0x80
found memory: 0x7f859c2b8ce0 fakepos=128 bytes=4096
setKernelArgCharStar 0
kernelGo queue=0x7f859c2b86e0
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>>
workgroupSize=32
.. kernel queued
cudaConfigureCall queue=0x7f8590001830
grid(1, 1, 1)
block(32, 1, 1)
configureKernel name=_Z8getValuePfS_
building kernel _Z8getValuePfS_
cocl dump cl set
cl: [
kernel void _Z8getValuePfS_(global float* outdata, uint outdata_offset, global float* indata, uint indata_offset, local int *scratch);
kernel void _Z8getValuePfS_(global float* outdata, uint outdata_offset, global float* indata, uint indata_offset, local int *scratch) {
indata += indata_offset;
outdata += outdata_offset;
float v4;
v1:;
/* bool v2 = icmp indata <unk> */;
/* float v4 = select v2 <unk> <unk> */;
v4 = indata == 0 ? 3.0f : 2.0f;
/* void v7 = store v4 outdata */;
outdata[0] = v4;
return;
}
]
cudaStreamSynchronize queue=0x7f859c2b86e0
num kernels cached 1
num kernels calls 4
found memory: 0x7f859c2b8ce0 fakepos=128 bytes=4096
cuStreamDestroy_v2 redirected stream=0x7f859c2b86c0
joined thread 0
__internal__ build log:
"/tmp/OCL25771T2.cl", line 10: warning: label "v1" was declared but never
referenced
v1:;
^
... built
setKernelArgCharStar 0x80
found memory: 0x7f8590001290 fakepos=128 bytes=4096
setKernelArgCharStar 0
kernelGo queue=0x7f8590001830
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>>
workgroupSize=32
.. kernel queued
cudaConfigureCall queue=0x7f8590001830
grid(1, 1, 1)
block(32, 1, 1)
configureKernel name=_Z8getValuePfS_
setKernelArgCharStar 0x80
found memory: 0x7f8590001290 fakepos=128 bytes=4096
setKernelArgCharStar 0
kernelGo queue=0x7f8590001830
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>>
workgroupSize=32
.. kernel queued
cudaConfigureCall queue=0x7f8590001830
grid(1, 1, 1)
block(32, 1, 1)
configureKernel name=_Z8getValuePfS_
setKernelArgCharStar 0x80
found memory: 0x7f8590001290 fakepos=128 bytes=4096
setKernelArgCharStar 0
kernelGo queue=0x7f8590001830
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>>
workgroupSize=32
.. kernel queued
cudaConfigureCall queue=0x7f8590001830
grid(1, 1, 1)
block(32, 1, 1)
configureKernel name=_Z8getValuePfS_
setKernelArgCharStar 0x80
found memory: 0x7f8590001290 fakepos=128 bytes=4096
setKernelArgCharStar 0
kernelGo queue=0x7f8590001830
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>>
workgroupSize=32
.. kernel queued
cudaStreamSynchronize queue=0x7f8590001830
num kernels cached 1
num kernels calls 4
found memory: 0x7f8590001290 fakepos=128 bytes=4096
cudaConfigureCall queue=0x7f8580001830
grid(1, 1, 1)
block(32, 1, 1)
configureKernel name=_Z8getValuePfS_
cuStreamDestroy_v2 redirected stream=0x7f8590001810
building kernel _Z8getValuePfS_
cocl dump cl set
cl: [
kernel void _Z8getValuePfS_(global float* outdata, uint outdata_offset, global float* indata, uint indata_offset, local int *scratch);
kernel void _Z8getValuePfS_(global float* outdata, uint outdata_offset, global float* indata, uint indata_offset, local int *scratch) {
indata += indata_offset;
outdata += outdata_offset;
float v4;
v1:;
/* bool v2 = icmp indata <unk> */;
/* float v4 = select v2 <unk> <unk> */;
v4 = indata == 0 ? 3.0f : 2.0f;
/* void v7 = store v4 outdata */;
outdata[0] = v4;
return;
}
]
joined thread 1
__internal__ build log:
"/tmp/OCL25771T3.cl", line 10: warning: label "v1" was declared but never
referenced
v1:;
^
... built
setKernelArgCharStar 0x80
found memory: 0x7f8580001290 fakepos=128 bytes=4096
setKernelArgCharStar 0
kernelGo queue=0x7f8580001830
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>>
workgroupSize=32
.. kernel queued
cudaConfigureCall queue=0x7f8580001830
grid(1, 1, 1)
block(32, 1, 1)
configureKernel name=_Z8getValuePfS_
setKernelArgCharStar 0x80
found memory: 0x7f8580001290 fakepos=128 bytes=4096
setKernelArgCharStar 0
kernelGo queue=0x7f8580001830
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>>
workgroupSize=32
.. kernel queued
cudaConfigureCall queue=0x7f8580001830
grid(1, 1, 1)
block(32, 1, 1)
configureKernel name=_Z8getValuePfS_
setKernelArgCharStar 0x80
found memory: 0x7f8580001290 fakepos=128 bytes=4096
setKernelArgCharStar 0
kernelGo queue=0x7f8580001830
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>>
workgroupSize=32
.. kernel queued
cudaConfigureCall queue=0x7f8580001830
grid(1, 1, 1)
block(32, 1, 1)
configureKernel name=_Z8getValuePfS_
setKernelArgCharStar 0x80
found memory: 0x7f8580001290 fakepos=128 bytes=4096
setKernelArgCharStar 0
kernelGo queue=0x7f8580001830
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>>
workgroupSize=32
.. kernel queued
cudaStreamSynchronize queue=0x7f8580001830
num kernels cached 1
num kernels calls 4
found memory: 0x7f8580001290 fakepos=128 bytes=4096
cudaConfigureCall queue=0x7f8588001830
grid(1, 1, 1)
block(32, 1, 1)
configureKernel name=_Z8getValuePfS_
cuStreamDestroy_v2 redirected stream=0x7f8580001810
building kernel _Z8getValuePfS_
cocl dump cl set
cl: [
kernel void _Z8getValuePfS_(global float* outdata, uint outdata_offset, global float* indata, uint indata_offset, local int *scratch);
kernel void _Z8getValuePfS_(global float* outdata, uint outdata_offset, global float* indata, uint indata_offset, local int *scratch) {
indata += indata_offset;
outdata += outdata_offset;
float v4;
v1:;
/* bool v2 = icmp indata <unk> */;
/* float v4 = select v2 <unk> <unk> */;
v4 = indata == 0 ? 3.0f : 2.0f;
/* void v7 = store v4 outdata */;
outdata[0] = v4;
return;
}
]
__internal__ build log:
"/tmp/OCL25771T4.cl", line 10: warning: label "v1" was declared but never
referenced
v1:;
^
... built
setKernelArgCharStar 0x80
found memory: 0x7f8588001290 fakepos=128 bytes=4096
setKernelArgCharStar 0
kernelGo queue=0x7f8588001830
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>>
workgroupSize=32
.. kernel queued
cudaConfigureCall queue=0x7f8588001830
grid(1, 1, 1)
block(32, 1, 1)
configureKernel name=_Z8getValuePfS_
setKernelArgCharStar 0x80
found memory: 0x7f8588001290 fakepos=128 bytes=4096
setKernelArgCharStar 0
kernelGo queue=0x7f8588001830
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>>
workgroupSize=32
.. kernel queued
cudaConfigureCall queue=0x7f8588001830
grid(1, 1, 1)
block(32, 1, 1)
configureKernel name=_Z8getValuePfS_
setKernelArgCharStar 0x80
found memory: 0x7f8588001290 fakepos=128 bytes=4096
setKernelArgCharStar 0
kernelGo queue=0x7f8588001830
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>>
workgroupSize=32
.. kernel queued
cudaConfigureCall queue=0x7f8588001830
grid(1, 1, 1)
block(32, 1, 1)
configureKernel name=_Z8getValuePfS_
setKernelArgCharStar 0x80
found memory: 0x7f8588001290 fakepos=128 bytes=4096
setKernelArgCharStar 0
kernelGo queue=0x7f8588001830
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>>
workgroupSize=32
.. kernel queued
cudaStreamSynchronize queue=0x7f8588001830
num kernels cached 1
num kernels calls 4
found memory: 0x7f8588001290 fakepos=128 bytes=4096
cuStreamDestroy_v2 redirected stream=0x7f8588001810
joined thread 2
joined thread 3
[ 92%] Built target run-multithreading
CMakeFiles/Makefile2:208: recipe for target 'CMakeFiles/run-tests.dir/rule' failed
make[1]: *** [CMakeFiles/run-tests.dir/rule] Error 2
Makefile:201: recipe for target 'run-tests' failed
make: *** [run-tests] Error 2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment