Created
November 29, 2016 12:25
-
-
Save inferrna/604e9f84ea69f3031f54a84e5edf641a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ make run-tests | |
[ 1%] Built target clew | |
[ 8%] Built target patch-hostside | |
[ 16%] Built target easycl | |
[ 56%] Built target clblast | |
[ 78%] Built target cocl | |
Scanning dependencies of target run-singlebuffer | |
Scanning dependencies of target run-teststream | |
Scanning dependencies of target run-testnullpointer | |
Scanning dependencies of target run-testpartialcopy | |
[ 78%] /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/bin/cocl test/cocl/singlebuffer.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/singlebuffer --add_ir_to_cl | |
[ 80%] /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/bin/cocl test/cocl/teststream.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/teststream --add_ir_to_cl | |
[ 81%] /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/bin/cocl test/cocl/testnullpointer.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testnullpointer --add_ir_to_cl | |
[ 81%] /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/bin/cocl test/cocl/testpartialcopy.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testpartialcopy --add_ir_to_cl | |
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-gpu-arch=sm_30 --cuda-device-only -emit-llvm -O0 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_deviceside.h -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include test/cocl/testnullpointer.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testnullpointer-device-noopt.ll | |
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-gpu-arch=sm_30 --cuda-device-only -emit-llvm -O0 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_deviceside.h -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include test/cocl/teststream.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/teststream-device-noopt.ll | |
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-gpu-arch=sm_30 --cuda-device-only -emit-llvm -O0 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_deviceside.h -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include test/cocl/testpartialcopy.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testpartialcopy-device-noopt.ll | |
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-gpu-arch=sm_30 --cuda-device-only -emit-llvm -O0 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_deviceside.h -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include test/cocl/singlebuffer.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/singlebuffer-device-noopt.ll | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
warning: unknown warning option warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
'-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
1 warning generated. | |
+ /usr/lib/llvm-3.8/bin/opt -S -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testpartialcopy-device.ll /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testpartialcopy-device-noopt.ll | |
1 warning generated. | |
+ + /usr/lib/llvm-3.8/bin/opt/usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-host-only -emit-llvm -O3 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -S -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testnullpointer-device.ll /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testnullpointer-device-noopt.ll | |
-Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_hostside.h test/cocl/testpartialcopy.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testpartialcopy-hostraw.ll | |
1 warning generated. | |
+ /usr/lib/llvm-3.8/bin/opt -S -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/singlebuffer-device.ll /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/singlebuffer-device-noopt.ll | |
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-host-only -emit-llvm -O3 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_hostside.h test/cocl/testnullpointer.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testnullpointer-hostraw.ll | |
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-host-only -emit-llvm -O3 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_hostside.h test/cocl/singlebuffer.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/singlebuffer-hostraw.ll | |
test/cocl/teststream.cu:71:21: warning: variable length arrays are a C99 feature [-Wvla-extension] | |
float hostFloats[N]; | |
^ | |
2 warnings generated. | |
+ /usr/lib/llvm-3.8/bin/opt -S -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/teststream-device.ll /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/teststream-device-noopt.ll | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-host-only -emit-llvm -O3 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_hostside.h test/cocl/teststream.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/teststream-hostraw.ll | |
warning: unknown warning option warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
'-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
2 warnings generated. | |
2 warnings generated. | |
+ /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build/patch-hostside --hostrawfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testnullpointer-hostraw.ll --devicellfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testnullpointer-device.ll --hostpatchedfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testnullpointer-hostpatched.ll | |
+ /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build/patch-hostside --hostrawfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testpartialcopy-hostraw.ll --devicellfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testpartialcopy-device.ll --hostpatchedfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testpartialcopy-hostpatched.ll | |
+ /usr/lib/llvm-3.8/bin/clang++ -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2+ /usr/lib/llvm-3.8/bin/clang++ -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -DUSE_CLEW -c -fexceptions /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testpartialcopy-hostpatched.ll -O3 -o -D_GNU_SOURCE /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testpartialcopy.o | |
-D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -DUSE_CLEW -c /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testnullpointer-hostpatched.ll -O3 -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testnullpointer.o | |
2 warnings generated. | |
+ /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build/patch-hostside --hostrawfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/singlebuffer-hostraw.ll --devicellfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/singlebuffer-device.ll --hostpatchedfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/singlebuffer-hostpatched.ll | |
+ /usr/lib/llvm-3.8/bin/clang++ -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -DUSE_CLEW -c /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/singlebuffer-hostpatched.ll -O3 -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/singlebuffer.o | |
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include' | |
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include' | |
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include' | |
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include' | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
1 warning generated. | |
+ [ ! ] | |
+ g++ -Wl,-rpath,/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -Wl,-rpath,24890ORIGIN -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testnullpointer /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testnullpointer.o -L/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -lcocl -lclblast -leasycl -lclew -lpthread -L/usr/lib/llvm-3.8/lib -lLLVMLTO -lLLVMObjCARCOpts -lLLVMSymbolize -lLLVMDebugInfoPDB -lLLVMDebugInfoDWARF -lLLVMXCoreDisassembler -lLLVMXCoreCodeGen -lLLVMXCoreDesc -lLLVMXCoreInfo -lLLVMXCoreAsmPrinter -lLLVMSystemZDisassembler -lLLVMSystemZCodeGen -lLLVMSystemZAsmParser -lLLVMSystemZDesc -lLLVMSystemZInfo -lLLVMSystemZAsmPrinter -lLLVMSparcDisassembler -lLLVMSparcCodeGen -lLLVMSparcAsmParser -lLLVMSparcDesc -lLLVMSparcInfo -lLLVMSparcAsmPrinter -lLLVMPowerPCDisassembler -lLLVMPowerPCCodeGen -lLLVMPowerPCAsmParser -lLLVMPowerPCDesc -lLLVMPowerPCInfo -lLLVMPowerPCAsmPrinter -lLLVMNVPTXCodeGen -lLLVMNVPTXDesc -lLLVMNVPTXInfo -lLLVMNVPTXAsmPrinter -lLLVMMSP430CodeGen -lLLVMMSP430Desc -lLLVMMSP430Info -lLLVMMSP430AsmPrinter -lLLVMMipsDisassembler -lLLVMMipsCodeGen -lLLVMMipsAsmParser -lLLVMMipsDesc -lLLVMMipsInfo -lLLVMMipsAsmPrinter -lLLVMHexagonDisassembler -lLLVMHexagonCodeGen -lLLVMHexagonAsmParser -lLLVMHexagonDesc -lLLVMHexagonInfo -lLLVMCppBackendCodeGen -lLLVMCppBackendInfo -lLLVMBPFCodeGen -lLLVMBPFDesc -lLLVMBPFInfo -lLLVMBPFAsmPrinter -lLLVMARMDisassembler -lLLVMARMCodeGen -lLLVMARMAsmParser -lLLVMARMDesc -lLLVMARMInfo -lLLVMARMAsmPrinter -lLLVMAMDGPUCodeGen -lLLVMAMDGPUAsmParser -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMAMDGPUInfo -lLLVMAMDGPUAsmPrinter -lLLVMAArch64Disassembler -lLLVMAArch64CodeGen -lLLVMAArch64AsmParser -lLLVMAArch64Desc -lLLVMAArch64Info -lLLVMAArch64AsmPrinter -lLLVMAArch64Utils -lLLVMMIRParser -lLLVMLibDriver -lLLVMOption -lLLVMTableGen -lLLVMLineEditor -lLLVMX86Disassembler -lLLVMX86AsmParser -lLLVMX86CodeGen -lLLVMSelectionDAG -lLLVMAsmPrinter -lLLVMX86Desc -lLLVMMCDisassembler -lLLVMX86Info -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCJIT -lLLVMPasses -lLLVMipo -lLLVMVectorize -lLLVMLinker -lLLVMIRReader -lLLVMAsmParser -lLLVMDebugInfoCodeView -lLLVMInterpreter -lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine -lLLVMInstrumentation -lLLVMProfileData -lLLVMBitWriter -lLLVMOrcJIT -lLLVMTransformUtils -lLLVMExecutionEngine -lLLVMTarget -lLLVMAnalysis -lLLVMRuntimeDyld -lLLVMObject -lLLVMMCParser -lLLVMBitReader -lLLVMMC -lLLVMCore -lLLVMSupport -lrt -ldl -ltinfo -lpthread -lz -lm | |
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include' | |
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include' | |
1 warning generated. | |
+ [ ! ] | |
+ g++ -Wl,-rpath,/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -Wl,-rpath,24911ORIGIN -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testpartialcopy /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testpartialcopy.o -L/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -lcocl -lclblast -leasycl -lclew -lpthread -L/usr/lib/llvm-3.8/lib -lLLVMLTO -lLLVMObjCARCOpts -lLLVMSymbolize -lLLVMDebugInfoPDB -lLLVMDebugInfoDWARF -lLLVMXCoreDisassembler -lLLVMXCoreCodeGen -lLLVMXCoreDesc -lLLVMXCoreInfo -lLLVMXCoreAsmPrinter -lLLVMSystemZDisassembler -lLLVMSystemZCodeGen -lLLVMSystemZAsmParser -lLLVMSystemZDesc -lLLVMSystemZInfo -lLLVMSystemZAsmPrinter -lLLVMSparcDisassembler -lLLVMSparcCodeGen -lLLVMSparcAsmParser -lLLVMSparcDesc -lLLVMSparcInfo -lLLVMSparcAsmPrinter -lLLVMPowerPCDisassembler -lLLVMPowerPCCodeGen -lLLVMPowerPCAsmParser -lLLVMPowerPCDesc -lLLVMPowerPCInfo -lLLVMPowerPCAsmPrinter -lLLVMNVPTXCodeGen -lLLVMNVPTXDesc -lLLVMNVPTXInfo -lLLVMNVPTXAsmPrinter -lLLVMMSP430CodeGen -lLLVMMSP430Desc -lLLVMMSP430Info -lLLVMMSP430AsmPrinter -lLLVMMipsDisassembler -lLLVMMipsCodeGen -lLLVMMipsAsmParser -lLLVMMipsDesc -lLLVMMipsInfo -lLLVMMipsAsmPrinter -lLLVMHexagonDisassembler -lLLVMHexagonCodeGen -lLLVMHexagonAsmParser -lLLVMHexagonDesc -lLLVMHexagonInfo -lLLVMCppBackendCodeGenwarning -lLLVMCppBackendInfo: -lLLVMBPFCodeGen -lLLVMBPFDesc -lLLVMBPFInfo -lLLVMBPFAsmPrinter -lLLVMARMDisassembler -lLLVMARMCodeGen -lLLVMARMAsmParser -lLLVMARMDescunknown -lLLVMARMInfo -lLLVMARMAsmPrinter -lLLVMAMDGPUCodeGen -lLLVMAMDGPUAsmParser -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMAMDGPUInfo -lLLVMAMDGPUAsmPrinter -lLLVMAArch64Disassembler -lLLVMAArch64CodeGen -lLLVMAArch64AsmParser -lLLVMAArch64Desc -lLLVMAArch64Info -lLLVMAArch64AsmPrinter -lLLVMAArch64Utils -lLLVMMIRParser -lLLVMLibDriver -lLLVMOption -lLLVMTableGen -lLLVMLineEditor -lLLVMX86Disassembler -lLLVMX86AsmParser -lLLVMX86CodeGen -lLLVMSelectionDAG -lLLVMAsmPrinter -lLLVMX86Desc -lLLVMMCDisassembler -lLLVMX86Info -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCJIT -lLLVMPasses -lLLVMipo -lLLVMVectorize -lLLVMLinker -lLLVMIRReader -lLLVMAsmParser -lLLVMDebugInfoCodeView -lLLVMInterpreter -lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine -lLLVMInstrumentation -lLLVMProfileData -lLLVMBitWriter -lLLVMOrcJIT -lLLVMTransformUtils -lLLVMExecutionEngine -lLLVMTarget -lLLVMAnalysis -lLLVMRuntimeDyld -lLLVMObject -lLLVMMCParser -lLLVMBitReader -lLLVMMC -lLLVMCore -lLLVMSupport -lrt -ldl -ltinfo -lpthread -lz -lm | |
warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
test/cocl/teststream.cu:71:21: warning: variable length arrays are a C99 feature [-Wvla-extension] | |
float hostFloats[N]; | |
^ | |
1 warning generated. | |
+ [ ! ] | |
+ g++ -Wl,-rpath,/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -Wl,-rpath,24872ORIGIN -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/singlebuffer /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/singlebuffer.o -L/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -lcocl -lclblast -leasycl -lclew -lpthread -L/usr/lib/llvm-3.8/lib -lLLVMLTO -lLLVMObjCARCOpts -lLLVMSymbolize -lLLVMDebugInfoPDB -lLLVMDebugInfoDWARF -lLLVMXCoreDisassembler -lLLVMXCoreCodeGen -lLLVMXCoreDesc -lLLVMXCoreInfo -lLLVMXCoreAsmPrinter -lLLVMSystemZDisassembler -lLLVMSystemZCodeGen -lLLVMSystemZAsmParser -lLLVMSystemZDesc -lLLVMSystemZInfo -lLLVMSystemZAsmPrinter -lLLVMSparcDisassembler -lLLVMSparcCodeGen -lLLVMSparcAsmParser -lLLVMSparcDesc -lLLVMSparcInfo -lLLVMSparcAsmPrinter -lLLVMPowerPCDisassembler -lLLVMPowerPCCodeGen -lLLVMPowerPCAsmParser -lLLVMPowerPCDesc -lLLVMPowerPCInfo -lLLVMPowerPCAsmPrinter -lLLVMNVPTXCodeGen -lLLVMNVPTXDesc -lLLVMNVPTXInfo -lLLVMNVPTXAsmPrinter -lLLVMMSP430CodeGen -lLLVMMSP430Desc -lLLVMMSP430Info -lLLVMMSP430AsmPrinter -lLLVMMipsDisassembler -lLLVMMipsCodeGen -lLLVMMipsAsmParser -lLLVMMipsDesc -lLLVMMipsInfo -lLLVMMipsAsmPrinter -lLLVMHexagonDisassembler -lLLVMHexagonCodeGen -lLLVMHexagonAsmParser -lLLVMHexagonDesc -lLLVMHexagonInfo -lLLVMCppBackendCodeGen -lLLVMCppBackendInfo -lLLVMBPFCodeGen -lLLVMBPFDesc -lLLVMBPFInfo -lLLVMBPFAsmPrinter -lLLVMARMDisassembler -lLLVMARMCodeGen -lLLVMARMAsmParser -lLLVMARMDesc -lLLVMARMInfo -lLLVMARMAsmPrinter -lLLVMAMDGPUCodeGen -lLLVMAMDGPUAsmParser -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMAMDGPUInfo -lLLVMAMDGPUAsmPrinter -lLLVMAArch64Disassembler -lLLVMAArch64CodeGen -lLLVMAArch64AsmParser -lLLVMAArch64Desc -lLLVMAArch64Info -lLLVMAArch64AsmPrinter -lLLVMAArch64Utils -lLLVMMIRParser -lLLVMLibDriver -lLLVMOption -lLLVMTableGen -lLLVMLineEditor -lLLVMX86Disassembler -lLLVMX86AsmParser -lLLVMX86CodeGen -lLLVMSelectionDAG -lLLVMAsmPrinter -lLLVMX86Desc -lLLVMMCDisassembler -lLLVMX86Info -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCJIT -lLLVMPasses -lLLVMipo -lLLVMVectorize -lLLVMLinker -lLLVMIRReader -lLLVMAsmParser -lLLVMDebugInfoCodeView -lLLVMInterpreter -lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine -lLLVMInstrumentation -lLLVMProfileData -lLLVMBitWriter -lLLVMOrcJIT -lLLVMTransformUtils -lLLVMExecutionEngine -lLLVMTarget -lLLVMAnalysis -lLLVMRuntimeDyld -lLLVMObject -lLLVMMCParser -lLLVMBitReader -lLLVMMC -lLLVMCore -lLLVMSupport -lrt -ldl -ltinfo -lpthread -lz -lm | |
3 warnings generated. | |
+ /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build/patch-hostside --hostrawfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/teststream-hostraw.ll --devicellfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/teststream-device.ll --hostpatchedfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/teststream-hostpatched.ll | |
+ /usr/lib/llvm-3.8/bin/clang++ -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -DUSE_CLEW -c /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/teststream-hostpatched.ll -O3 -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/teststream.o | |
cuStreamCreate redirected | |
cuStreamCreate current context=0 | |
creating default context | |
Context() 0x10e2030 | |
cuStreamCreate redirected | |
cuStreamCreate current context=0 | |
creating default context | |
Context() 0x7d5030 | |
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include' | |
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include' | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
1 warning generated. | |
+ [ ! ] | |
+ g++ -Wl,-rpath,/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -Wl,-rpath,24874ORIGIN -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/teststream /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/teststream.o -L/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -lcocl -lclblast -leasycl -lclew -lpthread -L/usr/lib/llvm-3.8/lib -lLLVMLTO -lLLVMObjCARCOpts -lLLVMSymbolize -lLLVMDebugInfoPDB -lLLVMDebugInfoDWARF -lLLVMXCoreDisassembler -lLLVMXCoreCodeGen -lLLVMXCoreDesc -lLLVMXCoreInfo -lLLVMXCoreAsmPrinter -lLLVMSystemZDisassembler -lLLVMSystemZCodeGen -lLLVMSystemZAsmParser -lLLVMSystemZDesc -lLLVMSystemZInfo -lLLVMSystemZAsmPrinter -lLLVMSparcDisassembler -lLLVMSparcCodeGen -lLLVMSparcAsmParser -lLLVMSparcDesc -lLLVMSparcInfo -lLLVMSparcAsmPrinter -lLLVMPowerPCDisassembler -lLLVMPowerPCCodeGen -lLLVMPowerPCAsmParser -lLLVMPowerPCDesc -lLLVMPowerPCInfo -lLLVMPowerPCAsmPrinter -lLLVMNVPTXCodeGen -lLLVMNVPTXDesc -lLLVMNVPTXInfo -lLLVMNVPTXAsmPrinter -lLLVMMSP430CodeGen -lLLVMMSP430Desc -lLLVMMSP430Info -lLLVMMSP430AsmPrinter -lLLVMMipsDisassembler -lLLVMMipsCodeGen -lLLVMMipsAsmParser -lLLVMMipsDesc -lLLVMMipsInfo -lLLVMMipsAsmPrinter -lLLVMHexagonDisassembler -lLLVMHexagonCodeGen -lLLVMHexagonAsmParser -lLLVMHexagonDesc -lLLVMHexagonInfo -lLLVMCppBackendCodeGen -lLLVMCppBackendInfo -lLLVMBPFCodeGen -lLLVMBPFDesc -lLLVMBPFInfo -lLLVMBPFAsmPrinter -lLLVMARMDisassembler -lLLVMARMCodeGen -lLLVMARMAsmParser -lLLVMARMDesc -lLLVMARMInfo -lLLVMARMAsmPrinter -lLLVMAMDGPUCodeGen -lLLVMAMDGPUAsmParser -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMAMDGPUInfo -lLLVMAMDGPUAsmPrinter -lLLVMAArch64Disassembler -lLLVMAArch64CodeGen -lLLVMAArch64AsmParser -lLLVMAArch64Desc -lLLVMAArch64Info -lLLVMAArch64AsmPrinter -lLLVMAArch64Utils -lLLVMMIRParser -lLLVMLibDriver -lLLVMOption -lLLVMTableGen -lLLVMLineEditor -lLLVMX86Disassembler -lLLVMX86AsmParser -lLLVMX86CodeGen -lLLVMSelectionDAG -lLLVMAsmPrinter -lLLVMX86Desc -lLLVMMCDisassembler -lLLVMX86Info -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCJIT -lLLVMPasses -lLLVMipo -lLLVMVectorize -lLLVMLinker -lLLVMIRReader -lLLVMAsmParser -lLLVMDebugInfoCodeView -lLLVMInterpreter -lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine -lLLVMInstrumentation -lLLVMProfileData -lLLVMBitWriter -lLLVMOrcJIT -lLLVMTransformUtils -lLLVMExecutionEngine -lLLVMTarget -lLLVMAnalysis -lLLVMRuntimeDyld -lLLVMObject -lLLVMMCParser -lLLVMBitReader -lLLVMMC -lLLVMCore -lLLVMSupport -lrt -ldl -ltinfo -lpthread -lz -lm | |
cuStreamCreate redirected | |
cuStreamCreate current context=0 | |
creating default context | |
Context() 0x166e030 | |
test1 | |
cuStreamCreate redirected | |
cuStreamCreate current context=0 | |
creating default context | |
Context() 0x1a18030 | |
Using Advanced Micro Devices, Inc. , OpenCL platform: AMD Accelerated Parallel Processing | |
Using OpenCL device: Pitcairn | |
cuStreamCreate redirected new stream 0x18f2810 | |
Memory::newDeviceAlloc context=0x10e2030 bytes=4096 memory=0x18f2de0 clmem=0x18f2c70 | |
cuMemcpyHtoDAsync dst=384 src=0x18ff070 bytes=16 | |
found memory: 0x18f2de0 fakepos=128 bytes=4096 | |
cudaStreamSynchronize queue=0x18f2830 | |
cuMemcpyDtoHAsync queue=0x18f2830 dst=0x18fee70 src=128 bytes=4096 | |
found memory: 0x18f2de0 fakepos=128 bytes=4096 | |
cuMemcpyDtoHAsync dst[0] 6.06749e-08 | |
cudaStreamSynchronize queue=0x18f2830 | |
123.456 | |
444 | |
321 | |
111 | |
found memory: 0x18f2de0 fakepos=128 bytes=4096 | |
cuStreamDestroy_v2 redirected stream=0x18f2810 | |
Using Advanced Micro Devices, Inc. , OpenCL platform: AMD Accelerated Parallel Processing | |
Using OpenCL device: Pitcairn | |
cuStreamCreate redirected new stream 0x1043280 | |
Memory::newDeviceAlloc context=0x7d5030 bytes=4096 memory=0xbdd780 clmem=0xbdd610 | |
cuMemcpyHtoDAsync dst=128 src=0x1047880 bytes=4096 | |
found memory: 0xbdd780 fakepos=128 bytes=4096 | |
cudaConfigureCall queue=0x10432a0 | |
grid(1, 1, 1) | |
block(32, 1, 1) | |
configureKernel name=_Z8getValuePfS_ | |
building kernel _Z8getValuePfS_ | |
cocl dump cl set | |
cl: [ | |
kernel void _Z8getValuePfS_(global float* outdata, uint outdata_offset, global float* indata, uint indata_offset, local int *scratch); | |
kernel void _Z8getValuePfS_(global float* outdata, uint outdata_offset, global float* indata, uint indata_offset, local int *scratch) { | |
indata += indata_offset; | |
outdata += outdata_offset; | |
float v4; | |
v1:; | |
/* bool v2 = icmp indata <unk> */; | |
/* float v4 = select v2 <unk> <unk> */; | |
v4 = indata == 0 ? 3.0f : 2.0f; | |
/* void v7 = store v4 outdata */; | |
outdata[0] = v4; | |
return; | |
} | |
] | |
[ 81%] Built target run-testpartialcopy | |
Using Advanced Micro Devices, Inc. , OpenCL platform: AMD Accelerated Parallel Processing | |
Using OpenCL device: Pitcairn | |
Using Advanced Micro Devices, Inc. , OpenCL platform: AMD Accelerated Parallel Processing | |
Using OpenCL device: Pitcairn | |
Scanning dependencies of target run-offsetkernelargs | |
cuStreamCreate redirected new stream 0x22343e0 | |
got stream | |
Memory::newDeviceAlloc context=0x1a18030 bytes=409600 memory=0x2234a00 clmem=0x2234890 | |
cudaConfigureCall queue=0x2234400 | |
grid(3200, 1, 1) | |
block(32, 1, 1) | |
configureKernel name=_Z10longKernelPfif | |
building kernel _Z10longKernelPfif | |
[ 82%] /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/bin/cocl test/cocl/offsetkernelargs.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/offsetkernelargs --add_ir_to_cl | |
__internal__ build log: | |
"/tmp/OCL25092T1.cl", line 10: warning: label "v1" was declared but never | |
referenced | |
v1:; | |
^ | |
... built | |
setKernelArgCharStar 0x80 | |
found memory: 0xbdd780 fakepos=128 bytes=4096 | |
setKernelArgCharStar 0 | |
kernelGo queue=0x10432a0 | |
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>> | |
workgroupSize=32 | |
.. kernel queued | |
cuMemcpyDtoHAsync queue=0x10432a0 dst=0x1047880 src=128 bytes=4096 | |
found memory: 0xbdd780 fakepos=128 bytes=4096 | |
cuMemcpyDtoHAsync dst[0] 3 | |
cudaStreamSynchronize queue=0x10432a0 | |
3 | |
found memory: 0xbdd780 fakepos=128 bytes=4096 | |
cuStreamDestroy_v2 redirected stream=0x1043280 | |
cuStreamCreate redirected | |
cuStreamCreate current context=0x7d5030 | |
cocl dump cl set | |
cl: [ | |
kernel void _Z10longKernelPfif(global float* data, uint data_offset, int N, float value, local int *scratch); | |
kernel void _Z10longKernelPfif(global float* data, uint data_offset, int N, float value, local int *scratch) { | |
data += data_offset; | |
float v24; | |
float v25; | |
float v36; | |
float v37; | |
float v42; | |
float v43; | |
float v49; | |
float v50; | |
float v55; | |
float v56; | |
global float* v23; | |
global float* v35; | |
global float* v41; | |
global float* v48; | |
global float* v54; | |
int v16; | |
int v19; | |
int v20; | |
int v21; | |
int v27; | |
int v29; | |
int v31; | |
int v33; | |
int v58; | |
v1:; | |
/* bool v12 = icmp N <unk> */; | |
/* if(v12) */ | |
if (N > 0) { | |
goto v2; | |
} else { | |
goto v10; | |
} | |
v2:; | |
/* int v14 = add N <unk> */; | |
/* int v16 = and N <unk> */; | |
v16 = N & 3; | |
/* bool v18 = icmp v16 v13 */; | |
/* if(v18) */ | |
if (v16 == 0) { | |
/* int v19 = phi v13 */ | |
v19 = 0; | |
goto v6; | |
} else { | |
goto v3; | |
} | |
v3:; | |
/* int v20 = phi v13 */ | |
v20 = 0; | |
/* int v21 = phi v16 */ | |
v21 = v16; | |
goto v4; | |
v4:; | |
/* long v22 = sext v20 */; | |
/* float* v23 = getelementptr data v22 */; | |
v23 = (&(data[v20])); | |
/* float v24 = load v23 */; | |
v24 = v23[0]; | |
/* float v25 = fadd v24 value */; | |
v25 = v24 + value; | |
/* void v26 = store v25 v23 */; | |
v23[0] = v25; | |
/* int v27 = add v20 <unk> */; | |
v27 = v20 + 1; | |
/* int v29 = add v21 v15 */; | |
v29 = v21 + -1; | |
/* bool v30 = icmp v29 v13 */; | |
/* if(v30) */ | |
if (v29 == 0) { | |
/* int v31 = phi v27 */ | |
v31 = v27; | |
goto v5; | |
} else { | |
/* int v20 = phi v27 */ | |
v20 = v27; | |
/* int v21 = phi v29 */ | |
v21 = v29; | |
goto v4; | |
} | |
v5:; | |
/* int v19 = phi v31 */ | |
v19 = v31; | |
goto v6; | |
v6:; | |
/* bool v32 = icmp v14 v17 */; | |
/* if(v32) */ | |
if (N + -1 < 3) { | |
goto v9; | |
} else { | |
goto v7; | |
} | |
v7:; | |
/* int v33 = phi v19 */ | |
v33 = v19; | |
goto v11; | |
v8:; | |
goto v9; | |
v9:; | |
goto v10; | |
v10:; | |
return; | |
v11:; | |
/* long v34 = sext v33 */; | |
/* float* v35 = getelementptr data v34 */; | |
v35 = (&(data[v33])); | |
/* float v36 = load v35 */; | |
v36 = v35[0]; | |
/* float v37 = fadd v36 value */; | |
v37 = v36 + value; | |
/* void v38 = store v37 v35 */; | |
v35[0] = v37; | |
/* int v39 = add v33 v28 */; | |
/* long v40 = sext v39 */; | |
/* float* v41 = getelementptr data v40 */; | |
v41 = (&(data[v33 + 1])); | |
/* float v42 = load v41 */; | |
v42 = v41[0]; | |
/* float v43 = fadd v42 value */; | |
v43 = v42 + value; | |
/* void v44 = store v43 v41 */; | |
v41[0] = v43; | |
/* int v45 = add v33 <unk> */; | |
/* long v47 = sext v45 */; | |
/* float* v48 = getelementptr data v47 */; | |
v48 = (&(data[v33 + 2])); | |
/* float v49 = load v48 */; | |
v49 = v48[0]; | |
/* float v50 = fadd v49 value */; | |
v50 = v49 + value; | |
/* void v51 = store v50 v48 */; | |
v48[0] = v50; | |
/* int v52 = add v33 v17 */; | |
/* long v53 = sext v52 */; | |
/* float* v54 = getelementptr data v53 */; | |
v54 = (&(data[v33 + 3])); | |
/* float v55 = load v54 */; | |
v55 = v54[0]; | |
/* float v56 = fadd v55 value */; | |
v56 = v55 + value; | |
/* void v57 = store v56 v54 */; | |
v54[0] = v56; | |
/* int v58 = add v33 <unk> */; | |
v58 = v33 + 4; | |
/* bool v60 = icmp v58 N */; | |
/* if(v60) */ | |
if (v58 == N) { | |
goto v8; | |
} else { | |
/* int v33 = phi v58 */ | |
v33 = v58; | |
goto v11; | |
} | |
} | |
] | |
cuStreamCreate redirected new stream 0x1c39b20 | |
Memory::newDeviceAlloc context=0x166e030 bytes=65536 memory=0x1c39200 clmem=0x1c39090 | |
cuMemcpyHtoDAsync dst=640 src=0x1a5a690 bytes=512 | |
found memory: 0x1c39200 fakepos=128 bytes=65536 | |
cudaConfigureCall queue=0x1c38c50 | |
grid(1, 1, 1) | |
block(32, 1, 1) | |
configureKernel name=_Z8getValuePfS_ | |
building kernel _Z8getValuePfS_ | |
cocl dump cl set | |
cl: [ | |
kernel void _Z8getValuePfS_(global float* outdata, uint outdata_offset, global float* indata, uint indata_offset, local int *scratch); | |
kernel void _Z8getValuePfS_(global float* outdata, uint outdata_offset, global float* indata, uint indata_offset, local int *scratch) { | |
indata += indata_offset; | |
outdata += outdata_offset; | |
float v2; | |
float v3; | |
v1:; | |
/* float v2 = load indata */; | |
v2 = indata[0]; | |
/* float v3 = fadd v2 <unk> */; | |
v3 = v2 + 3.0f; | |
/* void v5 = store v3 outdata */; | |
outdata[0] = v3; | |
return; | |
} | |
] | |
cuStreamCreate redirected new stream 0x1043280 | |
Memory::newDeviceAlloc context=0x7d5030 bytes=4096 memory=0xad5c60 clmem=0x10456d0 | |
cuMemcpyHtoDAsync dst=4224 src=0x1047880 bytes=4096 | |
found memory: 0xad5c60 fakepos=4224 bytes=4096 | |
cudaConfigureCall queue=0x10432a0 | |
grid(1, 1, 1) | |
block(32, 1, 1) | |
configureKernel name=_Z16checkNullStructs8MyStruct | |
building kernel _Z16checkNullStructs8MyStruct | |
cocl dump cl set | |
cl: [struct MyStruct { | |
global float* f0; | |
global float* f1; | |
}; | |
struct MyStruct_nopointers { | |
int f0; | |
}; | |
kernel void _Z16checkNullStructs8MyStruct(global struct MyStruct_nopointers* mystruct_nopointers, global float* mystruct_ptr0, uint mystruct_ptr0_offset, global float* mystruct_ptr1, uint mystruct_ptr1_offset, local int *scratch); | |
kernel void _Z16checkNullStructs8MyStruct(global struct MyStruct_nopointers* mystruct_nopointers, global float* mystruct_ptr0, uint mystruct_ptr0_offset, global float* mystruct_ptr1, uint mystruct_ptr1_offset, local int *scratch) { | |
mystruct_ptr1 += mystruct_ptr1_offset; | |
mystruct_ptr0 += mystruct_ptr0_offset; | |
struct MyStruct mystruct[1]; | |
mystruct[0].f0 = 0; | |
mystruct[0].f1 = 0; | |
mystruct[0].f0 = mystruct_ptr0; | |
mystruct[0].f1 = mystruct_ptr1; | |
float v7; | |
global float* v11; | |
global float* v4; | |
v1:; | |
/* float** v2 = getelementptr mystruct <unk> <unk> */; | |
/* float* v4 = load v2 */; | |
v4 = (&(mystruct[0].f1))[0]; | |
/* bool v5 = icmp v4 <unk> */; | |
/* float v7 = select v5 <unk> <unk> */; | |
v7 = v4 == 0 ? 9.0f : 8.0f; | |
/* float** v10 = getelementptr mystruct v3 <unk> */; | |
/* float* v11 = load v10 */; | |
v11 = (&(mystruct[0].f0))[0]; | |
/* void v12 = store v7 v11 */; | |
v11[0] = v7; | |
return; | |
} | |
] | |
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-gpu-arch=sm_30 --cuda-device-only -emit-llvm -O0 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_deviceside.h -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include test/cocl/offsetkernelargs.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/offsetkernelargs-device-noopt.ll | |
__internal__ build log: | |
"/tmp/OCL25099T1.cl", line 11: warning: label "v1" was declared but never | |
referenced | |
v1:; | |
^ | |
... built | |
setKernelArgCharStar 0x480 | |
found memory: 0x1c39200 fakepos=128 bytes=65536 | |
setKernelArgCharStar 0x280 | |
found memory: 0x1c39200 fakepos=128 bytes=65536 | |
kernelGo queue=0x1c38c50 | |
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>> | |
workgroupSize=32 | |
.. kernel queued | |
cuMemcpyDtoHAsync queue=0x1c38c50 dst=0x1a5a890 src=1152 bytes=512 | |
found memory: 0x1c39200 fakepos=128 bytes=65536 | |
cuMemcpyDtoHAsync dst[0] 126.456 | |
cudaStreamSynchronize queue=0x1c38c50 | |
126.456 | |
found memory: 0x1c39200 fakepos=128 bytes=65536 | |
cuStreamDestroy_v2 redirected stream=0x1c39b20 | |
__internal__ build log: | |
"/tmp/OCL25092T2.cl", line 24: warning: label "v1" was declared but never | |
referenced | |
v1:; | |
^ | |
... built | |
setKernelArgStruct structsize=4 | |
setKernelArgCharStar 0x1080 | |
found memory: 0xad5c60 fakepos=4224 bytes=4096 | |
setKernelArgCharStar 0 | |
kernelGo queue=0x10432a0 | |
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>> | |
workgroupSize=32 | |
.. kernel queued | |
cuMemcpyDtoHAsync queue=0x10432a0 dst=0x1047880 src=4224 bytes=4096 | |
found memory: 0xad5c60 fakepos=4224 bytes=4096 | |
cuMemcpyDtoHAsync dst[0] 9 | |
cudaStreamSynchronize queue=0x10432a0 | |
9 | |
found memory: 0xad5c60 fakepos=4224 bytes=4096 | |
cuStreamDestroy_v2 redirected stream=0x1043280 | |
cuStreamCreate redirected | |
cuStreamCreate current context=0x166e030 | |
cuStreamCreate redirected new stream 0x1c39b20 | |
Memory::newDeviceAlloc context=0x166e030 bytes=65536 memory=0x1e862b0 clmem=0x19798d0 | |
cuMemcpyHtoDAsync dst=66176 src=0x1a5a690 bytes=128 | |
found memory: 0x1e862b0 fakepos=65664 bytes=65536 | |
cudaConfigureCall queue=0x1c38c50 | |
grid(1, 1, 1) | |
block(32, 1, 1) | |
configureKernel name=_Z12getValueCharPcS_ | |
building kernel _Z12getValueCharPcS_ | |
cocl dump cl set | |
cl: [ | |
kernel void _Z12getValueCharPcS_(global char* outdata, uint outdata_offset, global char* indata, uint indata_offset, local int *scratch); | |
kernel void _Z12getValueCharPcS_(global char* outdata, uint outdata_offset, global char* indata, uint indata_offset, local int *scratch) { | |
indata += indata_offset; | |
outdata += outdata_offset; | |
char v2; | |
char v6; | |
v1:; | |
/* char v2 = load indata */; | |
v2 = indata[0]; | |
/* int v3 = zext v2 */; | |
/* int v4 = add v3 <unk> */; | |
/* char v6 = trunc v4 */; | |
v6 = (char)(v2 + 3); | |
/* void v7 = store v6 outdata */; | |
outdata[0] = v6; | |
return; | |
} | |
] | |
__internal__ build log: | |
"/tmp/OCL25106T1.cl", line 36: warning: goto statement may cause irreducible | |
control flow | |
goto v2; | |
^ | |
"/tmp/OCL25106T1.cl", line 38: warning: goto statement may cause irreducible | |
control flow | |
goto v10; | |
^ | |
"/tmp/OCL25106T1.cl", line 49: warning: goto statement may cause irreducible | |
control flow | |
goto v6; | |
^ | |
"/tmp/OCL25106T1.cl", line 51: warning: goto statement may cause irreducible | |
control flow | |
goto v3; | |
^ | |
"/tmp/OCL25106T1.cl", line 58: warning: goto statement may cause irreducible | |
control flow | |
goto v4; | |
^ | |
"/tmp/OCL25106T1.cl", line 78: warning: goto statement may cause irreducible | |
control flow | |
goto v5; | |
^ | |
"/tmp/OCL25106T1.cl", line 84: warning: goto statement may cause irreducible | |
control flow | |
goto v4; | |
^ | |
"/tmp/OCL25106T1.cl", line 89: warning: goto statement may cause irreducible | |
control flow | |
goto v6; | |
^ | |
"/tmp/OCL25106T1.cl", line 94: warning: goto statement may cause irreducible | |
control flow | |
goto v9; | |
^ | |
"/tmp/OCL25106T1.cl", line 96: warning: goto statement may cause irreducible | |
control flow | |
goto v7; | |
^ | |
"/tmp/OCL25106T1.cl", line 101: warning: goto statement may cause irreducible | |
control flow | |
goto v11; | |
^ | |
"/tmp/OCL25106T1.cl", line 103: warning: goto statement may cause irreducible | |
control flow | |
goto v9; | |
^ | |
"/tmp/OCL25106T1.cl", line 105: warning: goto statement may cause irreducible | |
control flow | |
goto v10; | |
^ | |
"/tmp/OCL25106T1.cl", line 153: warning: goto statement may cause irreducible | |
control flow | |
goto v8; | |
^ | |
"/tmp/OCL25106T1.cl", line 157: warning: goto statement may cause irreducible | |
control flow | |
goto v11; | |
^ | |
"/tmp/OCL25106T1.cl", line 32: warning: label "v1" was declared but never | |
[ 82%] Built target run-testnullpointer | |
referenced | |
v1:; | |
^ | |
... built | |
setKernelArgCharStar 0x80 | |
found memory: 0x2234a00 fakepos=128 bytes=409600 | |
setKernelArgInt32 102400 | |
setKernelArgFloat 3 | |
kernelGo queue=0x2234400 | |
<<< global=dim3(102400,1,1,), workgroupsize=dim3(32,1,1,)>>> | |
workgroupSize=32 | |
.. kernel queued | |
Scanning dependencies of target run-testmath | |
__internal__ build log: | |
"/tmp/OCL25099T2.cl", line 11: warning: label "v1" was declared but never | |
referenced | |
v1:; | |
^ | |
... built | |
setKernelArgCharStar 0x10480 | |
found memory: 0x1e862b0 fakepos=65664 bytes=65536 | |
setKernelArgCharStar 0x10280 | |
found memory: 0x1e862b0 fakepos=65664 bytes=65536 | |
kernelGo queue=0x1c38c50 | |
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>> | |
workgroupSize=32 | |
.. kernel queued | |
[ 84%] /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/bin/cocl test/cocl/testmath.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testmath --add_ir_to_cl | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-gpu-arch=sm_30 --cuda-device-only -emit-llvm -O0 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_deviceside.h -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include test/cocl/testmath.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testmath-device-noopt.ll | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
queued kernel 1 | |
Event() | |
cuEventCreate redirected flags=1 new event=0x1dac360 | |
cuEventRecord redirected event=0x1dac360 queue=0x2234400 | |
cuStreamWaitEvent redirected queue=0x2234400 event=0x1dac360 flags=0 | |
cudaConfigureCall queue=0x2234400 | |
grid(3200, 1, 1) | |
block(32, 1, 1) | |
configureKernel name=_Z10longKernelPfif | |
setKernelArgCharStar 0x80 | |
found memory: 0x2234a00 fakepos=128 bytes=409600 | |
setKernelArgInt32 102400 | |
setKernelArgFloat 3 | |
kernelGo queue=0x2234400 | |
<<< global=dim3(102400,1,1,), workgroupsize=dim3(32,1,1,)>>> | |
cuMemcpyDtoHAsync queue=0x1c38c50 dst=0x1a5a890 src=66688 bytes=128 | |
workgroupSize=32 | |
found memory: 0x1e862b0 fakepos=65664 bytes=65536 | |
.. kernel queued | |
cuMemcpyDtoHAsync dst[0] -nan | |
cudaStreamSynchronize queue=0x1c38c50 | |
F | |
found memory: 0x1e862b0 fakepos=65664 bytes=65536 | |
cuStreamDestroy_v2 redirected stream=0x1c39b20 | |
queued kernel 2 | |
cudaStreamSynchronize queue=0x2234400 | |
finished | |
cuEventDestroy redirected event=0x1dac360 | |
~Event() | |
found memory: 0x2234a00 fakepos=128 bytes=409600 | |
cuStreamDestroy_v2 redirected stream=0x22343e0 | |
test2 | |
cuStreamCreate redirected | |
cuStreamCreate current context=0x1a18030 | |
cuStreamCreate redirected new stream 0x22343e0 | |
call cumemalloc | |
Memory::newDeviceAlloc context=0x1a18030 bytes=409600 memory=0x2234a00 clmem=0x2234870 | |
cumemalloc done | |
123 123 123 123 123 123 123 123 123 123 | |
calling cuMemcpyHtoDAsync | |
cuMemcpyHtoDAsync dst=409728 src=0x7ffd1f0a3980 bytes=409600 | |
found memory: 0x2234a00 fakepos=409728 bytes=409600 | |
cuMemcpyHtoDAsync done | |
cudaConfigureCall queue=0x2234400 | |
grid(3200, 1, 1) | |
block(32, 1, 1) | |
configureKernel name=_Z10longKernelPfif | |
setKernelArgCharStar 0x64080 | |
found memory: 0x2234a00 fakepos=409728 bytes=409600 | |
setKernelArgInt32 102400 | |
setKernelArgFloat 3 | |
kernelGo queue=0x2234400 | |
<<< global=dim3(102400,1,1,), workgroupsize=dim3(32,1,1,)>>> | |
workgroupSize=32 | |
.. kernel queued | |
queued kernel | |
cuMemcpyDtoHAsync queue=0x2234400 dst=0x7ffd1f0a3980 src=409728 bytes=409600 | |
found memory: 0x2234a00 fakepos=409728 bytes=409600 | |
cuMemcpyDtoHAsync dst[0] 822 | |
queued async copy | |
cudaStreamSynchronize queue=0x2234400 | |
822 1305 1305 1467 1482 1755 1755 1839 2070 2667 | |
found memory: 0x2234a00 fakepos=409728 bytes=409600 | |
cuStreamDestroy_v2 redirected stream=0x22343e0 | |
[ 84%] Built target run-singlebuffer | |
1 warning generated. | |
[ 84%] Built target run-teststream | |
+ /usr/lib/llvm-3.8/bin/opt -S -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/offsetkernelargs-device.ll /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/offsetkernelargs-device-noopt.ll | |
Scanning dependencies of target run-testevents | |
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-host-only -emit-llvm -O3 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_hostside.h test/cocl/offsetkernelargs.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/offsetkernelargs-hostraw.ll | |
Scanning dependencies of target run-testshfl | |
[ 85%] /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/bin/cocl test/cocl/testevents.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testevents --add_ir_to_cl | |
[ 86%] /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/bin/cocl test/cocl/testshfl.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testshfl --add_ir_to_cl | |
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-gpu-arch=sm_30 --cuda-device-only -emit-llvm -O0 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_deviceside.h -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include test/cocl/testevents.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testevents-device-noopt.ll | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-gpu-arch=sm_30 --cuda-device-only -emit-llvm -O0 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_deviceside.h -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include test/cocl/testshfl.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testshfl-device-noopt.ll | |
test/cocl/testmath.cu:55:11: warning: unused variable 'diff' [-Wunused-variable] | |
float diff = std::abs(hostFloats1[0] - 140.296); | |
^ | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
2 warnings generated. | |
+ /usr/lib/llvm-3.8/bin/opt -S -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testmath-device.ll /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testmath-device-noopt.ll | |
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-host-only -emit-llvm -O3 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_hostside.h test/cocl/testmath.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testmath-hostraw.ll | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
1 warning generated. | |
+ /usr/lib/llvm-3.8/bin/opt -S -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testevents-device.ll /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testevents-device-noopt.ll | |
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-host-only -emit-llvm -O3 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_hostside.h test/cocl/testevents.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testevents-hostraw.ll | |
2 warnings generated. | |
+ /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build/patch-hostside --hostrawfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/offsetkernelargs-hostraw.ll --devicellfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/offsetkernelargs-device.ll --hostpatchedfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/offsetkernelargs-hostpatched.ll | |
+ /usr/lib/llvm-3.8/bin/clang++ -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -DUSE_CLEW -c /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/offsetkernelargs-hostpatched.ll -O3 -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/offsetkernelargs.o | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include' | |
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include' | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
1 warning generated. | |
+ [ ! ] | |
+ g++ -Wl,-rpath,/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -Wl,-rpath,25132ORIGIN -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/offsetkernelargs /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/offsetkernelargs.o -L/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -lcocl -lclblast -leasycl -lclew -lpthread -L/usr/lib/llvm-3.8/lib -lLLVMLTO -lLLVMObjCARCOpts -lLLVMSymbolize -lLLVMDebugInfoPDB -lLLVMDebugInfoDWARF -lLLVMXCoreDisassembler -lLLVMXCoreCodeGen -lLLVMXCoreDesc -lLLVMXCoreInfo -lLLVMXCoreAsmPrinter -lLLVMSystemZDisassembler -lLLVMSystemZCodeGen -lLLVMSystemZAsmParser -lLLVMSystemZDesc -lLLVMSystemZInfo -lLLVMSystemZAsmPrinter -lLLVMSparcDisassembler -lLLVMSparcCodeGen -lLLVMSparcAsmParser -lLLVMSparcDesc -lLLVMSparcInfo -lLLVMSparcAsmPrinter -lLLVMPowerPCDisassembler -lLLVMPowerPCCodeGen -lLLVMPowerPCAsmParser -lLLVMPowerPCDesc -lLLVMPowerPCInfo -lLLVMPowerPCAsmPrinter -lLLVMNVPTXCodeGen -lLLVMNVPTXDesc -lLLVMNVPTXInfo -lLLVMNVPTXAsmPrinter -lLLVMMSP430CodeGen -lLLVMMSP430Desc -lLLVMMSP430Info -lLLVMMSP430AsmPrinter -lLLVMMipsDisassembler -lLLVMMipsCodeGen -lLLVMMipsAsmParser -lLLVMMipsDesc -lLLVMMipsInfo -lLLVMMipsAsmPrinter -lLLVMHexagonDisassembler -lLLVMHexagonCodeGen -lLLVMHexagonAsmParser -lLLVMHexagonDesc -lLLVMHexagonInfo -lLLVMCppBackendCodeGen -lLLVMCppBackendInfo -lLLVMBPFCodeGen -lLLVMBPFDesc -lLLVMBPFInfo -lLLVMBPFAsmPrinter -lLLVMARMDisassembler -lLLVMARMCodeGen -lLLVMARMAsmParser -lLLVMARMDesc -lLLVMARMInfo -lLLVMARMAsmPrinter -lLLVMAMDGPUCodeGen -lLLVMAMDGPUAsmParser -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMAMDGPUInfo -lLLVMAMDGPUAsmPrinter -lLLVMAArch64Disassembler -lLLVMAArch64CodeGen -lLLVMAArch64AsmParser -lLLVMAArch64Desc -lLLVMAArch64Info -lLLVMAArch64AsmPrinter -lLLVMAArch64Utils -lLLVMMIRParser -lLLVMLibDriver -lLLVMOption -lLLVMTableGen -lLLVMLineEditor -lLLVMX86Disassembler -lLLVMX86AsmParser -lLLVMX86CodeGen -lLLVMSelectionDAG -lLLVMAsmPrinter -lLLVMX86Desc -lLLVMMCDisassembler -lLLVMX86Info -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCJIT -lLLVMPasses -lLLVMipo -lLLVMVectorize -lLLVMLinker -lLLVMIRReader -lLLVMAsmParser -lLLVMDebugInfoCodeView -lLLVMInterpreter -lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine -lLLVMInstrumentation -lLLVMProfileData -lLLVMBitWriter -lLLVMOrcJIT -lLLVMTransformUtils -lLLVMExecutionEngine -lLLVMTarget -lLLVMAnalysis -lLLVMRuntimeDyld -lLLVMObject -lLLVMMCParser -lLLVMBitReader -lLLVMMC -lLLVMCore -lLLVMSupport -lrt -ldl -ltinfo -lpthread -lz -lm | |
2 warnings generated. | |
+ /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build/patch-hostside --hostrawfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testmath-hostraw.ll --devicellfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testmath-device.ll --hostpatchedfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testmath-hostpatched.ll | |
+ /usr/lib/llvm-3.8/bin/clang++ -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -DUSE_CLEW -c /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testmath-hostpatched.ll -O3 -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testmath.o | |
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include' | |
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include' | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
1 warning generated. | |
+ [ ! ] | |
+ g++ -Wl,-rpath,/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -Wl,-rpath,25189ORIGIN -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testmath /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testmath.o -L/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -lcocl -lclblast -leasycl -lclew -lpthread -L/usr/lib/llvm-3.8/lib -lLLVMLTO -lLLVMObjCARCOpts -lLLVMSymbolize -lLLVMDebugInfoPDB -lLLVMDebugInfoDWARF -lLLVMXCoreDisassembler -lLLVMXCoreCodeGen -lLLVMXCoreDesc -lLLVMXCoreInfo -lLLVMXCoreAsmPrinter -lLLVMSystemZDisassembler -lLLVMSystemZCodeGen -lLLVMSystemZAsmParser -lLLVMSystemZDesc -lLLVMSystemZInfo -lLLVMSystemZAsmPrinter -lLLVMSparcDisassembler -lLLVMSparcCodeGen -lLLVMSparcAsmParser -lLLVMSparcDesc -lLLVMSparcInfo -lLLVMSparcAsmPrinter -lLLVMPowerPCDisassembler -lLLVMPowerPCCodeGen -lLLVMPowerPCAsmParser -lLLVMPowerPCDesc -lLLVMPowerPCInfo -lLLVMPowerPCAsmPrinter -lLLVMNVPTXCodeGen -lLLVMNVPTXDesc -lLLVMNVPTXInfo -lLLVMNVPTXAsmPrinter -lLLVMMSP430CodeGen -lLLVMMSP430Desc -lLLVMMSP430Info -lLLVMMSP430AsmPrinter -lLLVMMipsDisassembler -lLLVMMipsCodeGen -lLLVMMipsAsmParser -lLLVMMipsDesc -lLLVMMipsInfo -lLLVMMipsAsmPrinter -lLLVMHexagonDisassembler -lLLVMHexagonCodeGen -lLLVMHexagonAsmParser -lLLVMHexagonDesc -lLLVMHexagonInfo -lLLVMCppBackendCodeGen -lLLVMCppBackendInfo -lLLVMBPFCodeGen -lLLVMBPFDesc -lLLVMBPFInfo -lLLVMBPFAsmPrinter -lLLVMARMDisassembler -lLLVMARMCodeGen -lLLVMARMAsmParser -lLLVMARMDesc -lLLVMARMInfo -lLLVMARMAsmPrinter -lLLVMAMDGPUCodeGen -lLLVMAMDGPUAsmParser -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMAMDGPUInfo -lLLVMAMDGPUAsmPrinter -lLLVMAArch64Disassembler -lLLVMAArch64CodeGen -lLLVMAArch64AsmParser -lLLVMAArch64Desc -lLLVMAArch64Info -lLLVMAArch64AsmPrinter -lLLVMAArch64Utils -lLLVMMIRParser -lLLVMLibDriver -lLLVMOption -lLLVMTableGen -lLLVMLineEditor -lLLVMX86Disassembler -lLLVMX86AsmParser -lLLVMX86CodeGen -lLLVMSelectionDAG -lLLVMAsmPrinter -lLLVMX86Desc -lLLVMMCDisassembler -lLLVMX86Info -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCJIT -lLLVMPasses -lLLVMipo -lLLVMVectorize -lLLVMLinker -lLLVMIRReader -lLLVMAsmParser -lLLVMDebugInfoCodeView -lLLVMInterpreter -lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine -lLLVMInstrumentation -lLLVMProfileData -lLLVMBitWriter -lLLVMOrcJIT -lLLVMTransformUtils -lLLVMExecutionEngine -lLLVMTarget -lLLVMAnalysis -lLLVMRuntimeDyld -lLLVMObject -lLLVMMCParser -lLLVMBitReader -lLLVMMC -lLLVMCore -lLLVMSupport -lrt -ldl -ltinfo -lpthread -lz -lm | |
test/cocl/testshfl.cu:13:9: warning: unused variable 'warpid' [-Wunused-variable] | |
int warpid = tid % 32; // assume warpsize 32. Anyway, CUDA code uses warpsize 32. | |
^ | |
2 warnings generated. | |
+ /usr/lib/llvm-3.8/bin/opt -S -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testshfl-device.ll /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testshfl-device-noopt.ll | |
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-host-only -emit-llvm -O3 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_hostside.h test/cocl/testshfl.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testshfl-hostraw.ll | |
cuStreamCreate redirected | |
cuStreamCreate current context=0 | |
creating default context | |
Context() 0x24ec030 | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
cuStreamCreate redirected | |
cuStreamCreate current context=0 | |
creating default context | |
Context() 0x8dc030 | |
2 warnings generated. | |
+ /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build/patch-hostside --hostrawfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testevents-hostraw.ll --devicellfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testevents-device.ll --hostpatchedfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testevents-hostpatched.ll | |
+ /usr/lib/llvm-3.8/bin/clang++ -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -DUSE_CLEW -c /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testevents-hostpatched.ll -O3 -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testevents.o | |
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include' | |
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include' | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
1 warning generated. | |
+ [ ! ] | |
+ g++ -Wl,-rpath,/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -Wl,-rpath,25254ORIGIN -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testevents /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testevents.o -L/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -lcocl -lclblast -leasycl -lclew -lpthread -L/usr/lib/llvm-3.8/lib -lLLVMLTO -lLLVMObjCARCOpts -lLLVMSymbolize -lLLVMDebugInfoPDB -lLLVMDebugInfoDWARF -lLLVMXCoreDisassembler -lLLVMXCoreCodeGen -lLLVMXCoreDesc -lLLVMXCoreInfo -lLLVMXCoreAsmPrinter -lLLVMSystemZDisassembler -lLLVMSystemZCodeGen -lLLVMSystemZAsmParser -lLLVMSystemZDesc -lLLVMSystemZInfo -lLLVMSystemZAsmPrinter -lLLVMSparcDisassembler -lLLVMSparcCodeGen -lLLVMSparcAsmParser -lLLVMSparcDesc -lLLVMSparcInfo -lLLVMSparcAsmPrinter -lLLVMPowerPCDisassembler -lLLVMPowerPCCodeGen -lLLVMPowerPCAsmParser -lLLVMPowerPCDesc -lLLVMPowerPCInfo -lLLVMPowerPCAsmPrinter -lLLVMNVPTXCodeGen -lLLVMNVPTXDesc -lLLVMNVPTXInfo -lLLVMNVPTXAsmPrinter -lLLVMMSP430CodeGen -lLLVMMSP430Desc -lLLVMMSP430Info -lLLVMMSP430AsmPrinter -lLLVMMipsDisassembler -lLLVMMipsCodeGen -lLLVMMipsAsmParser -lLLVMMipsDesc -lLLVMMipsInfo -lLLVMMipsAsmPrinter -lLLVMHexagonDisassembler -lLLVMHexagonCodeGen -lLLVMHexagonAsmParser -lLLVMHexagonDesc -lLLVMHexagonInfo -lLLVMCppBackendCodeGen -lLLVMCppBackendInfo -lLLVMBPFCodeGen -lLLVMBPFDesc -lLLVMBPFInfo -lLLVMBPFAsmPrinter -lLLVMARMDisassembler -lLLVMARMCodeGen -lLLVMARMAsmParser -lLLVMARMDesc -lLLVMARMInfo -lLLVMARMAsmPrinter -lLLVMAMDGPUCodeGen -lLLVMAMDGPUAsmParser -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMAMDGPUInfo -lLLVMAMDGPUAsmPrinter -lLLVMAArch64Disassembler -lLLVMAArch64CodeGen -lLLVMAArch64AsmParser -lLLVMAArch64Desc -lLLVMAArch64Info -lLLVMAArch64AsmPrinter -lLLVMAArch64Utils -lLLVMMIRParser -lLLVMLibDriver -lLLVMOption -lLLVMTableGen -lLLVMLineEditor -lLLVMX86Disassembler -lLLVMX86AsmParser -lLLVMX86CodeGen -lLLVMSelectionDAG -lLLVMAsmPrinter -lLLVMX86Desc -lLLVMMCDisassembler -lLLVMX86Info -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCJIT -lLLVMPasses -lLLVMipo -lLLVMVectorize -lLLVMLinker -lLLVMIRReader -lLLVMAsmParser -lLLVMDebugInfoCodeView -lLLVMInterpreter -lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine -lLLVMInstrumentation -lLLVMProfileData -lLLVMBitWriter -lLLVMOrcJIT -lLLVMTransformUtils -lLLVMExecutionEngine -lLLVMTarget -lLLVMAnalysis -lLLVMRuntimeDyld -lLLVMObject -lLLVMMCParser -lLLVMBitReader -lLLVMMC -lLLVMCore -lLLVMSupport -lrt -ldl -ltinfo -lpthread -lz -lm | |
Using Advanced Micro Devices, Inc. , OpenCL platform: AMD Accelerated Parallel Processing | |
Using OpenCL device: Pitcairn | |
cuStreamCreate redirected new stream 0x28a7230 | |
Memory::newDeviceAlloc context=0x24ec030 bytes=4096 memory=0x28a7800 clmem=0x28a7690 | |
Memory::newDeviceAlloc context=0x24ec030 bytes=4096 memory=0x28a79e0 clmem=0x28a7830 | |
cuMemcpyHtoDAsync dst=128 src=0x27f75a0 bytes=4096 | |
found memory: 0x28a7800 fakepos=128 bytes=4096 | |
cudaConfigureCall queue=0x28a7250 | |
grid(1, 1, 1) | |
block(32, 1, 1) | |
configureKernel name=_Z8getValuePfS_ | |
building kernel _Z8getValuePfS_ | |
cocl dump cl set | |
cl: [ | |
kernel void _Z8getValuePfS_(global float* outdata, uint outdata_offset, global float* indata, uint indata_offset, local int *scratch); | |
kernel void _Z8getValuePfS_(global float* outdata, uint outdata_offset, global float* indata, uint indata_offset, local int *scratch) { | |
indata += indata_offset; | |
outdata += outdata_offset; | |
float v2; | |
float v3; | |
v1:; | |
/* float v2 = load indata */; | |
v2 = indata[0]; | |
/* float v3 = fadd v2 <unk> */; | |
v3 = v2 + 3.0f; | |
/* void v5 = store v3 outdata */; | |
outdata[0] = v3; | |
return; | |
} | |
] | |
Using Advanced Micro Devices, Inc. , OpenCL platform: AMD Accelerated Parallel Processing | |
Using OpenCL device: Pitcairn | |
cuStreamCreate redirected new stream 0xc99090 | |
Memory::newDeviceAlloc context=0x8dc030 bytes=4096 memory=0xe2b780 clmem=0xe2b610 | |
cuMemcpyHtoDAsync dst=128 src=0xe2a5f0 bytes=4096 | |
found memory: 0xe2b780 fakepos=128 bytes=4096 | |
cudaConfigureCall queue=0xc990b0 | |
grid(1, 1, 1) | |
block(32, 1, 1) | |
configureKernel name=_Z8getValuePf | |
building kernel _Z8getValuePf | |
__internal__ build log: | |
"/tmp/OCL25377T1.cl", line 11: warning: label "v1" was declared but never | |
referenced | |
v1:; | |
^ | |
... built | |
setKernelArgCharStar 0x1180 | |
found memory: 0x28a79e0 fakepos=4224 bytes=4096 | |
setKernelArgCharStar 0x280 | |
found memory: 0x28a7800 fakepos=128 bytes=4096 | |
kernelGo queue=0x28a7250 | |
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>> | |
workgroupSize=32 | |
cocl dump cl set | |
cl: [ | |
kernel void _Z8getValuePf(global float* data, uint data_offset, local int *scratch); | |
kernel void _Z8getValuePf(global float* data, uint data_offset, local int *scratch) { | |
data += data_offset; | |
float v10; | |
float v11; | |
float v12; | |
float v16; | |
float v17; | |
float v18; | |
float v22; | |
float v23; | |
float v24; | |
float v4; | |
float v7; | |
float v8; | |
global float* v13; | |
global float* v19; | |
global float* v25; | |
global float* v2; | |
global float* v5; | |
v1:; | |
/* float* v2 = getelementptr data <unk> */; | |
v2 = (&(data[1])); | |
/* float v4 = load v2 */; | |
v4 = v2[0]; | |
/* float* v5 = getelementptr data <unk> */; | |
v5 = (&(data[2])); | |
/* float v7 = load v5 */; | |
v7 = v5[0]; | |
/* float v8 = call v4 v7 <unk> */; | |
v8 = pow(v4, v7); | |
/* void v9 = store v8 data */; | |
data[0] = v8; | |
/* float v10 = load v2 */; | |
v10 = v2[0]; | |
/* float v11 = load v5 */; | |
v11 = v5[0]; | |
/* float v12 = call v10 v11 <unk> */; | |
v12 = fmin(v10, v11); | |
/* float* v13 = getelementptr data <unk> */; | |
v13 = (&(data[4])); | |
/* void v15 = store v12 v13 */; | |
v13[0] = v12; | |
/* float v16 = load v2 */; | |
v16 = v2[0]; | |
/* float v17 = load v5 */; | |
v17 = v5[0]; | |
/* float v18 = call v16 v17 <unk> */; | |
v18 = fmax(v16, v17); | |
/* float* v19 = getelementptr data <unk> */; | |
v19 = (&(data[5])); | |
/* void v21 = store v18 v19 */; | |
v19[0] = v18; | |
/* float v22 = load v2 */; | |
v22 = v2[0]; | |
/* float v23 = load v5 */; | |
v23 = v5[0]; | |
/* float v24 = call v22 v23 <unk> */; | |
v24 = fmax(v22, v23); | |
/* float* v25 = getelementptr data <unk> */; | |
v25 = (&(data[6])); | |
/* void v27 = store v24 v25 */; | |
v25[0] = v24; | |
return; | |
} | |
] | |
.. kernel queued | |
cuMemcpyDtoHAsync queue=0x28a7250 dst=0x2874750 src=4224 bytes=4096 | |
found memory: 0x28a79e0 fakepos=4224 bytes=4096 | |
cuMemcpyDtoHAsync dst[0] 0.0484016 | |
cudaStreamSynchronize queue=0x28a7250 | |
126.456 | |
found memory: 0x28a7800 fakepos=128 bytes=4096 | |
found memory: 0x28a79e0 fakepos=4224 bytes=4096 | |
cuStreamDestroy_v2 redirected stream=0x28a7230 | |
[ 86%] Built target run-offsetkernelargs | |
Scanning dependencies of target run-multigpu | |
[ 88%] /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/bin/cocl test/cocl/multigpu.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multigpu --add_ir_to_cl | |
__internal__ build log: | |
"/tmp/OCL25381T1.cl", line 25: warning: label "v1" was declared but never | |
referenced | |
v1:; | |
^ | |
... built | |
setKernelArgCharStar 0x80 | |
found memory: 0xe2b780 fakepos=128 bytes=4096 | |
kernelGo queue=0xc990b0 | |
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>> | |
workgroupSize=32 | |
.. kernel queued | |
cuMemcpyDtoHAsync queue=0xc990b0 dst=0xe2a5f0 src=128 bytes=4096 | |
found memory: 0xe2b780 fakepos=128 bytes=4096 | |
cuMemcpyDtoHAsync dst[0] 140.296 | |
cudaStreamSynchronize queue=0xc990b0 | |
140.296 | |
3 | |
4.5 | |
3 | |
4.5 | |
found memory: 0xe2b780 fakepos=128 bytes=4096 | |
cuStreamDestroy_v2 redirected stream=0xc99090 | |
[ 88%] Built target run-testmath | |
cuStreamCreate redirected | |
cuStreamCreate current context=0 | |
creating default context | |
Context() 0xa9d030 | |
Scanning dependencies of target run-properties | |
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-gpu-arch=sm_30 --cuda-device-only -emit-llvm -O0 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_deviceside.h -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include test/cocl/multigpu.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multigpu-device-noopt.ll | |
[ 89%] /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/bin/cocl test/cocl/properties.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/properties --add_ir_to_cl | |
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-gpu-arch=sm_30 --cuda-device-only -emit-llvm -O0 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_deviceside.h -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include test/cocl/properties.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/properties-device-noopt.ll | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
test/cocl/testshfl.cu:13:9: warning: unused variable 'warpid' [-Wunused-variable] | |
int warpid = tid % 32; // assume warpsize 32. Anyway, CUDA code uses warpsize 32. | |
^ | |
3 warnings generated. | |
+ /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build/patch-hostside --hostrawfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testshfl-hostraw.ll --devicellfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testshfl-device.ll --hostpatchedfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testshfl-hostpatched.ll | |
+ /usr/lib/llvm-3.8/bin/clang++ -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -DUSE_CLEW -c /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testshfl-hostpatched.ll -O3 -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testshfl.o | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include' | |
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include' | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
1 warning generated. | |
+ [ ! ] | |
+ g++ -Wl,-rpath,/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -Wl,-rpath,25301ORIGIN -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testshfl /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testshfl.o -L/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -lcocl -lclblast -leasycl -lclew -lpthread -L/usr/lib/llvm-3.8/lib -lLLVMLTO -lLLVMObjCARCOpts -lLLVMSymbolize -lLLVMDebugInfoPDB -lLLVMDebugInfoDWARF -lLLVMXCoreDisassembler -lLLVMXCoreCodeGen -lLLVMXCoreDesc -lLLVMXCoreInfo -lLLVMXCoreAsmPrinter -lLLVMSystemZDisassembler -lLLVMSystemZCodeGen -lLLVMSystemZAsmParser -lLLVMSystemZDesc -lLLVMSystemZInfo -lLLVMSystemZAsmPrinter -lLLVMSparcDisassembler -lLLVMSparcCodeGen -lLLVMSparcAsmParser -lLLVMSparcDesc -lLLVMSparcInfo -lLLVMSparcAsmPrinter -lLLVMPowerPCDisassembler -lLLVMPowerPCCodeGen -lLLVMPowerPCAsmParser -lLLVMPowerPCDesc -lLLVMPowerPCInfo -lLLVMPowerPCAsmPrinter -lLLVMNVPTXCodeGen -lLLVMNVPTXDesc -lLLVMNVPTXInfo -lLLVMNVPTXAsmPrinter -lLLVMMSP430CodeGen -lLLVMMSP430Desc -lLLVMMSP430Info -lLLVMMSP430AsmPrinter -lLLVMMipsDisassembler -lLLVMMipsCodeGen -lLLVMMipsAsmParser -lLLVMMipsDesc -lLLVMMipsInfo -lLLVMMipsAsmPrinter -lLLVMHexagonDisassembler -lLLVMHexagonCodeGen -lLLVMHexagonAsmParser -lLLVMHexagonDesc -lLLVMHexagonInfo -lLLVMCppBackendCodeGen -lLLVMCppBackendInfo -lLLVMBPFCodeGen -lLLVMBPFDesc -lLLVMBPFInfo -lLLVMBPFAsmPrinter -lLLVMARMDisassembler -lLLVMARMCodeGen -lLLVMARMAsmParser -lLLVMARMDesc -lLLVMARMInfo -lLLVMARMAsmPrinter -lLLVMAMDGPUCodeGen -lLLVMAMDGPUAsmParser -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMAMDGPUInfo -lLLVMAMDGPUAsmPrinter -lLLVMAArch64Disassembler -lLLVMAArch64CodeGen -lLLVMAArch64AsmParser -lLLVMAArch64Desc -lLLVMAArch64Info -lLLVMAArch64AsmPrinter -lLLVMAArch64Utils -lLLVMMIRParser -lLLVMLibDriver -lLLVMOption -lLLVMTableGen -lLLVMLineEditor -lLLVMX86Disassembler -lLLVMX86AsmParser -lLLVMX86CodeGen -lLLVMSelectionDAG -lLLVMAsmPrinter -lLLVMX86Desc -lLLVMMCDisassembler -lLLVMX86Info -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCJIT -lLLVMPasses -lLLVMipo -lLLVMVectorize -lLLVMLinker -lLLVMIRReader -lLLVMAsmParser -lLLVMDebugInfoCodeView -lLLVMInterpreter -lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine -lLLVMInstrumentation -lLLVMProfileData -lLLVMBitWriter -lLLVMOrcJIT -lLLVMTransformUtils -lLLVMExecutionEngine -lLLVMTarget -lLLVMAnalysis -lLLVMRuntimeDyld -lLLVMObject -lLLVMMCParser -lLLVMBitReader -lLLVMMC -lLLVMCore -lLLVMSupport -lrt -ldl -ltinfo -lpthread -lz -lm | |
Using Advanced Micro Devices, Inc. , OpenCL platform: AMD Accelerated Parallel Processing | |
Using OpenCL device: Pitcairn | |
cuStreamCreate redirected new stream 0xe86280 | |
Memory::newDeviceAlloc context=0xa9d030 bytes=409600 memory=0xe868a0 clmem=0xe86730 | |
cuMemcpyHtoDAsync dst=128 src=0xdad910 bytes=409600 | |
found memory: 0xe868a0 fakepos=128 bytes=409600 | |
cudaConfigureCall using default_queue | |
cudaConfigureCall queue=0x12b16c0 | |
grid(3200, 1, 1) | |
block(32, 1, 1) | |
configureKernel name=_Z10longKernelPfif | |
building kernel _Z10longKernelPfif | |
cocl dump cl set | |
cl: [ | |
kernel void _Z10longKernelPfif(global float* data, uint data_offset, int N, float value, local int *scratch); | |
kernel void _Z10longKernelPfif(global float* data, uint data_offset, int N, float value, local int *scratch) { | |
data += data_offset; | |
float v24; | |
float v25; | |
float v36; | |
float v37; | |
float v42; | |
float v43; | |
float v49; | |
float v50; | |
float v55; | |
float v56; | |
global float* v23; | |
global float* v35; | |
global float* v41; | |
global float* v48; | |
global float* v54; | |
int v16; | |
int v19; | |
int v20; | |
int v21; | |
int v27; | |
int v29; | |
int v31; | |
int v33; | |
int v58; | |
v1:; | |
/* bool v12 = icmp N <unk> */; | |
/* if(v12) */ | |
if (N > 0) { | |
goto v2; | |
} else { | |
goto v10; | |
} | |
v2:; | |
/* int v14 = add N <unk> */; | |
/* int v16 = and N <unk> */; | |
v16 = N & 3; | |
/* bool v18 = icmp v16 v13 */; | |
/* if(v18) */ | |
if (v16 == 0) { | |
/* int v19 = phi v13 */ | |
v19 = 0; | |
goto v6; | |
} else { | |
goto v3; | |
} | |
v3:; | |
/* int v20 = phi v13 */ | |
v20 = 0; | |
/* int v21 = phi v16 */ | |
v21 = v16; | |
goto v4; | |
v4:; | |
/* long v22 = sext v20 */; | |
/* float* v23 = getelementptr data v22 */; | |
v23 = (&(data[v20])); | |
/* float v24 = load v23 */; | |
v24 = v23[0]; | |
/* float v25 = fadd v24 value */; | |
v25 = v24 + value; | |
/* void v26 = store v25 v23 */; | |
v23[0] = v25; | |
/* int v27 = add v20 <unk> */; | |
v27 = v20 + 1; | |
/* int v29 = add v21 v15 */; | |
v29 = v21 + -1; | |
/* bool v30 = icmp v29 v13 */; | |
/* if(v30) */ | |
if (v29 == 0) { | |
/* int v31 = phi v27 */ | |
v31 = v27; | |
goto v5; | |
} else { | |
/* int v20 = phi v27 */ | |
v20 = v27; | |
/* int v21 = phi v29 */ | |
v21 = v29; | |
goto v4; | |
} | |
v5:; | |
/* int v19 = phi v31 */ | |
v19 = v31; | |
goto v6; | |
v6:; | |
/* bool v32 = icmp v14 v17 */; | |
/* if(v32) */ | |
if (N + -1 < 3) { | |
goto v9; | |
} else { | |
goto v7; | |
} | |
v7:; | |
/* int v33 = phi v19 */ | |
v33 = v19; | |
goto v11; | |
v8:; | |
goto v9; | |
v9:; | |
goto v10; | |
v10:; | |
return; | |
v11:; | |
/* long v34 = sext v33 */; | |
/* float* v35 = getelementptr data v34 */; | |
v35 = (&(data[v33])); | |
/* float v36 = load v35 */; | |
v36 = v35[0]; | |
/* float v37 = fadd v36 value */; | |
v37 = v36 + value; | |
/* void v38 = store v37 v35 */; | |
v35[0] = v37; | |
/* int v39 = add v33 v28 */; | |
/* long v40 = sext v39 */; | |
/* float* v41 = getelementptr data v40 */; | |
v41 = (&(data[v33 + 1])); | |
/* float v42 = load v41 */; | |
v42 = v41[0]; | |
/* float v43 = fadd v42 value */; | |
v43 = v42 + value; | |
/* void v44 = store v43 v41 */; | |
v41[0] = v43; | |
/* int v45 = add v33 <unk> */; | |
/* long v47 = sext v45 */; | |
/* float* v48 = getelementptr data v47 */; | |
v48 = (&(data[v33 + 2])); | |
/* float v49 = load v48 */; | |
v49 = v48[0]; | |
/* float v50 = fadd v49 value */; | |
v50 = v49 + value; | |
/* void v51 = store v50 v48 */; | |
v48[0] = v50; | |
/* int v52 = add v33 v17 */; | |
/* long v53 = sext v52 */; | |
/* float* v54 = getelementptr data v53 */; | |
v54 = (&(data[v33 + 3])); | |
/* float v55 = load v54 */; | |
v55 = v54[0]; | |
/* float v56 = fadd v55 value */; | |
v56 = v55 + value; | |
/* void v57 = store v56 v54 */; | |
v54[0] = v56; | |
/* int v58 = add v33 <unk> */; | |
v58 = v33 + 4; | |
/* bool v60 = icmp v58 N */; | |
/* if(v60) */ | |
if (v58 == N) { | |
goto v8; | |
} else { | |
/* int v33 = phi v58 */ | |
v33 = v58; | |
goto v11; | |
} | |
} | |
] | |
__internal__ build log: | |
"/tmp/OCL25444T1.cl", line 36: warning: goto statement may cause irreducible | |
control flow | |
goto v2; | |
^ | |
"/tmp/OCL25444T1.cl", line 38: warning: goto statement may cause irreducible | |
control flow | |
goto v10; | |
^ | |
"/tmp/OCL25444T1.cl", line 49: warning: goto statement may cause irreducible | |
control flow | |
goto v6; | |
^ | |
"/tmp/OCL25444T1.cl", line 51: warning: goto statement may cause irreducible | |
control flow | |
goto v3; | |
^ | |
"/tmp/OCL25444T1.cl", line 58: warning: goto statement may cause irreducible | |
control flow | |
goto v4; | |
^ | |
"/tmp/OCL25444T1.cl", line 78: warning: goto statement may cause irreducible | |
control flow | |
goto v5; | |
^ | |
"/tmp/OCL25444T1.cl", line 84: warning: goto statement may cause irreducible | |
control flow | |
goto v4; | |
^ | |
"/tmp/OCL25444T1.cl", line 89: warning: goto statement may cause irreducible | |
control flow | |
goto v6; | |
^ | |
"/tmp/OCL25444T1.cl", line 94: warning: goto statement may cause irreducible | |
control flow | |
goto v9; | |
^ | |
"/tmp/OCL25444T1.cl", line 96: warning: goto statement may cause irreducible | |
control flow | |
goto v7; | |
^ | |
"/tmp/OCL25444T1.cl", line 101: warning: goto statement may cause irreducible | |
control flow | |
goto v11; | |
^ | |
"/tmp/OCL25444T1.cl", line 103: warning: goto statement may cause irreducible | |
control flow | |
goto v9; | |
^ | |
"/tmp/OCL25444T1.cl", line 105: warning: goto statement may cause irreducible | |
control flow | |
goto v10; | |
^ | |
"/tmp/OCL25444T1.cl", line 153: warning: goto statement may cause irreducible | |
control flow | |
goto v8; | |
^ | |
"/tmp/OCL25444T1.cl", line 157: warning: goto statement may cause irreducible | |
control flow | |
goto v11; | |
^ | |
"/tmp/OCL25444T1.cl", line 32: warning: label "v1" was declared but never | |
referenced | |
v1:; | |
^ | |
... built | |
setKernelArgCharStar 0x80 | |
found memory: 0xe868a0 fakepos=128 bytes=409600 | |
setKernelArgInt32 102400 | |
setKernelArgFloat 3 | |
kernelGo queue=0x12b16c0 | |
<<< global=dim3(102400,1,1,), workgroupsize=dim3(32,1,1,)>>> | |
workgroupSize=32 | |
.. kernel queued | |
cuStreamCreate redirected | |
cuStreamCreate current context=0 | |
creating default context | |
Context() 0xb24030 | |
queued kernel x | |
cuCtxSynchronize redirected | |
finished | |
found memory: 0xe868a0 fakepos=128 bytes=409600 | |
cuStreamDestroy_v2 redirected stream=0xe86280 | |
[ 89%] Built target run-testevents | |
Scanning dependencies of target run-test_bitcast | |
[ 90%] /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/bin/cocl test/cocl/test_bitcast.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/test_bitcast --add_ir_to_cl | |
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-gpu-arch=sm_30 --cuda-device-only -emit-llvm -O0 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_deviceside.h -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include test/cocl/test_bitcast.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/test_bitcast-device-noopt.ll | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
test/cocl/multigpu.cu:86:22: warning: variable length arrays are a C99 feature [-Wvla-extension] | |
pthread_t threads[ deviceCount ]; | |
^ | |
2 warnings generated. | |
+ /usr/lib/llvm-3.8/bin/opt -S -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multigpu-device.ll /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multigpu-device-noopt.ll | |
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-host-only -emit-llvm -O3 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_hostside.h test/cocl/multigpu.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multigpu-hostraw.ll | |
test/cocl/properties.cu:12:11: warning: unused variable 'N' [-Wunused-const-variable] | |
const int N = 1024; | |
^ | |
2 warnings generated. | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
+ /usr/lib/llvm-3.8/bin/opt -S -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/properties-device.ll /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/properties-device-noopt.ll | |
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-host-only -emit-llvm -O3 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_hostside.h test/cocl/properties.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/properties-hostraw.ll | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
Using Advanced Micro Devices, Inc. , OpenCL platform: AMD Accelerated Parallel Processing | |
Using OpenCL device: Pitcairn | |
cuStreamCreate redirected new stream 0x10e7e60 | |
Memory::newDeviceAlloc context=0xb24030 bytes=4096 memory=0x10e8480 clmem=0x10e8310 | |
cuMemcpyHtoDAsync dst=128 src=0x10e97a0 bytes=4096 | |
found memory: 0x10e8480 fakepos=128 bytes=4096 | |
cudaConfigureCall queue=0x10e7e80 | |
grid(4, 1, 1) | |
block(128, 1, 1) | |
configureKernel name=_Z8getValuePf | |
building kernel _Z8getValuePf | |
cocl dump cl set | |
cl: [ | |
inline float __shfl_down_3(local int *scratch, float v0, int v1, int v2) { | |
// local float mem[1024]; | |
local float *mem = (local float *)scratch; | |
int tid = get_local_id(0); | |
int warpid = tid % 32; | |
int warpstart = tid - warpid; | |
mem[tid] = v0; | |
//barrier(CLK_LOCAL_MEM_FENCE); | |
int warpsrc = warpid + v1; | |
warpsrc = warpsrc >= 32 ? warpid : warpsrc; | |
return mem[warpstart + warpsrc]; | |
} | |
kernel void _Z8getValuePf(global float* data, uint data_offset, local int *scratch); | |
kernel void _Z8getValuePf(global float* data, uint data_offset, local int *scratch) { | |
data += data_offset; | |
float v5; | |
float v6; | |
global float* v4; | |
int v2; | |
v1:; | |
/* int v2 = call <unk> */; | |
v2 = get_local_id(0); | |
/* long v3 = sext v2 */; | |
/* float* v4 = getelementptr data v3 */; | |
v4 = (&(data[v2])); | |
/* float v5 = load v4 */; | |
v5 = v4[0]; | |
/* float v6 = call v5 <unk> <unk> <unk> */; | |
v6 = __shfl_down_3(scratch, v5, 1, 32); | |
/* void v9 = store v6 v4 */; | |
v4[0] = v6; | |
return; | |
} | |
] | |
__internal__ build log: | |
"/tmp/OCL25521T1.cl", line 26: warning: label "v1" was declared but never | |
referenced | |
v1:; | |
^ | |
... built | |
setKernelArgCharStar 0x80 | |
found memory: 0x10e8480 fakepos=128 bytes=4096 | |
kernelGo queue=0x10e7e80 | |
<<< global=dim3(512,1,1,), workgroupsize=dim3(128,1,1,)>>> | |
workgroupSize=128 | |
.. kernel queued | |
cuMemcpyDtoHAsync queue=0x10e7e80 dst=0x10e97a0 src=128 bytes=4096 | |
found memory: 0x10e8480 fakepos=128 bytes=4096 | |
cuMemcpyDtoHAsync dst[0] 1001 | |
cudaStreamSynchronize queue=0x10e7e80 | |
1001 | |
1002 | |
1003 | |
1005 | |
1006 | |
found memory: 0x10e8480 fakepos=128 bytes=4096 | |
cuStreamDestroy_v2 redirected stream=0x10e7e60 | |
[ 90%] Built target run-testshfl | |
Scanning dependencies of target run-testblas | |
[ 92%] /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/bin/cocl test/cocl/testblas.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testblas --add_ir_to_cl | |
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-gpu-arch=sm_30 --cuda-device-only -emit-llvm -O0 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_deviceside.h -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include test/cocl/testblas.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testblas-device-noopt.ll | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
1 warning generated. | |
+ /usr/lib/llvm-3.8/bin/opt -S -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/test_bitcast-device.ll /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/test_bitcast-device-noopt.ll | |
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-host-only -emit-llvm -O3 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_hostside.h test/cocl/test_bitcast.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/test_bitcast-hostraw.ll | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
test/cocl/multigpu.cu:86:22: warning: variable length arrays are a C99 feature [-Wvla-extension] | |
pthread_t threads[ deviceCount ]; | |
^ | |
3 warnings generated. | |
+ /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build/patch-hostside --hostrawfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multigpu-hostraw.ll --devicellfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multigpu-device.ll --hostpatchedfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multigpu-hostpatched.ll | |
+ /usr/lib/llvm-3.8/bin/clang++ -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -DUSE_CLEW -c /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multigpu-hostpatched.ll -O3 -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multigpu.o | |
test/cocl/properties.cu:12:11: warning: unused variable 'N' [-Wunused-const-variable] | |
const int N = 1024; | |
^ | |
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include' | |
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include' | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
3 warnings generated. | |
+ /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build/patch-hostside --hostrawfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/properties-hostraw.ll --devicellfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/properties-device.ll --hostpatchedfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/properties-hostpatched.ll | |
+ /usr/lib/llvm-3.8/bin/clang++ -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -DUSE_CLEW -c /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/properties-hostpatched.ll -O3 -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/properties.o | |
1 warning generated. | |
+ [ ! ] | |
+ g++ -Wl,-rpath,/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -Wl,-rpath,25406ORIGIN -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multigpu /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multigpu.o -L/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -lcocl -lclblast -leasycl -lclew -lpthread -L/usr/lib/llvm-3.8/lib -lLLVMLTO -lLLVMObjCARCOpts -lLLVMSymbolize -lLLVMDebugInfoPDB -lLLVMDebugInfoDWARF -lLLVMXCoreDisassembler -lLLVMXCoreCodeGen -lLLVMXCoreDesc -lLLVMXCoreInfo -lLLVMXCoreAsmPrinter -lLLVMSystemZDisassembler -lLLVMSystemZCodeGen -lLLVMSystemZAsmParser -lLLVMSystemZDesc -lLLVMSystemZInfo -lLLVMSystemZAsmPrinter -lLLVMSparcDisassembler -lLLVMSparcCodeGen -lLLVMSparcAsmParser -lLLVMSparcDesc -lLLVMSparcInfo -lLLVMSparcAsmPrinter -lLLVMPowerPCDisassembler -lLLVMPowerPCCodeGen -lLLVMPowerPCAsmParser -lLLVMPowerPCDesc -lLLVMPowerPCInfo -lLLVMPowerPCAsmPrinter -lLLVMNVPTXCodeGen -lLLVMNVPTXDesc -lLLVMNVPTXInfo -lLLVMNVPTXAsmPrinter -lLLVMMSP430CodeGen -lLLVMMSP430Desc -lLLVMMSP430Info -lLLVMMSP430AsmPrinter -lLLVMMipsDisassembler -lLLVMMipsCodeGen -lLLVMMipsAsmParser -lLLVMMipsDesc -lLLVMMipsInfo -lLLVMMipsAsmPrinter -lLLVMHexagonDisassembler -lLLVMHexagonCodeGen -lLLVMHexagonAsmParser -lLLVMHexagonDesc -lLLVMHexagonInfo -lLLVMCppBackendCodeGen -lLLVMCppBackendInfo -lLLVMBPFCodeGen -lLLVMBPFDesc -lLLVMBPFInfo -lLLVMBPFAsmPrinter -lLLVMARMDisassembler -lLLVMARMCodeGen -lLLVMARMAsmParser -lLLVMARMDesc -lLLVMARMInfo -lLLVMARMAsmPrinter -lLLVMAMDGPUCodeGen -lLLVMAMDGPUAsmParser -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMAMDGPUInfo -lLLVMAMDGPUAsmPrinter -lLLVMAArch64Disassembler -lLLVMAArch64CodeGen -lLLVMAArch64AsmParser -lLLVMAArch64Desc -lLLVMAArch64Info -lLLVMAArch64AsmPrinter -lLLVMAArch64Utils -lLLVMMIRParser -lLLVMLibDriver -lLLVMOption -lLLVMTableGen -lLLVMLineEditor -lLLVMX86Disassembler -lLLVMX86AsmParser -lLLVMX86CodeGen -lLLVMSelectionDAG -lLLVMAsmPrinter -lLLVMX86Desc -lLLVMMCDisassembler -lLLVMX86Info -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCJIT -lLLVMPasses -lLLVMipo -lLLVMVectorize -lLLVMLinker -lLLVMIRReader -lLLVMAsmParser -lLLVMDebugInfoCodeView -lLLVMInterpreter -lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine -lLLVMInstrumentation -lLLVMProfileData -lLLVMBitWriter -lLLVMOrcJIT -lLLVMTransformUtils -lLLVMExecutionEngine -lLLVMTarget -lLLVMAnalysis -lLLVMRuntimeDyld -lLLVMObject -lLLVMMCParser -lLLVMBitReader -lLLVMMC -lLLVMCore -lLLVMSupport -lrt -ldl -ltinfo -lpthread -lz -lm | |
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include' | |
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include' | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
1 warning generated. | |
+ [ ! ] | |
+ g++ -Wl,-rpath,/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -Wl,-rpath,25462ORIGIN -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/properties /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/properties.o -L/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -lcocl -lclblast -leasycl -lclew -lpthread -L/usr/lib/llvm-3.8/lib -lLLVMLTO -lLLVMObjCARCOpts -lLLVMSymbolize -lLLVMDebugInfoPDB -lLLVMDebugInfoDWARF -lLLVMXCoreDisassembler -lLLVMXCoreCodeGen -lLLVMXCoreDesc -lLLVMXCoreInfo -lLLVMXCoreAsmPrinter -lLLVMSystemZDisassembler -lLLVMSystemZCodeGen -lLLVMSystemZAsmParser -lLLVMSystemZDesc -lLLVMSystemZInfo -lLLVMSystemZAsmPrinter -lLLVMSparcDisassembler -lLLVMSparcCodeGen -lLLVMSparcAsmParser -lLLVMSparcDesc -lLLVMSparcInfo -lLLVMSparcAsmPrinter -lLLVMPowerPCDisassembler -lLLVMPowerPCCodeGen -lLLVMPowerPCAsmParser -lLLVMPowerPCDesc -lLLVMPowerPCInfo -lLLVMPowerPCAsmPrinter -lLLVMNVPTXCodeGen -lLLVMNVPTXDesc -lLLVMNVPTXInfo -lLLVMNVPTXAsmPrinter -lLLVMMSP430CodeGen -lLLVMMSP430Desc -lLLVMMSP430Info -lLLVMMSP430AsmPrinter -lLLVMMipsDisassembler -lLLVMMipsCodeGen -lLLVMMipsAsmParser -lLLVMMipsDesc -lLLVMMipsInfo -lLLVMMipsAsmPrinter -lLLVMHexagonDisassembler -lLLVMHexagonCodeGen -lLLVMHexagonAsmParser -lLLVMHexagonDesc -lLLVMHexagonInfo -lLLVMCppBackendCodeGen -lLLVMCppBackendInfo -lLLVMBPFCodeGen -lLLVMBPFDesc -lLLVMBPFInfo -lLLVMBPFAsmPrinter -lLLVMARMDisassembler -lLLVMARMCodeGen -lLLVMARMAsmParser -lLLVMARMDesc -lLLVMARMInfo -lLLVMARMAsmPrinter -lLLVMAMDGPUCodeGen -lLLVMAMDGPUAsmParser -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMAMDGPUInfo -lLLVMAMDGPUAsmPrinter -lLLVMAArch64Disassembler -lLLVMAArch64CodeGen -lLLVMAArch64AsmParser -lLLVMAArch64Desc -lLLVMAArch64Info -lLLVMAArch64AsmPrinter -lLLVMAArch64Utils -lLLVMMIRParser -lLLVMLibDriver -lLLVMOption -lLLVMTableGen -lLLVMLineEditor -lLLVMX86Disassembler -lLLVMX86AsmParser -lLLVMX86CodeGen -lLLVMSelectionDAG -lLLVMAsmPrinter -lLLVMX86Desc -lLLVMMCDisassembler -lLLVMX86Info -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCJIT -lLLVMPasses -lLLVMipo -lLLVMVectorize -lLLVMLinker -lLLVMIRReader -lLLVMAsmParser -lLLVMDebugInfoCodeView -lLLVMInterpreter -lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine -lLLVMInstrumentation -lLLVMProfileData -lLLVMBitWriter -lLLVMOrcJIT -lLLVMTransformUtils -lLLVMExecutionEngine -lLLVMTarget -lLLVMAnalysis -lLLVMRuntimeDyld -lLLVMObject -lLLVMMCParser -lLLVMBitReader -lLLVMMC -lLLVMCore -lLLVMSupport -lrt -ldl -ltinfo -lpthread -lz -lm | |
cudaGetDeviceCount | |
cudaGetDeviceProperties stub device=0 | |
test/cocl/testblas.cu:12:9: warning: unused variable 'newrows' [-Wunused-variable] | |
int newrows = cols; | |
^ | |
2 warnings generated. | |
+ /usr/lib/llvm-3.8/bin/opt -S -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testblas-device.ll /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testblas-device-noopt.ll | |
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-host-only -emit-llvm -O3 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_hostside.h test/cocl/testblas.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testblas-hostraw.ll | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
2 warnings generated. | |
+ /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build/patch-hostside --hostrawfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/test_bitcast-hostraw.ll --devicellfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/test_bitcast-device.ll --hostpatchedfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/test_bitcast-hostpatched.ll | |
+ /usr/lib/llvm-3.8/bin/clang++ -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -DUSE_CLEW -c /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/test_bitcast-hostpatched.ll -O3 -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/test_bitcast.o | |
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include' | |
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include' | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
num platforms 1 | |
checking platform id 0x7fcc76176a18 | |
num devices 2 | |
devices: 2 | |
cuCtxCreate_v2 redirected device=0 flags=0 | |
Context() 0x29f0020 | |
Using Advanced Micro Devices, Inc. , OpenCL platform: AMD Accelerated Parallel Processing | |
Using OpenCL device: Pitcairn | |
cuCtxCreate_v2 new context=0x29f0020 | |
created context 0x29f0020 | |
cuCtxCreate_v2 redirected device=1 flags=0 | |
Context() 0x29eef00 | |
terminate called after throwing an instance of 'std::runtime_error' | |
what(): Not enough OpenCL-enabled GPUs found to satisfy gpu index: 1 | |
1 warning generated. | |
+ [ ! ] | |
+ g++ -Wl,-rpath,/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -Wl,-rpath,25532ORIGIN -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/test_bitcast /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/test_bitcast.o -L/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -lcocl -lclblast -leasycl -lclew -lpthread -L/usr/lib/llvm-3.8/lib -lLLVMLTO -lLLVMObjCARCOpts -lLLVMSymbolize -lLLVMDebugInfoPDB -lLLVMDebugInfoDWARF -lLLVMXCoreDisassembler -lLLVMXCoreCodeGen -lLLVMXCoreDesc -lLLVMXCoreInfo -lLLVMXCoreAsmPrinter -lLLVMSystemZDisassembler -lLLVMSystemZCodeGen -lLLVMSystemZAsmParser -lLLVMSystemZDesc -lLLVMSystemZInfo -lLLVMSystemZAsmPrinter -lLLVMSparcDisassembler -lLLVMSparcCodeGen -lLLVMSparcAsmParser -lLLVMSparcDesc -lLLVMSparcInfo -lLLVMSparcAsmPrinter -lLLVMPowerPCDisassembler -lLLVMPowerPCCodeGen -lLLVMPowerPCAsmParser -lLLVMPowerPCDesc -lLLVMPowerPCInfo -lLLVMPowerPCAsmPrinter -lLLVMNVPTXCodeGen -lLLVMNVPTXDesc -lLLVMNVPTXInfo -lLLVMNVPTXAsmPrinter -lLLVMMSP430CodeGen -lLLVMMSP430Desc -lLLVMMSP430Info -lLLVMMSP430AsmPrinter -lLLVMMipsDisassembler -lLLVMMipsCodeGen -lLLVMMipsAsmParser -lLLVMMipsDesc -lLLVMMipsInfo -lLLVMMipsAsmPrinter -lLLVMHexagonDisassembler -lLLVMHexagonCodeGen -lLLVMHexagonAsmParser -lLLVMHexagonDesc -lLLVMHexagonInfo -lLLVMCppBackendCodeGen -lLLVMCppBackendInfo -lLLVMBPFCodeGen -lLLVMBPFDesc -lLLVMBPFInfo -lLLVMBPFAsmPrinter -lLLVMARMDisassembler -lLLVMARMCodeGen -lLLVMARMAsmParser -lLLVMARMDesc -lLLVMARMInfo -lLLVMARMAsmPrinter -lLLVMAMDGPUCodeGen -lLLVMAMDGPUAsmParser -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMAMDGPUInfo -lLLVMAMDGPUAsmPrinter -lLLVMAArch64Disassembler -lLLVMAArch64CodeGen -lLLVMAArch64AsmParser -lLLVMAArch64Desc -lLLVMAArch64Info -lLLVMAArch64AsmPrinter -lLLVMAArch64Utils -lLLVMMIRParser -lLLVMLibDriver -lLLVMOption -lLLVMTableGen -lLLVMLineEditor -lLLVMX86Disassembler -lLLVMX86AsmParser -lLLVMX86CodeGen -lLLVMSelectionDAG -lLLVMAsmPrinter -lLLVMX86Desc -lLLVMMCDisassembler -lLLVMX86Info -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCJIT -lLLVMPasses -lLLVMipo -lLLVMVectorize -lLLVMLinker -lLLVMIRReader -lLLVMAsmParser -lLLVMDebugInfoCodeView -lLLVMInterpreter -lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine -lLLVMInstrumentation -lLLVMProfileData -lLLVMBitWriter -lLLVMOrcJIT -lLLVMTransformUtils -lLLVMExecutionEngine -lLLVMTarget -lLLVMAnalysis -lLLVMRuntimeDyld -lLLVMObject -lLLVMMCParser -lLLVMBitReader -lLLVMMC -lLLVMCore -lLLVMSupport -lrt -ldl -ltinfo -lpthread -lz -lm | |
maxworkgroupsize 256 | |
cuMemGetInfo redirected | |
free 1395373056 total 2103008640 | |
[ 92%] Built target run-properties | |
Scanning dependencies of target run-multithreading | |
[ 92%] /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/bin/cocl test/cocl/multithreading.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multithreading --add_ir_to_cl | |
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-gpu-arch=sm_30 --cuda-device-only -emit-llvm -O0 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_deviceside.h -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include test/cocl/multithreading.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multithreading-device-noopt.ll | |
Aborted (core dumped) | |
CMakeFiles/run-multigpu.dir/build.make:57: recipe for target 'CMakeFiles/run-multigpu' failed | |
make[3]: *** [CMakeFiles/run-multigpu] Error 134 | |
CMakeFiles/Makefile2:998: recipe for target 'CMakeFiles/run-multigpu.dir/all' failed | |
make[2]: *** [CMakeFiles/run-multigpu.dir/all] Error 2 | |
make[2]: *** Waiting for unfinished jobs.... | |
creating default context | |
Context() 0x244b030 | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
Using Advanced Micro Devices, Inc. , OpenCL platform: AMD Accelerated Parallel Processing | |
Using OpenCL device: Pitcairn | |
Memory::newDeviceAlloc context=0x244b030 bytes=128 memory=0x2857a10 clmem=0x2cbe960 | |
Memory::newDeviceAlloc context=0x244b030 bytes=128 memory=0x2cbec90 clmem=0x2cbeae0 | |
Memory::newDeviceAlloc context=0x244b030 bytes=128 memory=0x2cbb490 clmem=0x2805a70 | |
cudamempcy using opencl cudaMemcpyKind 222 count=128 | |
found memory: 0x2857a10 fakepos=128 bytes=128 | |
cudaConfigureCall using default_queue | |
cudaConfigureCall queue=0x28579f0 | |
grid(1, 1, 1) | |
block(32, 1, 1) | |
configureKernel name=_Z8mykernelPiPfS_ | |
building kernel _Z8mykernelPiPfS_ | |
cocl dump cl set | |
cl: [ | |
kernel void _Z8mykernelPiPfS_(global int* int1, uint int1_offset, global float* f1, uint f1_offset, global int* int2, uint int2_offset, local int *scratch); | |
kernel void _Z8mykernelPiPfS_(global int* int1, uint int1_offset, global float* f1, uint f1_offset, global int* int2, uint int2_offset, local int *scratch) { | |
int2 += int2_offset; | |
f1 += f1_offset; | |
int1 += int1_offset; | |
global int* v3; | |
int v2; | |
v1:; | |
/* int v2 = load int1 */; | |
v2 = int1[0]; | |
/* int* v3 = bitcast f1 */; | |
v3 = (global int*)f1; | |
/* void v4 = store v2 v3 */; | |
v3[0] = v2; | |
/* void v5 = store v2 int2 */; | |
int2[0] = v2; | |
return; | |
} | |
] | |
__internal__ build log: | |
"/tmp/OCL25733T1.cl", line 12: warning: label "v1" was declared but never | |
referenced | |
v1:; | |
^ | |
... built | |
setKernelArgCharStar 0x80 | |
found memory: 0x2857a10 fakepos=128 bytes=128 | |
setKernelArgCharStar 0x100 | |
found memory: 0x2cbec90 fakepos=256 bytes=128 | |
setKernelArgCharStar 0x180 | |
found memory: 0x2cbb490 fakepos=384 bytes=128 | |
kernelGo queue=0x28579f0 | |
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>> | |
workgroupSize=32 | |
.. kernel queued | |
cudamempcy using opencl cudaMemcpyKind 111 count=128 | |
cudamemcpy device to host | |
found memory: 0x2cbec90 fakepos=256 bytes=128 | |
cudamempcy using opencl cudaMemcpyKind 111 count=128 | |
cudamemcpy device to host | |
found memory: 0x2cbb490 fakepos=384 bytes=128 | |
this should NOT be 123, should be some weird float value, not even slightly close to 123 : | |
f1[0] 1.7236e-43 | |
this SHOULD be 123 : | |
int2[0] 123 | |
cudamempcy using opencl cudaMemcpyKind 222 count=128 | |
found memory: 0x2857a10 fakepos=128 bytes=128 | |
after copy to device | |
cudaConfigureCall using default_queue | |
cudaConfigureCall queue=0x28579f0 | |
grid(1, 1, 1) | |
block(32, 1, 1) | |
configureKernel name=_Z10inttofloatPfPi | |
building kernel _Z10inttofloatPfPi | |
cocl dump cl set | |
cl: [ | |
kernel void _Z10inttofloatPfPi(global float* out, uint out_offset, global int* in, uint in_offset, local int *scratch); | |
kernel void _Z10inttofloatPfPi(global float* out, uint out_offset, global int* in, uint in_offset, local int *scratch) { | |
in += in_offset; | |
out += out_offset; | |
global int* v3; | |
int v2; | |
v1:; | |
/* int v2 = load in */; | |
v2 = in[0]; | |
/* int* v3 = bitcast out */; | |
v3 = (global int*)out; | |
/* void v4 = store v2 v3 */; | |
v3[0] = v2; | |
return; | |
} | |
] | |
__internal__ build log: | |
"/tmp/OCL25733T2.cl", line 11: warning: label "v1" was declared but never | |
referenced | |
v1:; | |
^ | |
... built | |
setKernelArgCharStar 0x100 | |
found memory: 0x2cbec90 fakepos=256 bytes=128 | |
setKernelArgCharStar 0x80 | |
found memory: 0x2857a10 fakepos=128 bytes=128 | |
kernelGo queue=0x28579f0 | |
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>> | |
workgroupSize=32 | |
.. kernel queued | |
after kernel call 2 | |
cudamempcy using opencl cudaMemcpyKind 111 count=128 | |
cudamemcpy device to host | |
found memory: 0x2cbec90 fakepos=256 bytes=128 | |
f1[0]1.08881e-42 | |
cudaConfigureCall using default_queue | |
cudaConfigureCall queue=0x28579f0 | |
grid(1, 1, 1) | |
block(32, 1, 1) | |
configureKernel name=_Z10floattointPiPf | |
building kernel _Z10floattointPiPf | |
cocl dump cl set | |
cl: [ | |
kernel void _Z10floattointPiPf(global int* out, uint out_offset, global float* in, uint in_offset, local int *scratch); | |
kernel void _Z10floattointPiPf(global int* out, uint out_offset, global float* in, uint in_offset, local int *scratch) { | |
in += in_offset; | |
out += out_offset; | |
int v3; | |
v1:; | |
/* int* v2 = bitcast in */; | |
/* int v3 = load v2 */; | |
v3 = ((global int*)in)[0]; | |
/* void v4 = store v3 out */; | |
out[0] = v3; | |
return; | |
} | |
] | |
test/cocl/testblas.cu:12:9: warning: unused variable 'newrows' [-Wunused-variable] | |
int newrows = cols; | |
^ | |
__internal__ build log: | |
"/tmp/OCL25733T3.cl", line 10: warning: label "v1" was declared but never | |
referenced | |
v1:; | |
^ | |
... built | |
setKernelArgCharStar 0x180 | |
found memory: 0x2cbb490 fakepos=384 bytes=128 | |
setKernelArgCharStar 0x100 | |
found memory: 0x2cbec90 fakepos=256 bytes=128 | |
kernelGo queue=0x28579f0 | |
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>> | |
workgroupSize=32 | |
.. kernel queued | |
after kernel call 3 | |
cudamempcy using opencl cudaMemcpyKind 111 count=128 | |
cudamemcpy device to host | |
found memory: 0x2cbb490 fakepos=384 bytes=128 | |
int2[0]777 | |
3 warnings generated. | |
+ /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build/patch-hostside --hostrawfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testblas-hostraw.ll --devicellfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testblas-device.ll --hostpatchedfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testblas-hostpatched.ll | |
+ /usr/lib/llvm-3.8/bin/clang++ -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -DUSE_CLEW -c /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testblas-hostpatched.ll -O3 -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testblas.o | |
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include' | |
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include' | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
[ 92%] Built target run-test_bitcast | |
1 warning generated. | |
+ [ ! ] | |
+ g++ -Wl,-rpath,/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -Wl,-rpath,25597ORIGIN -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testblas /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/testblas.o -L/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -lcocl -lclblast -leasycl -lclew -lpthread -L/usr/lib/llvm-3.8/lib -lLLVMLTO -lLLVMObjCARCOpts -lLLVMSymbolize -lLLVMDebugInfoPDB -lLLVMDebugInfoDWARF -lLLVMXCoreDisassembler -lLLVMXCoreCodeGen -lLLVMXCoreDesc -lLLVMXCoreInfo -lLLVMXCoreAsmPrinter -lLLVMSystemZDisassembler -lLLVMSystemZCodeGen -lLLVMSystemZAsmParser -lLLVMSystemZDesc -lLLVMSystemZInfo -lLLVMSystemZAsmPrinter -lLLVMSparcDisassembler -lLLVMSparcCodeGen -lLLVMSparcAsmParser -lLLVMSparcDesc -lLLVMSparcInfo -lLLVMSparcAsmPrinter -lLLVMPowerPCDisassembler -lLLVMPowerPCCodeGen -lLLVMPowerPCAsmParser -lLLVMPowerPCDesc -lLLVMPowerPCInfo -lLLVMPowerPCAsmPrinter -lLLVMNVPTXCodeGen -lLLVMNVPTXDesc -lLLVMNVPTXInfo -lLLVMNVPTXAsmPrinter -lLLVMMSP430CodeGen -lLLVMMSP430Desc -lLLVMMSP430Info -lLLVMMSP430AsmPrinter -lLLVMMipsDisassembler -lLLVMMipsCodeGen -lLLVMMipsAsmParser -lLLVMMipsDesc -lLLVMMipsInfo -lLLVMMipsAsmPrinter -lLLVMHexagonDisassembler -lLLVMHexagonCodeGen -lLLVMHexagonAsmParser -lLLVMHexagonDesc -lLLVMHexagonInfo -lLLVMCppBackendCodeGen -lLLVMCppBackendInfo -lLLVMBPFCodeGen -lLLVMBPFDesc -lLLVMBPFInfo -lLLVMBPFAsmPrinter -lLLVMARMDisassembler -lLLVMARMCodeGen -lLLVMARMAsmParser -lLLVMARMDesc -lLLVMARMInfo -lLLVMARMAsmPrinter -lLLVMAMDGPUCodeGen -lLLVMAMDGPUAsmParser -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMAMDGPUInfo -lLLVMAMDGPUAsmPrinter -lLLVMAArch64Disassembler -lLLVMAArch64CodeGen -lLLVMAArch64AsmParser -lLLVMAArch64Desc -lLLVMAArch64Info -lLLVMAArch64AsmPrinter -lLLVMAArch64Utils -lLLVMMIRParser -lLLVMLibDriver -lLLVMOption -lLLVMTableGen -lLLVMLineEditor -lLLVMX86Disassembler -lLLVMX86AsmParser -lLLVMX86CodeGen -lLLVMSelectionDAG -lLLVMAsmPrinter -lLLVMX86Desc -lLLVMMCDisassembler -lLLVMX86Info -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCJIT -lLLVMPasses -lLLVMipo -lLLVMVectorize -lLLVMLinker -lLLVMIRReader -lLLVMAsmParser -lLLVMDebugInfoCodeView -lLLVMInterpreter -lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine -lLLVMInstrumentation -lLLVMProfileData -lLLVMBitWriter -lLLVMOrcJIT -lLLVMTransformUtils -lLLVMExecutionEngine -lLLVMTarget -lLLVMAnalysis -lLLVMRuntimeDyld -lLLVMObject -lLLVMMCParser -lLLVMBitReader -lLLVMMC -lLLVMCore -lLLVMSupport -lrt -ldl -ltinfo -lpthread -lz -lm | |
1 warning generated. | |
+ /usr/lib/llvm-3.8/bin/opt -S -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multithreading-device.ll /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multithreading-device-noopt.ll | |
+ /usr/lib/llvm-3.8/bin/clang++ -DUSE_CLEW -std=c++11 -x cuda --cuda-host-only -emit-llvm -O3 -S -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/EasyCL -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL/thirdparty/clew/include -I/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/src/EasyCL -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/fake_funcs.h -include /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/include/cocl/cocl_hostside.h test/cocl/multithreading.cu -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multithreading-hostraw.ll | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
cuStreamCreate redirected | |
cuStreamCreate current context=0 | |
creating default context | |
Context() 0xb89030 | |
Using Advanced Micro Devices, Inc. , OpenCL platform: AMD Accelerated Parallel Processing | |
Using OpenCL device: Pitcairn | |
cuStreamCreate redirected new stream 0xe96f50 | |
A: | |
3 5 | |
5 8 | |
2 -1 | |
B: | |
3 5 4 1 | |
5 8 5 7 | |
ATrans: | |
3 5 2 | |
5 8 -1 | |
BTrans: | |
3 5 | |
5 8 | |
4 5 | |
1 7 | |
Memory::newDeviceAlloc context=0xb89030 bytes=4120 memory=0x10d7c50 clmem=0x10d7ae0 | |
Memory::newDeviceAlloc context=0xb89030 bytes=4128 memory=0x10d7df0 clmem=0x10d7c80 | |
Memory::newDeviceAlloc context=0xb89030 bytes=4144 memory=0x10d8010 clmem=0x10d7e60 | |
cuMemcpyHtoDAsync dst=128 src=0x7ffed72eac10 bytes=24 | |
found memory: 0x10d7c50 fakepos=128 bytes=4120 | |
cuMemcpyHtoDAsync dst=4352 src=0x7ffed72eabf0 bytes=32 | |
found memory: 0x10d7df0 fakepos=4352 bytes=4128 | |
found memory: 0x10d7c50 fakepos=128 bytes=4120 | |
found memory: 0x10d7df0 fakepos=4352 bytes=4128 | |
found memory: 0x10d8010 fakepos=8576 bytes=4144 | |
cuMemcpyDtoHAsync queue=0xe96f70 dst=0x7ffed72eab90 src=8576 bytes=48 | |
found memory: 0x10d8010 fakepos=8576 bytes=4144 | |
cuMemcpyDtoHAsync dst[0] 34 | |
cudaStreamSynchronize queue=0xe96f70 | |
C trans: | |
34 55 1 | |
55 89 2 | |
37 60 3 | |
38 61 -5 | |
C: | |
34 55 37 38 | |
55 89 60 61 | |
1 2 3 -5 | |
C check: | |
34 55 37 38 | |
55 89 60 61 | |
1 2 3 -5 | |
found memory: 0x10d7c50 fakepos=128 bytes=4120 | |
found memory: 0x10d7df0 fakepos=4352 bytes=4128 | |
found memory: 0x10d8010 fakepos=8576 bytes=4144 | |
cuStreamDestroy_v2 redirected stream=0xe96f50 | |
finished testblas | |
2 warnings generated. | |
+ /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build/patch-hostside --hostrawfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multithreading-hostraw.ll --devicellfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multithreading-device.ll --hostpatchedfile /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multithreading-hostpatched.ll | |
+ /usr/lib/llvm-3.8/bin/clang++ -I/usr/lib/llvm-3.8/include -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/usr/lib/llvm-3.8/include -std=c++11 -fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -std=c++11 -ffunction-sections -fdata-sections -O2 -fexceptions -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -DUSE_CLEW -c /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multithreading-hostpatched.ll -O3 -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multithreading.o | |
[ 92%] Built target run-testblas | |
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include' | |
clang: warning: argument unused during compilation: '-I /usr/lib/llvm-3.8/include' | |
warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] | |
1 warning generated. | |
+ [ ! ] | |
+ g++ -Wl,-rpath,/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -Wl,-rpath,25687ORIGIN -o /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multithreading /media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/cocl/multithreading.o -L/media/Compressed/Drivers_bios/src/dev/tensorflow-cl/third_party/cuda-on-cl/build -lcocl -lclblast -leasycl -lclew -lpthread -L/usr/lib/llvm-3.8/lib -lLLVMLTO -lLLVMObjCARCOpts -lLLVMSymbolize -lLLVMDebugInfoPDB -lLLVMDebugInfoDWARF -lLLVMXCoreDisassembler -lLLVMXCoreCodeGen -lLLVMXCoreDesc -lLLVMXCoreInfo -lLLVMXCoreAsmPrinter -lLLVMSystemZDisassembler -lLLVMSystemZCodeGen -lLLVMSystemZAsmParser -lLLVMSystemZDesc -lLLVMSystemZInfo -lLLVMSystemZAsmPrinter -lLLVMSparcDisassembler -lLLVMSparcCodeGen -lLLVMSparcAsmParser -lLLVMSparcDesc -lLLVMSparcInfo -lLLVMSparcAsmPrinter -lLLVMPowerPCDisassembler -lLLVMPowerPCCodeGen -lLLVMPowerPCAsmParser -lLLVMPowerPCDesc -lLLVMPowerPCInfo -lLLVMPowerPCAsmPrinter -lLLVMNVPTXCodeGen -lLLVMNVPTXDesc -lLLVMNVPTXInfo -lLLVMNVPTXAsmPrinter -lLLVMMSP430CodeGen -lLLVMMSP430Desc -lLLVMMSP430Info -lLLVMMSP430AsmPrinter -lLLVMMipsDisassembler -lLLVMMipsCodeGen -lLLVMMipsAsmParser -lLLVMMipsDesc -lLLVMMipsInfo -lLLVMMipsAsmPrinter -lLLVMHexagonDisassembler -lLLVMHexagonCodeGen -lLLVMHexagonAsmParser -lLLVMHexagonDesc -lLLVMHexagonInfo -lLLVMCppBackendCodeGen -lLLVMCppBackendInfo -lLLVMBPFCodeGen -lLLVMBPFDesc -lLLVMBPFInfo -lLLVMBPFAsmPrinter -lLLVMARMDisassembler -lLLVMARMCodeGen -lLLVMARMAsmParser -lLLVMARMDesc -lLLVMARMInfo -lLLVMARMAsmPrinter -lLLVMAMDGPUCodeGen -lLLVMAMDGPUAsmParser -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMAMDGPUInfo -lLLVMAMDGPUAsmPrinter -lLLVMAArch64Disassembler -lLLVMAArch64CodeGen -lLLVMAArch64AsmParser -lLLVMAArch64Desc -lLLVMAArch64Info -lLLVMAArch64AsmPrinter -lLLVMAArch64Utils -lLLVMMIRParser -lLLVMLibDriver -lLLVMOption -lLLVMTableGen -lLLVMLineEditor -lLLVMX86Disassembler -lLLVMX86AsmParser -lLLVMX86CodeGen -lLLVMSelectionDAG -lLLVMAsmPrinter -lLLVMX86Desc -lLLVMMCDisassembler -lLLVMX86Info -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCJIT -lLLVMPasses -lLLVMipo -lLLVMVectorize -lLLVMLinker -lLLVMIRReader -lLLVMAsmParser -lLLVMDebugInfoCodeView -lLLVMInterpreter -lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine -lLLVMInstrumentation -lLLVMProfileData -lLLVMBitWriter -lLLVMOrcJIT -lLLVMTransformUtils -lLLVMExecutionEngine -lLLVMTarget -lLLVMAnalysis -lLLVMRuntimeDyld -lLLVMObject -lLLVMMCParser -lLLVMBitReader -lLLVMMC -lLLVMCore -lLLVMSupport -lrt -ldl -ltinfo -lpthread -lz -lm | |
creaed threads | |
thread 0 | |
cuStreamCreate redirected | |
cuStreamCreate current context=0 | |
creating default context | |
Context() 0x7f859c0008e0 | |
thread 1 | |
cuStreamCreate redirected | |
cuStreamCreate current context=0 | |
creating default context | |
Context() 0x7f85900008e0 | |
thread 2 | |
cuStreamCreate redirected | |
cuStreamCreate current context=0 | |
creating default context | |
Context() 0x7f85880008e0 | |
thread 3 | |
cuStreamCreate redirected | |
cuStreamCreate current context=0 | |
creating default context | |
Context() 0x7f85800008e0 | |
Using Advanced Micro Devices, Inc. , OpenCL platform: AMD Accelerated Parallel Processing | |
Using OpenCL device: Pitcairn | |
Using Advanced Micro Devices, Inc. , OpenCL platform: AMD Accelerated Parallel Processing | |
Using OpenCL device: Pitcairn | |
Using Advanced Micro Devices, Inc. , OpenCL platform: AMD Accelerated Parallel Processing | |
Using OpenCL device: Pitcairn | |
cuStreamCreate redirected new stream 0x7f859c2b86c0 | |
Memory::newDeviceAlloc context=0x7f859c0008e0 bytes=4096 memory=0x7f859c2b8ce0 clmem=0x7f859c2b8b70 | |
cudaConfigureCall queue=0x7f859c2b86e0 | |
grid(1, 1, 1) | |
block(32, 1, 1) | |
configureKernel name=_Z8getValuePfS_ | |
building kernel _Z8getValuePfS_ | |
cocl dump cl set | |
cl: [ | |
kernel void _Z8getValuePfS_(global float* outdata, uint outdata_offset, global float* indata, uint indata_offset, local int *scratch); | |
kernel void _Z8getValuePfS_(global float* outdata, uint outdata_offset, global float* indata, uint indata_offset, local int *scratch) { | |
indata += indata_offset; | |
outdata += outdata_offset; | |
float v4; | |
v1:; | |
/* bool v2 = icmp indata <unk> */; | |
/* float v4 = select v2 <unk> <unk> */; | |
v4 = indata == 0 ? 3.0f : 2.0f; | |
/* void v7 = store v4 outdata */; | |
outdata[0] = v4; | |
return; | |
} | |
] | |
Using Advanced Micro Devices, Inc. , OpenCL platform: AMD Accelerated Parallel Processing | |
Using OpenCL device: Pitcairn | |
cuStreamCreate redirected new stream 0x7f8590001810 | |
Memory::newDeviceAlloc context=0x7f85900008e0 bytes=4096 memory=0x7f8590001290 clmem=0x7f8590002c80 | |
cuStreamCreate redirected new stream 0x7f8588001810 | |
Memory::newDeviceAlloc context=0x7f85880008e0 bytes=4096 memory=0x7f8588001290 clmem=0x7f8588002c80 | |
cuStreamCreate redirected new stream 0x7f8580001810 | |
Memory::newDeviceAlloc context=0x7f85800008e0 bytes=4096 memory=0x7f8580001290 clmem=0x7f8580002c80 | |
__internal__ build log: | |
"/tmp/OCL25771T1.cl", line 10: warning: label "v1" was declared but never | |
referenced | |
v1:; | |
^ | |
... built | |
setKernelArgCharStar 0x80 | |
found memory: 0x7f859c2b8ce0 fakepos=128 bytes=4096 | |
setKernelArgCharStar 0 | |
kernelGo queue=0x7f859c2b86e0 | |
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>> | |
workgroupSize=32 | |
.. kernel queued | |
cudaConfigureCall queue=0x7f859c2b86e0 | |
grid(1, 1, 1) | |
block(32, 1, 1) | |
configureKernel name=_Z8getValuePfS_ | |
setKernelArgCharStar 0x80 | |
found memory: 0x7f859c2b8ce0 fakepos=128 bytes=4096 | |
setKernelArgCharStar 0 | |
kernelGo queue=0x7f859c2b86e0 | |
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>> | |
workgroupSize=32 | |
.. kernel queued | |
cudaConfigureCall queue=0x7f859c2b86e0 | |
grid(1, 1, 1) | |
block(32, 1, 1) | |
configureKernel name=_Z8getValuePfS_ | |
setKernelArgCharStar 0x80 | |
found memory: 0x7f859c2b8ce0 fakepos=128 bytes=4096 | |
setKernelArgCharStar 0 | |
kernelGo queue=0x7f859c2b86e0 | |
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>> | |
workgroupSize=32 | |
.. kernel queued | |
cudaConfigureCall queue=0x7f859c2b86e0 | |
grid(1, 1, 1) | |
block(32, 1, 1) | |
configureKernel name=_Z8getValuePfS_ | |
setKernelArgCharStar 0x80 | |
found memory: 0x7f859c2b8ce0 fakepos=128 bytes=4096 | |
setKernelArgCharStar 0 | |
kernelGo queue=0x7f859c2b86e0 | |
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>> | |
workgroupSize=32 | |
.. kernel queued | |
cudaConfigureCall queue=0x7f8590001830 | |
grid(1, 1, 1) | |
block(32, 1, 1) | |
configureKernel name=_Z8getValuePfS_ | |
building kernel _Z8getValuePfS_ | |
cocl dump cl set | |
cl: [ | |
kernel void _Z8getValuePfS_(global float* outdata, uint outdata_offset, global float* indata, uint indata_offset, local int *scratch); | |
kernel void _Z8getValuePfS_(global float* outdata, uint outdata_offset, global float* indata, uint indata_offset, local int *scratch) { | |
indata += indata_offset; | |
outdata += outdata_offset; | |
float v4; | |
v1:; | |
/* bool v2 = icmp indata <unk> */; | |
/* float v4 = select v2 <unk> <unk> */; | |
v4 = indata == 0 ? 3.0f : 2.0f; | |
/* void v7 = store v4 outdata */; | |
outdata[0] = v4; | |
return; | |
} | |
] | |
cudaStreamSynchronize queue=0x7f859c2b86e0 | |
num kernels cached 1 | |
num kernels calls 4 | |
found memory: 0x7f859c2b8ce0 fakepos=128 bytes=4096 | |
cuStreamDestroy_v2 redirected stream=0x7f859c2b86c0 | |
joined thread 0 | |
__internal__ build log: | |
"/tmp/OCL25771T2.cl", line 10: warning: label "v1" was declared but never | |
referenced | |
v1:; | |
^ | |
... built | |
setKernelArgCharStar 0x80 | |
found memory: 0x7f8590001290 fakepos=128 bytes=4096 | |
setKernelArgCharStar 0 | |
kernelGo queue=0x7f8590001830 | |
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>> | |
workgroupSize=32 | |
.. kernel queued | |
cudaConfigureCall queue=0x7f8590001830 | |
grid(1, 1, 1) | |
block(32, 1, 1) | |
configureKernel name=_Z8getValuePfS_ | |
setKernelArgCharStar 0x80 | |
found memory: 0x7f8590001290 fakepos=128 bytes=4096 | |
setKernelArgCharStar 0 | |
kernelGo queue=0x7f8590001830 | |
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>> | |
workgroupSize=32 | |
.. kernel queued | |
cudaConfigureCall queue=0x7f8590001830 | |
grid(1, 1, 1) | |
block(32, 1, 1) | |
configureKernel name=_Z8getValuePfS_ | |
setKernelArgCharStar 0x80 | |
found memory: 0x7f8590001290 fakepos=128 bytes=4096 | |
setKernelArgCharStar 0 | |
kernelGo queue=0x7f8590001830 | |
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>> | |
workgroupSize=32 | |
.. kernel queued | |
cudaConfigureCall queue=0x7f8590001830 | |
grid(1, 1, 1) | |
block(32, 1, 1) | |
configureKernel name=_Z8getValuePfS_ | |
setKernelArgCharStar 0x80 | |
found memory: 0x7f8590001290 fakepos=128 bytes=4096 | |
setKernelArgCharStar 0 | |
kernelGo queue=0x7f8590001830 | |
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>> | |
workgroupSize=32 | |
.. kernel queued | |
cudaStreamSynchronize queue=0x7f8590001830 | |
num kernels cached 1 | |
num kernels calls 4 | |
found memory: 0x7f8590001290 fakepos=128 bytes=4096 | |
cudaConfigureCall queue=0x7f8580001830 | |
grid(1, 1, 1) | |
block(32, 1, 1) | |
configureKernel name=_Z8getValuePfS_ | |
cuStreamDestroy_v2 redirected stream=0x7f8590001810 | |
building kernel _Z8getValuePfS_ | |
cocl dump cl set | |
cl: [ | |
kernel void _Z8getValuePfS_(global float* outdata, uint outdata_offset, global float* indata, uint indata_offset, local int *scratch); | |
kernel void _Z8getValuePfS_(global float* outdata, uint outdata_offset, global float* indata, uint indata_offset, local int *scratch) { | |
indata += indata_offset; | |
outdata += outdata_offset; | |
float v4; | |
v1:; | |
/* bool v2 = icmp indata <unk> */; | |
/* float v4 = select v2 <unk> <unk> */; | |
v4 = indata == 0 ? 3.0f : 2.0f; | |
/* void v7 = store v4 outdata */; | |
outdata[0] = v4; | |
return; | |
} | |
] | |
joined thread 1 | |
__internal__ build log: | |
"/tmp/OCL25771T3.cl", line 10: warning: label "v1" was declared but never | |
referenced | |
v1:; | |
^ | |
... built | |
setKernelArgCharStar 0x80 | |
found memory: 0x7f8580001290 fakepos=128 bytes=4096 | |
setKernelArgCharStar 0 | |
kernelGo queue=0x7f8580001830 | |
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>> | |
workgroupSize=32 | |
.. kernel queued | |
cudaConfigureCall queue=0x7f8580001830 | |
grid(1, 1, 1) | |
block(32, 1, 1) | |
configureKernel name=_Z8getValuePfS_ | |
setKernelArgCharStar 0x80 | |
found memory: 0x7f8580001290 fakepos=128 bytes=4096 | |
setKernelArgCharStar 0 | |
kernelGo queue=0x7f8580001830 | |
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>> | |
workgroupSize=32 | |
.. kernel queued | |
cudaConfigureCall queue=0x7f8580001830 | |
grid(1, 1, 1) | |
block(32, 1, 1) | |
configureKernel name=_Z8getValuePfS_ | |
setKernelArgCharStar 0x80 | |
found memory: 0x7f8580001290 fakepos=128 bytes=4096 | |
setKernelArgCharStar 0 | |
kernelGo queue=0x7f8580001830 | |
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>> | |
workgroupSize=32 | |
.. kernel queued | |
cudaConfigureCall queue=0x7f8580001830 | |
grid(1, 1, 1) | |
block(32, 1, 1) | |
configureKernel name=_Z8getValuePfS_ | |
setKernelArgCharStar 0x80 | |
found memory: 0x7f8580001290 fakepos=128 bytes=4096 | |
setKernelArgCharStar 0 | |
kernelGo queue=0x7f8580001830 | |
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>> | |
workgroupSize=32 | |
.. kernel queued | |
cudaStreamSynchronize queue=0x7f8580001830 | |
num kernels cached 1 | |
num kernels calls 4 | |
found memory: 0x7f8580001290 fakepos=128 bytes=4096 | |
cudaConfigureCall queue=0x7f8588001830 | |
grid(1, 1, 1) | |
block(32, 1, 1) | |
configureKernel name=_Z8getValuePfS_ | |
cuStreamDestroy_v2 redirected stream=0x7f8580001810 | |
building kernel _Z8getValuePfS_ | |
cocl dump cl set | |
cl: [ | |
kernel void _Z8getValuePfS_(global float* outdata, uint outdata_offset, global float* indata, uint indata_offset, local int *scratch); | |
kernel void _Z8getValuePfS_(global float* outdata, uint outdata_offset, global float* indata, uint indata_offset, local int *scratch) { | |
indata += indata_offset; | |
outdata += outdata_offset; | |
float v4; | |
v1:; | |
/* bool v2 = icmp indata <unk> */; | |
/* float v4 = select v2 <unk> <unk> */; | |
v4 = indata == 0 ? 3.0f : 2.0f; | |
/* void v7 = store v4 outdata */; | |
outdata[0] = v4; | |
return; | |
} | |
] | |
__internal__ build log: | |
"/tmp/OCL25771T4.cl", line 10: warning: label "v1" was declared but never | |
referenced | |
v1:; | |
^ | |
... built | |
setKernelArgCharStar 0x80 | |
found memory: 0x7f8588001290 fakepos=128 bytes=4096 | |
setKernelArgCharStar 0 | |
kernelGo queue=0x7f8588001830 | |
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>> | |
workgroupSize=32 | |
.. kernel queued | |
cudaConfigureCall queue=0x7f8588001830 | |
grid(1, 1, 1) | |
block(32, 1, 1) | |
configureKernel name=_Z8getValuePfS_ | |
setKernelArgCharStar 0x80 | |
found memory: 0x7f8588001290 fakepos=128 bytes=4096 | |
setKernelArgCharStar 0 | |
kernelGo queue=0x7f8588001830 | |
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>> | |
workgroupSize=32 | |
.. kernel queued | |
cudaConfigureCall queue=0x7f8588001830 | |
grid(1, 1, 1) | |
block(32, 1, 1) | |
configureKernel name=_Z8getValuePfS_ | |
setKernelArgCharStar 0x80 | |
found memory: 0x7f8588001290 fakepos=128 bytes=4096 | |
setKernelArgCharStar 0 | |
kernelGo queue=0x7f8588001830 | |
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>> | |
workgroupSize=32 | |
.. kernel queued | |
cudaConfigureCall queue=0x7f8588001830 | |
grid(1, 1, 1) | |
block(32, 1, 1) | |
configureKernel name=_Z8getValuePfS_ | |
setKernelArgCharStar 0x80 | |
found memory: 0x7f8588001290 fakepos=128 bytes=4096 | |
setKernelArgCharStar 0 | |
kernelGo queue=0x7f8588001830 | |
<<< global=dim3(32,1,1,), workgroupsize=dim3(32,1,1,)>>> | |
workgroupSize=32 | |
.. kernel queued | |
cudaStreamSynchronize queue=0x7f8588001830 | |
num kernels cached 1 | |
num kernels calls 4 | |
found memory: 0x7f8588001290 fakepos=128 bytes=4096 | |
cuStreamDestroy_v2 redirected stream=0x7f8588001810 | |
joined thread 2 | |
joined thread 3 | |
[ 92%] Built target run-multithreading | |
CMakeFiles/Makefile2:208: recipe for target 'CMakeFiles/run-tests.dir/rule' failed | |
make[1]: *** [CMakeFiles/run-tests.dir/rule] Error 2 | |
Makefile:201: recipe for target 'run-tests' failed | |
make: *** [run-tests] Error 2 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment