-
-
Save 74th/31eacbbac6351649caa417b19231f09e to your computer and use it in GitHub Desktop.
diff --git a/tensorflow/core/framework/variant.h b/tensorflow/core/framework/variant.h | |
index c02391dae3..7f76609814 100644 | |
--- a/tensorflow/core/framework/variant.h | |
+++ b/tensorflow/core/framework/variant.h | |
@@ -152,7 +152,8 @@ bool DecodeVariant(const string& buf, T* value); | |
// | |
class Variant { | |
public: | |
- constexpr Variant() noexcept = default; | |
+// constexpr Variant() noexcept = default; | |
+ Variant() noexcept = default; | |
Variant(const Variant& other) | |
: value_(other.is_empty() ? std::unique_ptr<ValueInterface>() | |
diff --git a/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc b/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc | |
index a561d918bd..785e0ddf4e 100644 | |
--- a/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc | |
+++ b/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc | |
@@ -69,7 +69,7 @@ __global__ void concat_variable_kernel( | |
IntType num_inputs = input_ptr_data.size; | |
// verbose declaration needed due to template | |
- extern __shared__ __align__(sizeof(T)) unsigned char smem[]; | |
+ extern __shared__ unsigned char smem[]; | |
IntType* smem_col_scan = reinterpret_cast<IntType*>(smem); | |
if (useSmem) { | |
diff --git a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc | |
index 94989089ec..a2e3e8bc87 100644 | |
--- a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc | |
+++ b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc | |
@@ -172,7 +172,7 @@ __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dGPUKernelNHWCSmall( | |
const DepthwiseArgs args, const T* input, const T* filter, T* output) { | |
assert(CanLaunchDepthwiseConv2dGPUSmall(args)); | |
// Holds block plus halo and filter data for blockDim.x depths. | |
- extern __shared__ __align__(sizeof(T)) unsigned char shared_memory[]; | |
+ extern __shared__ unsigned char shared_memory[]; | |
T* const shared_data = reinterpret_cast<T*>(shared_memory); | |
const int num_batches = args.batch; | |
@@ -452,7 +452,7 @@ __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dGPUKernelNCHWSmall( | |
const DepthwiseArgs args, const T* input, const T* filter, T* output) { | |
assert(CanLaunchDepthwiseConv2dGPUSmall(args)); | |
// Holds block plus halo and filter data for blockDim.z depths. | |
- extern __shared__ __align__(sizeof(T)) unsigned char shared_memory[]; | |
+ extern __shared__ unsigned char shared_memory[]; | |
T* const shared_data = reinterpret_cast<T*>(shared_memory); | |
const int num_batches = args.batch; | |
@@ -1118,7 +1118,7 @@ __launch_bounds__(1024, 2) void DepthwiseConv2dBackpropFilterGPUKernelNHWCSmall( | |
const DepthwiseArgs args, const T* output, const T* input, T* filter) { | |
assert(CanLaunchDepthwiseConv2dBackpropFilterGPUSmall(args, blockDim.z)); | |
// Holds block plus halo and filter data for blockDim.x depths. | |
- extern __shared__ __align__(sizeof(T)) unsigned char shared_memory[]; | |
+ extern __shared__ unsigned char shared_memory[]; | |
T* const shared_data = reinterpret_cast<T*>(shared_memory); | |
const int num_batches = args.batch; | |
@@ -1388,7 +1388,7 @@ __launch_bounds__(1024, 2) void DepthwiseConv2dBackpropFilterGPUKernelNCHWSmall( | |
const DepthwiseArgs args, const T* output, const T* input, T* filter) { | |
assert(CanLaunchDepthwiseConv2dBackpropFilterGPUSmall(args, blockDim.x)); | |
// Holds block plus halo and filter data for blockDim.z depths. | |
- extern __shared__ __align__(sizeof(T)) unsigned char shared_memory[]; | |
+ extern __shared__ unsigned char shared_memory[]; | |
T* const shared_data = reinterpret_cast<T*>(shared_memory); | |
const int num_batches = args.batch; | |
diff --git a/tensorflow/core/kernels/split_lib_gpu.cu.cc b/tensorflow/core/kernels/split_lib_gpu.cu.cc | |
index 393818730b..a7d9e02853 100644 | |
--- a/tensorflow/core/kernels/split_lib_gpu.cu.cc | |
+++ b/tensorflow/core/kernels/split_lib_gpu.cu.cc | |
@@ -121,7 +121,7 @@ __global__ void split_v_kernel(const T* input_ptr, | |
int num_outputs = output_ptr_data.size; | |
// verbose declaration needed due to template | |
- extern __shared__ __align__(sizeof(T)) unsigned char smem[]; | |
+ extern __shared__ unsigned char smem[]; | |
IntType* smem_col_scan = reinterpret_cast<IntType*>(smem); | |
if (useSmem) { | |
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl | |
index 48728ac131..268e4fe2e6 100644 | |
--- a/tensorflow/workspace.bzl | |
+++ b/tensorflow/workspace.bzl | |
@@ -330,11 +330,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): | |
tf_http_archive( | |
name = "protobuf_archive", | |
urls = [ | |
- "https://mirror.bazel.build/github.com/google/protobuf/archive/396336eb961b75f03b25824fe86cf6490fb75e3a.tar.gz", | |
- "https://github.com/google/protobuf/archive/396336eb961b75f03b25824fe86cf6490fb75e3a.tar.gz", | |
+ "https://mirror.bazel.build/github.com/dtrebbien/protobuf/archive/50f552646ba1de79e07562b41f3999fe036b4fd0.tar.gz", | |
+ "https://github.com/dtrebbien/protobuf/archive/50f552646ba1de79e07562b41f3999fe036b4fd0.tar.gz", | |
], | |
- sha256 = "846d907acf472ae233ec0882ef3a2d24edbbe834b80c305e867ac65a1f2c59e3", | |
- strip_prefix = "protobuf-396336eb961b75f03b25824fe86cf6490fb75e3a", | |
+ sha256 = "eb16b33431b91fe8cee479575cee8de202f3626aaf00d9bf1783c6e62b4ffbc7", | |
+ strip_prefix = "protobuf-50f552646ba1de79e07562b41f3999fe036b4fd0", | |
) | |
# We need to import the protobuf library under the names com_google_protobuf |
Looks like something with nsync in libtensorflow
Termination Signal: Segmentation fault: 11
Termination Reason: Namespace SIGNAL, Code 0xb
Terminating Process: exc handler [0]
Thread 0:: Dispatch queue: com.apple.main-thread
0 libsystem_kernel.dylib 0x00007fff63af3a1e __psynch_cvwait + 10
1 libsystem_pthread.dylib 0x00007fff63cbc589 pthread_cond_wait + 732
2 libc++.1.dylib 0x00007fff618fbcb0 std::__1::condition_variable::wait(std::__1::unique_lock<std::__1::mutex>&) + 18
3 libtensorflow_framework.so 0x00000001091c8deb nsync::nsync_mu_semaphore_p_with_deadline(nsync::nsync_semaphore_s*, timespec) + 283
4 libtensorflow_framework.so 0x00000001091c5637 nsync::nsync_cv_wait_with_deadline_generic(nsync::nsync_cv_s*, void*, void (*)(void*), void (*)(void*), timespec, nsync::nsync_note_s*) + 423
5 libtensorflow_framework.so 0x00000001091c5da1 nsync::nsync_cv_wait(nsync::nsync_cv_s*, nsync::nsync_mu_s) + 49
6 _pywrap_tensorflow_internal.so 0x000000010eb9ceeb tensorflow::DirectSession::WaitForNotification(tensorflow::Notification, long long) + 155
7 _pywrap_tensorflow_internal.so 0x000000010eb931f6 tensorflow::DirectSession::WaitForNotification(tensorflow::DirectSession::RunState*, tensorflow::CancellationManager*, long long) + 38
8 _pywrap_tensorflow_internal.so 0x000000010eb92ab7 tensorflow::DirectSession::RunInternal(long long, tensorflow::RunOptions const&, tensorflow::CallFrameInterface*, tensorflow::DirectSession::ExecutorsAndKeys*, tensorflow::RunMetadata*) + 2615
9 _pywrap_tensorflow_internal.so 0x000000010eb93c51 tensorflow::DirectSession::Run(tensorflow::RunOptions const&, std::__1::vector<std::__1::pair<std::__1::basic_string<char, std::__1::char_traits, std::__1::allocator >, tensorflow::Tensor>, std::__1::allocator<std::__1::pair<std::__1::basic_string<char, std::__1::char_traits, std::__1::allocator >, tensorflow::Tensor> > > const&, std::__1::vector<std::__1::basic_string<char, std::__1::char_traits, std::__1::allocator >, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits, std::__1::allocator > > > const&, std::__1::vector<std::__1::basic_string<char, std::__1::char_traits, std::__1::allocator >, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits, std::__1::allocator > > > const&, std::__1::vector<tensorflow::Tensor, std::__1::allocatortensorflow::Tensor >, tensorflow::RunMetadata) + 1473
Got this trying to run: https://www.tensorflow.org/tutorials/estimators/cnn
@iRonJ Did you fix this problem?
I encountered the "Symbol not found: _ncclAllReduce" problem and can confirm that Orang-utan's solution worked for me.
I had the "Segmentation fault: 11" error. Finally, I switched Xcode from 9.2 to 8.3.3 and it's OK now.
@yu-fei
I got Segmentation fault: 11 even with Xcode 8.3.3.
Was there any trick?
The GitHub page needed for the TensorFlow patch is gone, so we cannot compile the patched TF. How can we solve this?
@bennix I've used clang:
Apple LLVM version 9.0.0 (clang-900.0.39.2)
Target: x86_64-apple-darwin17.7.0
Thread model: posix
InstalledDir: /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin
That is the clang provided with Xcode 9.2, the one supported by CUDA 9.1.
I've tried both 10.13.5 and 10.13.6.
Which clang/macOS version do you have?