Skip to content

Instantly share code, notes, and snippets.

@Norod
Last active May 21, 2018 10:38
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Norod/1f84448c9ab33dfc5b84787c11c9c100 to your computer and use it in GitHub Desktop.
Save Norod/1f84448c9ab33dfc5b84787c11c9c100 to your computer and use it in GitHub Desktop.
tensorflow_r1.8_rc1_macOS_patch.diff
exports
-------
export CUDA_HOME=/usr/local/cuda
export DYLD_LIBRARY_PATH=/Users/dadler/lib:/usr/local/cuda/lib:/usr/local/cuda/extras/CUPTI/lib
export LD_LIBRARY_PATH=$DYLD_LIBRARY_PATH
export PATH=$DYLD_LIBRARY_PATH:$PATH
bazel
-----
I used bazel 0.10.0 (installed from bazel-0.10.0-installer-darwin-x86_64.sh)
clang
-----
$clang --version
Apple LLVM version 8.1.0 (clang-802.0.42)
Target: x86_64-apple-darwin17.5.0
Thread model: posix
InstalledDir: /Library/Developer/CommandLineTools/usr/bin
configure
---------
CUDA_TOOLKIT_PATH="/usr/local/cuda" TF_UNOFFICIAL_SETTING=1 TF_NEED_CUDA=1 CUDNN_INSTALL_PATH="/usr/local/cuda" TF_CUDA_COMPUTE_CAPABILITIES="3.0" TF_CUDNN_VERSION="7.0" TF_CUDA_VERSION="9.1" TF_CUDA_VERSION_TOOLKIT="9.1" ./configure
builds
------
bazel build --config=cuda --config=opt --verbose_failures --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" --action_env PATH --action_env LD_LIBRARY_PATH --action_env DYLD_LIBRARY_PATH //tensorflow/cc:tutorials_example_trainer
bazel build --config=cuda --config=opt --verbose_failures --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" --action_env PATH --action_env LD_LIBRARY_PATH --action_env DYLD_LIBRARY_PATH //tensorflow/tools/pip_package:build_pip_package
pack and install
----------------
bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
pip install /tmp/tensorflow_pkg/tensorflow-1.8.0rc1-cp36-cp36m-macosx_10_13_x86_64.whl
From b1639e15f212e9cc188276cf20d586f1be035bdb Mon Sep 17 00:00:00 2001
From: Doron Adler <doron@glidetalk.com>
Date: Wed, 25 Apr 2018 20:40:27 +0300
Subject: [PATCH 1/1] tensorflow_r1.8_rc1_macOS_patch
diff --git a/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc b/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc
index a561d918bd..785e0ddf4e 100644
--- a/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc
+++ b/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc
@@ -69,7 +69,7 @@ __global__ void concat_variable_kernel(
IntType num_inputs = input_ptr_data.size;
// verbose declaration needed due to template
- extern __shared__ __align__(sizeof(T)) unsigned char smem[];
+ extern __shared__ unsigned char smem[];
IntType* smem_col_scan = reinterpret_cast<IntType*>(smem);
if (useSmem) {
diff --git a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc
index 94989089ec..a2e3e8bc87 100644
--- a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc
@@ -172,7 +172,7 @@ __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dGPUKernelNHWCSmall(
const DepthwiseArgs args, const T* input, const T* filter, T* output) {
assert(CanLaunchDepthwiseConv2dGPUSmall(args));
// Holds block plus halo and filter data for blockDim.x depths.
- extern __shared__ __align__(sizeof(T)) unsigned char shared_memory[];
+ extern __shared__ unsigned char shared_memory[];
T* const shared_data = reinterpret_cast<T*>(shared_memory);
const int num_batches = args.batch;
@@ -452,7 +452,7 @@ __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dGPUKernelNCHWSmall(
const DepthwiseArgs args, const T* input, const T* filter, T* output) {
assert(CanLaunchDepthwiseConv2dGPUSmall(args));
// Holds block plus halo and filter data for blockDim.z depths.
- extern __shared__ __align__(sizeof(T)) unsigned char shared_memory[];
+ extern __shared__ unsigned char shared_memory[];
T* const shared_data = reinterpret_cast<T*>(shared_memory);
const int num_batches = args.batch;
@@ -1118,7 +1118,7 @@ __launch_bounds__(1024, 2) void DepthwiseConv2dBackpropFilterGPUKernelNHWCSmall(
const DepthwiseArgs args, const T* output, const T* input, T* filter) {
assert(CanLaunchDepthwiseConv2dBackpropFilterGPUSmall(args, blockDim.z));
// Holds block plus halo and filter data for blockDim.x depths.
- extern __shared__ __align__(sizeof(T)) unsigned char shared_memory[];
+ extern __shared__ unsigned char shared_memory[];
T* const shared_data = reinterpret_cast<T*>(shared_memory);
const int num_batches = args.batch;
@@ -1388,7 +1388,7 @@ __launch_bounds__(1024, 2) void DepthwiseConv2dBackpropFilterGPUKernelNCHWSmall(
const DepthwiseArgs args, const T* output, const T* input, T* filter) {
assert(CanLaunchDepthwiseConv2dBackpropFilterGPUSmall(args, blockDim.x));
// Holds block plus halo and filter data for blockDim.z depths.
- extern __shared__ __align__(sizeof(T)) unsigned char shared_memory[];
+ extern __shared__ unsigned char shared_memory[];
T* const shared_data = reinterpret_cast<T*>(shared_memory);
const int num_batches = args.batch;
diff --git a/tensorflow/core/kernels/split_lib_gpu.cu.cc b/tensorflow/core/kernels/split_lib_gpu.cu.cc
index 393818730b..a7d9e02853 100644
--- a/tensorflow/core/kernels/split_lib_gpu.cu.cc
+++ b/tensorflow/core/kernels/split_lib_gpu.cu.cc
@@ -121,7 +121,7 @@ __global__ void split_v_kernel(const T* input_ptr,
int num_outputs = output_ptr_data.size;
// verbose declaration needed due to template
- extern __shared__ __align__(sizeof(T)) unsigned char smem[];
+ extern __shared__ unsigned char smem[];
IntType* smem_col_scan = reinterpret_cast<IntType*>(smem);
if (useSmem) {
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 48728ac131..7696d30dd8 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -330,11 +330,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
tf_http_archive(
name = "protobuf_archive",
urls = [
- "https://mirror.bazel.build/github.com/google/protobuf/archive/396336eb961b75f03b25824fe86cf6490fb75e3a.tar.gz",
- "https://github.com/google/protobuf/archive/396336eb961b75f03b25824fe86cf6490fb75e3a.tar.gz",
+ "https://mirror.bazel.build/github.com/dtrebbien/protobuf/archive/50f552646ba1de79e07562b41f3999fe036b4fd0.tar.gz",
+ "https://github.com/dtrebbien/protobuf/archive/50f552646ba1de79e07562b41f3999fe036b4fd0.tar.gz",
],
- sha256 = "846d907acf472ae233ec0882ef3a2d24edbbe834b80c305e867ac65a1f2c59e3",
- strip_prefix = "protobuf-396336eb961b75f03b25824fe86cf6490fb75e3a",
+ sha256 = "eb16b33431b91fe8cee479575cee8de202f3626aaf00d9bf1783c6e62b4ffbc7",
+ strip_prefix = "protobuf-50f552646ba1de79e07562b41f3999fe036b4fd0",
)
# We need to import the protobuf library under the names com_google_protobuf
diff --git a/third_party/gpus/cuda/BUILD.tpl b/third_party/gpus/cuda/BUILD.tpl
index 2a37c65bc7..61b203e005 100644
--- a/third_party/gpus/cuda/BUILD.tpl
+++ b/third_party/gpus/cuda/BUILD.tpl
@@ -110,7 +110,7 @@ cc_library(
".",
"cuda/include",
],
- linkopts = ["-lgomp"],
+ # linkopts = ["-lgomp"],
linkstatic = 1,
visibility = ["//visibility:public"],
)
--
2.17.0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment