Skip to content

Instantly share code, notes, and snippets.

@74th
Last active May 3, 2018 13:01
Show Gist options
  • Save 74th/e06c5f243f83728cbf189d3b1e813e9c to your computer and use it in GitHub Desktop.
Save 74th/e06c5f243f83728cbf189d3b1e813e9c to your computer and use it in GitHub Desktop.
Patch for building TensorFlow v1.7.0 on macOS with NVIDIA GPU (CUDA) support
diff --git a/tensorflow/core/framework/variant.h b/tensorflow/core/framework/variant.h
index c02391dae3..23e56ed294 100644
--- a/tensorflow/core/framework/variant.h
+++ b/tensorflow/core/framework/variant.h
@@ -152,7 +152,8 @@ bool DecodeVariant(const string& buf, T* value);
//
class Variant {
public:
- constexpr Variant() noexcept = default;
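+ // NOTE: `constexpr` dropped from `constexpr Variant() noexcept = default;` (presumably a build workaround for this toolchain; confirm before upstreaming).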
+ Variant() noexcept = default;
Variant(const Variant& other)
: value_(other.is_empty() ? std::unique_ptr<ValueInterface>()
diff --git a/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc b/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc
index 0f7adaf24a..8d89c66f3f 100644
--- a/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc
+++ b/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc
@@ -69,7 +69,7 @@ __global__ void concat_variable_kernel(
IntType num_inputs = input_ptr_data.size;
// verbose declaration needed due to template
- extern __shared__ __align__(sizeof(T)) unsigned char smem[];
+ extern __shared__ unsigned char smem[];
IntType* smem_col_scan = reinterpret_cast<IntType*>(smem);
if (useSmem) {
diff --git a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc
index 94989089ec..a2e3e8bc87 100644
--- a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc
@@ -172,7 +172,7 @@ __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dGPUKernelNHWCSmall(
const DepthwiseArgs args, const T* input, const T* filter, T* output) {
assert(CanLaunchDepthwiseConv2dGPUSmall(args));
// Holds block plus halo and filter data for blockDim.x depths.
- extern __shared__ __align__(sizeof(T)) unsigned char shared_memory[];
+ extern __shared__ unsigned char shared_memory[];
T* const shared_data = reinterpret_cast<T*>(shared_memory);
const int num_batches = args.batch;
@@ -452,7 +452,7 @@ __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dGPUKernelNCHWSmall(
const DepthwiseArgs args, const T* input, const T* filter, T* output) {
assert(CanLaunchDepthwiseConv2dGPUSmall(args));
// Holds block plus halo and filter data for blockDim.z depths.
- extern __shared__ __align__(sizeof(T)) unsigned char shared_memory[];
+ extern __shared__ unsigned char shared_memory[];
T* const shared_data = reinterpret_cast<T*>(shared_memory);
const int num_batches = args.batch;
@@ -1118,7 +1118,7 @@ __launch_bounds__(1024, 2) void DepthwiseConv2dBackpropFilterGPUKernelNHWCSmall(
const DepthwiseArgs args, const T* output, const T* input, T* filter) {
assert(CanLaunchDepthwiseConv2dBackpropFilterGPUSmall(args, blockDim.z));
// Holds block plus halo and filter data for blockDim.x depths.
- extern __shared__ __align__(sizeof(T)) unsigned char shared_memory[];
+ extern __shared__ unsigned char shared_memory[];
T* const shared_data = reinterpret_cast<T*>(shared_memory);
const int num_batches = args.batch;
@@ -1388,7 +1388,7 @@ __launch_bounds__(1024, 2) void DepthwiseConv2dBackpropFilterGPUKernelNCHWSmall(
const DepthwiseArgs args, const T* output, const T* input, T* filter) {
assert(CanLaunchDepthwiseConv2dBackpropFilterGPUSmall(args, blockDim.x));
// Holds block plus halo and filter data for blockDim.z depths.
- extern __shared__ __align__(sizeof(T)) unsigned char shared_memory[];
+ extern __shared__ unsigned char shared_memory[];
T* const shared_data = reinterpret_cast<T*>(shared_memory);
const int num_batches = args.batch;
diff --git a/tensorflow/core/kernels/split_lib_gpu.cu.cc b/tensorflow/core/kernels/split_lib_gpu.cu.cc
index 393818730b..a7d9e02853 100644
--- a/tensorflow/core/kernels/split_lib_gpu.cu.cc
+++ b/tensorflow/core/kernels/split_lib_gpu.cu.cc
@@ -121,7 +121,7 @@ __global__ void split_v_kernel(const T* input_ptr,
int num_outputs = output_ptr_data.size;
// verbose declaration needed due to template
- extern __shared__ __align__(sizeof(T)) unsigned char smem[];
+ extern __shared__ unsigned char smem[];
IntType* smem_col_scan = reinterpret_cast<IntType*>(smem);
if (useSmem) {
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 0ce5cda517..3ede0d340f 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -127,11 +127,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
tf_http_archive(
name = "eigen_archive",
urls = [
- "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/2355b229ea4c.tar.gz",
- "https://bitbucket.org/eigen/eigen/get/2355b229ea4c.tar.gz",
+ "https://mirror.bazel.build/bitbucket.org/dtrebbien/eigen/get/374842a18727.tar.gz",
+ "https://bitbucket.org/dtrebbien/eigen/get/374842a18727.tar.gz",
],
- sha256 = "0cadb31a35b514bf2dfd6b5d38205da94ef326ec6908fc3fd7c269948467214f",
- strip_prefix = "eigen-eigen-2355b229ea4c",
+ sha256 = "fa26e9b9ff3a2692b092d154685ec88d6cb84d4e1e895006541aff8603f15c16",
+ strip_prefix = "dtrebbien-eigen-374842a18727",
build_file = str(Label("//third_party:eigen.BUILD")),
patch_file = str(Label("//third_party:eigen_fix_cuda_compilation.patch"))
)
@@ -361,11 +361,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
tf_http_archive(
name = "protobuf_archive",
urls = [
- "https://mirror.bazel.build/github.com/google/protobuf/archive/396336eb961b75f03b25824fe86cf6490fb75e3a.tar.gz",
- "https://github.com/google/protobuf/archive/396336eb961b75f03b25824fe86cf6490fb75e3a.tar.gz",
+ "https://mirror.bazel.build/github.com/dtrebbien/protobuf/archive/50f552646ba1de79e07562b41f3999fe036b4fd0.tar.gz",
+ "https://github.com/dtrebbien/protobuf/archive/50f552646ba1de79e07562b41f3999fe036b4fd0.tar.gz",
],
- sha256 = "846d907acf472ae233ec0882ef3a2d24edbbe834b80c305e867ac65a1f2c59e3",
- strip_prefix = "protobuf-396336eb961b75f03b25824fe86cf6490fb75e3a",
+ sha256 = "eb16b33431b91fe8cee479575cee8de202f3626aaf00d9bf1783c6e62b4ffbc7",
+ strip_prefix = "protobuf-50f552646ba1de79e07562b41f3999fe036b4fd0",
)
# We need to import the protobuf library under the names com_google_protobuf
@74th
Copy link
Author

74th commented Mar 29, 2018

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment