Skip to content

Instantly share code, notes, and snippets.

@rxwei
Last active August 15, 2018 20:21
Show Gist options
  • Save rxwei/993deb4c9ed51c875e74f5ca4e074d3d to your computer and use it in GitHub Desktop.
Save rxwei/993deb4c9ed51c875e74f5ca4e074d3d to your computer and use it in GitHub Desktop.
TensorFlow macOS GPU patch (base: 8453e23a8b)
diff --git a/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc b/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc
index a561d918bd..785e0ddf4e 100644
--- a/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc
+++ b/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc
@@ -69,7 +69,7 @@ __global__ void concat_variable_kernel(
IntType num_inputs = input_ptr_data.size;
// verbose declaration needed due to template
- extern __shared__ __align__(sizeof(T)) unsigned char smem[];
+ extern __shared__ unsigned char smem[];
IntType* smem_col_scan = reinterpret_cast<IntType*>(smem);
if (useSmem) {
diff --git a/tensorflow/core/kernels/split_lib_gpu.cu.cc b/tensorflow/core/kernels/split_lib_gpu.cu.cc
index 393818730b..a7d9e02853 100644
--- a/tensorflow/core/kernels/split_lib_gpu.cu.cc
+++ b/tensorflow/core/kernels/split_lib_gpu.cu.cc
@@ -121,7 +121,7 @@ __global__ void split_v_kernel(const T* input_ptr,
int num_outputs = output_ptr_data.size;
// verbose declaration needed due to template
- extern __shared__ __align__(sizeof(T)) unsigned char smem[];
+ extern __shared__ unsigned char smem[];
IntType* smem_col_scan = reinterpret_cast<IntType*>(smem);
if (useSmem) {
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment