@JasonAtNvidia, created April 17, 2019
### Do not run this script directly.
# This is the helperscript that goes along with BuildTensorflow.sh
# (which sources it). It pulls the more complicated probing out of
# the build script and boils it down to a simple set of environment
# variable assignments.
whereami=$(pwd)
cuda_location=$(dirname "$(dirname "$(which nvcc)")")
# Get at the deviceQuery sample code to extract useful information
# about the Jetson GPU and compute capability
queryout=$(/usr/local/cuda/samples/1_Utilities/deviceQuery/deviceQuery)
compute_capability=$(echo "$queryout" | \
  grep -oP "(?<=CUDA Capability Major/Minor version number:)\s*(\d+)\.(\d*)" | \
  awk '$1=$1')
cuda_driver_version=$(echo "$queryout" | \
  grep -oP "(?<=CUDA Driver Version =)\s*(\d+)\.(\d*)" | \
  awk '$1=$1')
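# Example deviceQuery lines that the two greps above match (a TX2 is shown;
# values vary by board and JetPack release):
#   CUDA Capability Major/Minor version number:    6.2
#   ... CUDA Driver Version = 10.0, CUDA Runtime Version = 10.0 ...
# giving compute_capability=6.2 and cuda_driver_version=10.0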
cudnn_file=$(locate libcudnn.so | tail -n 1)
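# On JetPack this typically resolves to a path under
# /usr/lib/aarch64-linux-gnu (e.g. libcudnn.so.7)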
cd "$whereami"
# Default to python3 if the caller has not set $PYTHON, so 'which' has
# something to resolve (the python3 fallback is an assumption; override
# by exporting PYTHON before sourcing this file)
PYTHON=${PYTHON:-python3}
PYTHON_BIN_PATH=$(which "$PYTHON")
echo "PYTHON_BIN_PATH=$PYTHON_BIN_PATH"
USE_DEFAULT_PYTHON_LIB_PATH=1
GCC_HOST_COMPILER_PATH=$(which gcc)
echo "GCC_HOST_COMPILER_PATH=$GCC_HOST_COMPILER_PATH"
# CUDA information for the build
TF_NEED_CUDA=1
CUDA_TOOLKIT_PATH=$cuda_location
echo "CUDA_TOOLKIT_PATH=$CUDA_TOOLKIT_PATH"
TF_CUDA_VERSION=$cuda_driver_version
echo "TF_CUDA_VERSION=$TF_CUDA_VERSION"
TF_CUDA_COMPUTE_CAPABILITIES=5.3,$compute_capability
echo "TF_CUDA_COMPUTE_CAPABILITIES=$TF_CUDA_COMPUTE_CAPABILITIES"
CUDNN_INSTALL_PATH=$(dirname "$cudnn_file")
echo "CUDNN_INSTALL_PATH=$CUDNN_INSTALL_PATH"
TF_CUDNN_VERSION=$(ls -l "$CUDNN_INSTALL_PATH" | grep -oP '(?<=libcudnn\.so\.)(\d+)\.(\d*)\.(\d*)' | head -n 1)
echo "TF_CUDNN_VERSION=$TF_CUDNN_VERSION"
# Starting with TF 1.7, TensorRT support was built into the TF source code.
# Hopefully TensorRT was included in your Jetson install
TF_NEED_TENSORRT=1
TENSORRT_INSTALL_PATH=$CUDNN_INSTALL_PATH
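# On JetPack, the TensorRT libraries live alongside cuDNN in the same
# directory, which is why the cuDNN path is reused above. A quick way to
# confirm TensorRT is actually present:
#   ldconfig -p | grep libnvinfer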
# NCCL is an NVIDIA library, but we won't install more of it than TF wants at this point;
# TF treats 1.3 as the default at this time
TF_NCCL_VERSION="1.3"
# Use nvcc as the CUDA compiler
TF_CUDA_CLANG=0
# Google Cloud Platform Support
TF_NEED_GCP=0
# Hadoop File System Support
TF_NEED_HDFS=0
# Kafka file System
TF_NEED_KAFKA=0
# Amazon S3 support
TF_NEED_S3=0
TF_NEED_AWS=0
# Architecture Flags
CC_OPT_FLAGS=-march=native
# JE Malloc
TF_NEED_JEMALLOC=1
# GDR
TF_NEED_GDR=0
# OpenCL Support (This is an NVIDIA board, don't use OpenCL)
TF_NEED_OPENCL=0
# XLA
TF_ENABLE_XLA=0
# NO MKL available for ARM
TF_NEED_MKL=0
# MPI Support (we use the video card, no need for this)
TF_NEED_MPI=0
# VERBS
TF_NEED_VERBS=0
#######################################
# Export everything, in the same
# order that the configure script
# asks its questions
#######################################
export PYTHON_BIN_PATH=$PYTHON_BIN_PATH
export USE_DEFAULT_PYTHON_LIB_PATH=$USE_DEFAULT_PYTHON_LIB_PATH
export TF_NEED_JEMALLOC=$TF_NEED_JEMALLOC
export TF_NEED_GCP=$TF_NEED_GCP
export TF_NEED_HDFS=$TF_NEED_HDFS
export TF_NEED_S3=$TF_NEED_S3
export TF_NEED_AWS=$TF_NEED_AWS
export TF_NEED_KAFKA=$TF_NEED_KAFKA
export TF_ENABLE_XLA=$TF_ENABLE_XLA
export TF_NEED_GDR=$TF_NEED_GDR
export TF_NEED_VERBS=$TF_NEED_VERBS
export TF_NEED_OPENCL=$TF_NEED_OPENCL
export TF_NEED_OPENCL_SYCL=$TF_NEED_OPENCL
export TF_NEED_CUDA=$TF_NEED_CUDA
export TF_CUDA_VERSION=$TF_CUDA_VERSION
export CUDA_TOOLKIT_PATH=$CUDA_TOOLKIT_PATH
export TF_CUDNN_VERSION=$TF_CUDNN_VERSION
export CUDNN_INSTALL_PATH=$CUDNN_INSTALL_PATH
export TF_NEED_TENSORRT=$TF_NEED_TENSORRT
export TENSORRT_INSTALL_PATH=$TENSORRT_INSTALL_PATH
export TF_NCCL_VERSION=$TF_NCCL_VERSION
export TF_CUDA_COMPUTE_CAPABILITIES=$TF_CUDA_COMPUTE_CAPABILITIES
export TF_CUDA_CLANG=$TF_CUDA_CLANG
export GCC_HOST_COMPILER_PATH=$GCC_HOST_COMPILER_PATH
export TF_NEED_MPI=$TF_NEED_MPI
export CC_OPT_FLAGS=$CC_OPT_FLAGS
export TF_SET_ANDROID_WORKSPACE=0
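# Quick sanity check after sourcing this file:
#   env | grep -E '^(TF_|CUDA|CUDNN|TENSORRT|PYTHON_BIN|GCC_HOST|CC_OPT)'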
#!/bin/bash
# BuildTensorflow.sh
# Perform apt installs for bazel dependencies
sudo apt-get install build-essential openjdk-8-jdk python-dev zip unzip -y
# Tensorflow dependencies
sudo apt-get install python-numpy python-scipy python-pip python-wheel -y
sudo apt-get install python-enum34 python-mock python-h5py -y
sudo apt-get install python3-dev python3-numpy python3-scipy python3-pip python3-wheel -y
sudo apt-get install python3-mock python3-h5py -y
# mlocate will be used to locate libraries needed by TF
sudo apt-get install mlocate -y
sudo updatedb
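# With the database built, library lookups like the one in the helperscript work:
#   locate libcudnn.so | tail -n 1
# (on JetPack this is typically a path under /usr/lib/aarch64-linux-gnu)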
# TF > 1.10 version requires keras libraries be present for the build
pip install keras_applications==1.0.4 --no-deps
pip install keras_preprocessing==1.0.2 --no-deps
pip3 install keras_applications==1.0.4 --no-deps
pip3 install keras_preprocessing==1.0.2 --no-deps
pip3 install enum34
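# Quick check that the Keras build shims import cleanly:
#   python3 -c "import keras_applications, keras_preprocessing"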
# Build the deviceQuery script to allow us to pull out device information
cuda_location=/usr/local/cuda
startdir=$(pwd)
cd "$cuda_location/samples/1_Utilities/deviceQuery"
sudo make -s
# Return to where we started so the rest of the build lands in the right place
cd "$startdir"
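# Sanity check: the freshly built binary should report the onboard GPU:
#   "$cuda_location/samples/1_Utilities/deviceQuery/deviceQuery" | head -n 20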
# Go out and get bazel
wget --no-check-certificate https://github.com/bazelbuild/bazel/releases/download/0.15.0/bazel-0.15.0-dist.zip
# unzip the bazel
unzip bazel-0.15.0-dist.zip -d bazel_build
cd bazel_build
./compile.sh
# Move bazel out to the main directory
mv output/bazel ../bazel
# clear up needed space
cd ..
rm -rf bazel_build
rm -f bazel-0.15.0-dist.zip
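# bazel now sits next to this script; confirm it runs before building TF:
#   ./bazel version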
# Bazel is done, next we need tensorflow
git clone --single-branch -b v1.12.0 https://github.com/tensorflow/tensorflow
# Prepare the environment; the helperscript automagically probes the device
# and finds the libraries and CUDA information to pass to TF
source ./helperscript
# make sure the tensorflow build can find the bazel we just built
export PATH="$PATH:$(pwd)"
# configure the build of tensorflow
bash ./tensorflow/configure
# Move into Tensorflow and build it.... YAY
cd tensorflow
git apply ../tf_1.12.0_patch.patch
../bazel build --config=opt --config=cuda --config=monolithic //tensorflow/tools/pip_package:build_pip_package
../bazel build --config=opt --config=cuda --config=monolithic //tensorflow:libtensorflow_cc.so
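# Standard TF packaging step: once the first build target finishes, turn it
# into an installable wheel (run from inside the tensorflow directory):
#   bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
#   pip3 install /tmp/tensorflow_pkg/tensorflow-*.whl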
tf_1.12.0_patch.patch: drops -mfpu=neon (a 32-bit ARM flag that aarch64 gcc
rejects) and includes the AWS SDK linux-shared platform sources by default.
diff --git a/tensorflow/contrib/lite/kernels/internal/BUILD b/tensorflow/contrib/lite/kernels/internal/BUILD
index afb5ec0..4a45afe 100644
--- a/tensorflow/contrib/lite/kernels/internal/BUILD
+++ b/tensorflow/contrib/lite/kernels/internal/BUILD
@@ -21,7 +21,6 @@ HARD_FP_FLAGS_IF_APPLICABLE = select({
 NEON_FLAGS_IF_APPLICABLE = select({
     ":arm": [
         "-O3",
-        "-mfpu=neon",
     ],
     ":armeabi-v7a": [
         "-O3",
diff --git a/third_party/aws.BUILD b/third_party/aws.BUILD
index 5426f79..82d8a0d 100644
--- a/third_party/aws.BUILD
+++ b/third_party/aws.BUILD
@@ -24,7 +24,9 @@ cc_library(
         "@org_tensorflow//tensorflow:raspberry_pi_armeabi": glob([
             "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp",
         ]),
-        "//conditions:default": [],
+        "//conditions:default": glob([
+            "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp",
+        ]),
     }) + glob([
         "aws-cpp-sdk-core/include/**/*.h",
         "aws-cpp-sdk-core/source/*.cpp",