@jkjung-avt
Last active October 19, 2023 13:58
Scripts for installing tensorflow-2.6.0 on JetPack-4.6
#!/bin/bash
set -e
# tensorflow version
version=2.6.0
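# Make sure this is JetPack-4.6 (L4T R32.6.1) by checking /etc/nv_tegra_release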
if [[ ! $(head -1 /etc/nv_tegra_release) =~ R32.*6\.1 ]] ; then
  echo "ERROR: not JetPack-4.6"
  exit 1
fi
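# Map the Tegra chip id to the CUDA compute capability of this module's GPU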
case $(cat /sys/module/tegra_fuse/parameters/tegra_chip_id) in
  "33" )  # Nano and TX1
    cuda_compute=5.3
    ;;
  "24" )  # TX2
    cuda_compute=6.2
    ;;
  "25" )  # Xavier NX and AGX Xavier
    cuda_compute=7.2
    ;;
  * )     # default
    cuda_compute=5.3,6.2,7.2
    ;;
esac
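# Locate the patch file expected next to this script (tensorflow/tensorflow-${version}.patch)
# and read the installed TensorRT major version off libnvinfer's soname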
script_path=$(realpath $0)
patch_path=$(dirname $script_path)/tensorflow/tensorflow-${version}.patch
trt_version=$(echo /usr/lib/aarch64-linux-gnu/libnvinfer.so.? | cut -d '.' -f 3)
src_folder=${HOME}/src
mkdir -p $src_folder
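# Pre-flight checks: abort if tensorflow is already installed, or if numpy or bazel is missing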
if pip3 list | grep tensorflow > /dev/null; then
  echo "ERROR: tensorflow is installed already"
  exit 1
fi
if [[ -z $(pip3 list | grep numpy) ]]; then
  echo "ERROR: missing numpy"
  exit 1
fi
if ! which bazel > /dev/null; then
  echo "ERROR: bazel has not been installed"
  exit 1
fi
echo "** Install requirements"
sudo apt install -y llvm-10* clang-10*
sudo apt install -y libhdf5-serial-dev hdf5-tools libhdf5-dev zlib1g-dev \
zip libjpeg8-dev liblapack-dev libblas-dev gfortran
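# Python-side build dependencies; h5py is built against the system HDF5 installed above,
# and the remaining packages are version-pinned by the script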
sudo pip3 install -U six wheel setuptools typing_extensions
sudo pip3 install pkgconfig
sudo env H5PY_SETUP_REQUIRES=0 pip3 install -U h5py==3.1.0
sudo pip3 install -U future==0.18.2 mock==3.0.5 \
keras_preprocessing==1.1.2 keras_applications==1.0.8 \
gast==0.4.0 futures pybind11
echo "** Download and patch tensorflow-${version}"
pushd $src_folder
if [ ! -f tensorflow-${version}.tar.gz ]; then
  wget https://github.com/tensorflow/tensorflow/archive/v${version}.tar.gz -O tensorflow-${version}.tar.gz
fi
tar xzvf tensorflow-${version}.tar.gz
cd tensorflow-${version}
patch -N -p1 < $patch_path || \
  echo "tensorflow-${version} source tree appears to be patched already. Continue..."
echo "** Configure and build tensorflow-${version}"
export TMP=/tmp
export CC=/usr/bin/clang-10
export CXX=/usr/bin/clang++-10
export CXXFLAGS="-stdlib=libc++"
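# Answer ./configure's questions non-interactively by pre-setting its environment variables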
PYTHON_BIN_PATH=$(which python3) \
PYTHON_LIB_PATH=$(python3 -c 'import site; print(site.getsitepackages()[0])') \
TF_CUDA_COMPUTE_CAPABILITIES=${cuda_compute} \
TF_CUDA_VERSION=10.2 \
TF_CUDA_CLANG=1 \
TF_DOWNLOAD_CLANG=0 \
CLANG_CUDA_COMPILER_PATH=/usr/bin/clang-10 \
TF_CUDNN_VERSION=8 \
TF_TENSORRT_VERSION=${trt_version} \
CUDA_TOOLKIT_PATH=/usr/local/cuda \
CUDNN_INSTALL_PATH=/usr/lib/aarch64-linux-gnu \
TENSORRT_INSTALL_PATH=/usr/lib/aarch64-linux-gnu \
TF_NEED_IGNITE=0 \
TF_ENABLE_XLA=0 \
TF_NEED_OPENCL_SYCL=0 \
TF_NEED_COMPUTECPP=0 \
TF_NEED_ROCM=0 \
TF_NEED_CUDA=1 \
TF_NEED_TENSORRT=1 \
TF_NEED_OPENCL=0 \
TF_NEED_MPI=0 \
GCC_HOST_COMPILER_PATH=$(which clang-10) \
CC_OPT_FLAGS="-Wno-sign-compare" \
TF_SET_ANDROID_WORKSPACE=0 \
./configure
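# Build the pip-package target; CPU and RAM usage are capped (25% of CPUs, 50% of RAM)
# so bazel does not exhaust memory on smaller Jetson boards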
bazel build --config=opt \
--config=cuda \
--config=noaws \
--local_cpu_resources=HOST_CPUS*0.25 \
--local_ram_resources=HOST_RAM*0.5 \
//tensorflow/tools/pip_package:build_pip_package
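# Package the compiled TensorFlow into a pip wheel under wheel/tensorflow_pkg/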
bazel-bin/tensorflow/tools/pip_package/build_pip_package wheel/tensorflow_pkg
echo "** Install tensorflow-${version}"
sudo pip3 install wheel/tensorflow_pkg/tensorflow-${version}-*.whl
popd
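# Quick sanity check: the installed wheel should report being built with CUDA and should detect the GPU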
TF_CPP_MIN_LOG_LEVEL=3 \
python3 -c "import tensorflow as tf; tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR); print('tensorflow version: %s' % tf.__version__); print('tensorflow.test.is_built_with_cuda(): %s' % tf.test.is_built_with_cuda()); print('tensorflow.test.is_gpu_available(): %s' % tf.test.is_gpu_available(cuda_only=False, min_cuda_compute_capability=None))"
echo "** Build and install tensorflow-${version} successfully"
make[3]: Entering directory '/home/nvidia/src/protobuf-3.9.2/src'
../test-driver: line 107: 12829 Segmentation fault (core dumped) "$@" > $log_file 2>&1
FAIL: protobuf-test
PASS: protobuf-lazy-descriptor-test
PASS: protobuf-lite-test
PASS: google/protobuf/compiler/zip_output_unittest.sh
PASS: google/protobuf/io/gzip_stream_unittest.sh
PASS: protobuf-lite-arena-test
PASS: no-warning-test
============================================================================
Testsuite summary for Protocol Buffers 3.9.2
============================================================================
# TOTAL: 7
# PASS: 6
# SKIP: 0
# XFAIL: 0
# FAIL: 1
# XPASS: 0
# ERROR: 0
============================================================================
See src/test-suite.log
Please report to protobuf@googlegroups.com
============================================================================
Makefile:7751: recipe for target 'test-suite.log' failed
make[3]: *** [test-suite.log] Error 1
make[3]: Leaving directory '/home/nvidia/src/protobuf-3.9.2/src'
Makefile:7857: recipe for target 'check-TESTS' failed
make[2]: *** [check-TESTS] Error 2
make[2]: Leaving directory '/home/nvidia/src/protobuf-3.9.2/src'
Makefile:7972: recipe for target 'check-am' failed
make[1]: *** [check-am] Error 2
make[1]: Leaving directory '/home/nvidia/src/protobuf-3.9.2/src'
Makefile:1715: recipe for target 'check-recursive' failed
make: *** [check-recursive] Error 1
diff -Naur a/third_party/nccl/build_defs.bzl.tpl b/third_party/nccl/build_defs.bzl.tpl
--- a/third_party/nccl/build_defs.bzl.tpl 2021-08-10 03:10:27.000000000 +0800
+++ b/third_party/nccl/build_defs.bzl.tpl 2021-10-08 10:26:33.536077745 +0800
@@ -43,7 +43,7 @@
     # The global functions can not have a lower register count than the
     # device functions. This is enforced by setting a fixed register count.
     # https://github.com/NVIDIA/nccl/blob/f93fe9bfd94884cec2ba711897222e0df5569a53/makefiles/common.mk#L48
-    maxrregcount = "-maxrregcount=96"
+    maxrregcount = "-maxrregcount=80"
 
     return cuda_default_copts() + select({
         "@local_config_cuda//:is_cuda_compiler_nvcc": [

SandyLi2017 commented Oct 19, 2023

I am not so familiar with bazel, so I'm not sure of the best way to downgrade my bazel (it was bazel 4.2.0) to 3.7.2.
What I did:

  1. Downloaded bazel-3.7.2-linux-arm64 from https://github.com/bazelbuild/bazel/releases?expanded=true&page=1&q=3.7 to my robot (Jetson Nano, JetPack 4.6, Python 3.6). It looks like an executable file, but I don't know how to run it (tried double-clicking, etc.);
  2. Downloaded bazel-3.7.2-installer-linux-x86_64.sh from the same site and ran it as: sudo bash bazel-3.7.2-installer-linux-x86_64.sh.

Then I ran install_tensorflow-2.6.0.sh again and got the error below.
Configuration finished
/usr/local/bin/bazel: line 163: /usr/local/lib/bazel/bin/bazel-real: cannot execute binary file: Exec format error
ERROR: The project you're trying to build requires Bazel 3.7.2 (specified in /home/hiwonder/src/tensorflow-2.6.0/.bazelversion), but it wasn't found in /usr/local/lib/bazel/bin.

And I can see the bazel files are there:
hiwonder@JetMax:~/tensorflow$ ls -trl /usr/local/lib/bazel/bin
total 46420
-rwxr-xr-x 1 root root 47154615 Jan 1 1980 bazel-real
-rw-r--r-- 1 root root 5305 Jan 1 1980 bazel.fish
-rwxr-xr-x 1 root root 339462 Jan 1 1980 bazel-complete.bash
-rw-r--r-- 1 root root 10238 Jan 1 1980 _bazel
-rwxr-xr-x 1 root root 8678 Jan 1 1980 bazel

I don't know where to go from here. Would you please advise?
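For reference, one way to check whether the installed bazel-real actually matches the Jetson's CPU (a sketch; the copy path is only an example):

file /usr/local/lib/bazel/bin/bazel-real   # an "x86-64" ELF here would explain the "Exec format error"
uname -m                                   # Jetson boards report aarch64
# if the architecture is wrong, drop in the arm64 release binary from step 1 instead:
sudo cp bazel-3.7.2-linux-arm64 /usr/local/lib/bazel/bin/bazel-real
sudo chmod +x /usr/local/lib/bazel/bin/bazel-real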
