@JasonAtNvidia, created April 17, 2019
### Do not run this script directly.
# This is the helperscript that goes along with BuildTensorflow.sh
# (which sources it). It pulls the more complicated probing out of
# the build script and boils it down to a simple set of environment
# variable assignments.
whereami=$(pwd)
cuda_location=$(dirname "$(dirname "$(which nvcc)")")
# Get at the deviceQuery sample code to extract useful information
# about the Jetson GPU and compute capability
queryout=$(/usr/local/cuda/samples/1_Utilities/deviceQuery/deviceQuery)
compute_capability=$(echo "$queryout" | \
  grep -oP "(?<=CUDA Capability Major/Minor version number:)\s*(\d+)\.(\d*)" | \
  awk '$1=$1')
cuda_driver_version=$(echo "$queryout" | \
  grep -oP "(?<=CUDA Driver Version =)\s*(\d+)\.(\d*)" | \
  awk '$1=$1')
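# Example deviceQuery lines that the two greps above match (a TX2 is shown;
# values vary by board and JetPack release):
#   CUDA Capability Major/Minor version number:    6.2
#   ... CUDA Driver Version = 10.0, CUDA Runtime Version = 10.0 ...
# giving compute_capability=6.2 and cuda_driver_version=10.0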
cudnn_file=$(locate libcudnn.so | tail -n 1)
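# On JetPack this typically resolves to a path under
# /usr/lib/aarch64-linux-gnu (e.g. libcudnn.so.7)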
cd "$whereami"
# Default to python3 if the caller has not set $PYTHON, so 'which' has
# something to resolve (the python3 fallback is an assumption; override
# by exporting PYTHON before sourcing this file)
PYTHON=${PYTHON:-python3}
PYTHON_BIN_PATH=$(which "$PYTHON")
echo "PYTHON_BIN_PATH=$PYTHON_BIN_PATH"
USE_DEFAULT_PYTHON_LIB_PATH=1
GCC_HOST_COMPILER_PATH=$(which gcc)
echo "GCC_HOST_COMPILER_PATH=$GCC_HOST_COMPILER_PATH"
# CUDA information for the build
TF_NEED_CUDA=1
CUDA_TOOLKIT_PATH=$cuda_location
echo "CUDA_TOOLKIT_PATH=$CUDA_TOOLKIT_PATH"
TF_CUDA_VERSION=$cuda_driver_version
echo "TF_CUDA_VERSION=$TF_CUDA_VERSION"
TF_CUDA_COMPUTE_CAPABILITIES=5.3,$compute_capability
echo "TF_CUDA_COMPUTE_CAPABILITIES=$TF_CUDA_COMPUTE_CAPABILITIES"
CUDNN_INSTALL_PATH=$(dirname "$cudnn_file")
echo "CUDNN_INSTALL_PATH=$CUDNN_INSTALL_PATH"
TF_CUDNN_VERSION=$(ls -l "$CUDNN_INSTALL_PATH" | grep -oP '(?<=libcudnn\.so\.)(\d+)\.(\d*)\.(\d*)' | head -n 1)
echo "TF_CUDNN_VERSION=$TF_CUDNN_VERSION"
# Starting with TF 1.7, TensorRT support was built into the TF source code.
# Hopefully TensorRT was included in your Jetson install
TF_NEED_TENSORRT=1
TENSORRT_INSTALL_PATH=$CUDNN_INSTALL_PATH
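# On JetPack, the TensorRT libraries live alongside cuDNN in the same
# directory, which is why the cuDNN path is reused above. A quick way to
# confirm TensorRT is actually present:
#   ldconfig -p | grep libnvinfer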
# NCCL is an NVIDIA library, but we won't install more of it than TF wants at this point;
# TF treats 1.3 as the default at this time
TF_NCCL_VERSION="1.3"
# Use nvcc as the CUDA compiler
TF_CUDA_CLANG=0
# Google Cloud Platform Support
TF_NEED_GCP=0
# Hadoop File System Support
TF_NEED_HDFS=0
# Kafka file System
TF_NEED_KAFKA=0
# Amazon S3 support
TF_NEED_S3=0
TF_NEED_AWS=0
# Architecture Flags
CC_OPT_FLAGS=-march=native
# JE Malloc
TF_NEED_JEMALLOC=1
# GDR
TF_NEED_GDR=0
# OpenCL Support (This is an NVIDIA board, don't use OpenCL)
TF_NEED_OPENCL=0
# XLA
TF_ENABLE_XLA=0
# NO MKL available for ARM
TF_NEED_MKL=0
# MPI Support (we use the video card, no need for this)
TF_NEED_MPI=0
# VERBS
TF_NEED_VERBS=0
#######################################
# Export everything, in the same
# order that the configure script
# asks its questions
#######################################
export PYTHON_BIN_PATH=$PYTHON_BIN_PATH
export USE_DEFAULT_PYTHON_LIB_PATH=$USE_DEFAULT_PYTHON_LIB_PATH
export TF_NEED_JEMALLOC=$TF_NEED_JEMALLOC
export TF_NEED_GCP=$TF_NEED_GCP
export TF_NEED_HDFS=$TF_NEED_HDFS
export TF_NEED_S3=$TF_NEED_S3
export TF_NEED_AWS=$TF_NEED_AWS
export TF_NEED_KAFKA=$TF_NEED_KAFKA
export TF_ENABLE_XLA=$TF_ENABLE_XLA
export TF_NEED_GDR=$TF_NEED_GDR
export TF_NEED_VERBS=$TF_NEED_VERBS
export TF_NEED_OPENCL=$TF_NEED_OPENCL
export TF_NEED_OPENCL_SYCL=$TF_NEED_OPENCL
export TF_NEED_CUDA=$TF_NEED_CUDA
export TF_CUDA_VERSION=$TF_CUDA_VERSION
export CUDA_TOOLKIT_PATH=$CUDA_TOOLKIT_PATH
export TF_CUDNN_VERSION=$TF_CUDNN_VERSION
export CUDNN_INSTALL_PATH=$CUDNN_INSTALL_PATH
export TF_NEED_TENSORRT=$TF_NEED_TENSORRT
export TENSORRT_INSTALL_PATH=$TENSORRT_INSTALL_PATH
export TF_NCCL_VERSION=$TF_NCCL_VERSION
export TF_CUDA_COMPUTE_CAPABILITIES=$TF_CUDA_COMPUTE_CAPABILITIES
export TF_CUDA_CLANG=$TF_CUDA_CLANG
export GCC_HOST_COMPILER_PATH=$GCC_HOST_COMPILER_PATH
export TF_NEED_MPI=$TF_NEED_MPI
export CC_OPT_FLAGS=$CC_OPT_FLAGS
export TF_SET_ANDROID_WORKSPACE=0
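# Quick sanity check after sourcing this file:
#   env | grep -E '^(TF_|CUDA|CUDNN|TENSORRT|PYTHON_BIN|GCC_HOST|CC_OPT)'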
#!/bin/bash
# BuildTensorflow.sh
# Perform apt installs for bazel dependencies
sudo apt-get install build-essential openjdk-8-jdk python-dev zip unzip -y
# Tensorflow dependencies
sudo apt-get install python-numpy python-scipy python-pip python-wheel -y
sudo apt-get install python-enum34 python-mock python-h5py -y
sudo apt-get install python3-dev python3-numpy python3-scipy python3-pip python3-wheel -y
sudo apt-get install python3-mock python3-h5py -y
# mlocate will be used to locate libraries needed by TF
sudo apt-get install mlocate -y
sudo updatedb
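# With the database built, library lookups like the one in the helperscript work:
#   locate libcudnn.so | tail -n 1
# (on JetPack this is typically a path under /usr/lib/aarch64-linux-gnu)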
# TF > 1.10 version requires keras libraries be present for the build
pip install keras_applications==1.0.4 --no-deps
pip install keras_preprocessing==1.0.2 --no-deps
pip3 install keras_applications==1.0.4 --no-deps
pip3 install keras_preprocessing==1.0.2 --no-deps
pip3 install enum34
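# Quick check that the Keras build shims import cleanly:
#   python3 -c "import keras_applications, keras_preprocessing"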
# Build the deviceQuery script to allow us to pull out device information
cuda_location=/usr/local/cuda
startdir=$(pwd)
cd "$cuda_location/samples/1_Utilities/deviceQuery"
sudo make -s
# Return to where we started so the rest of the build lands in the right place
cd "$startdir"
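# Sanity check: the freshly built binary should report the onboard GPU:
#   "$cuda_location/samples/1_Utilities/deviceQuery/deviceQuery" | head -n 20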
# Go out and get bazel
wget --no-check-certificate https://github.com/bazelbuild/bazel/releases/download/0.15.0/bazel-0.15.0-dist.zip
# unzip the bazel
unzip bazel-0.15.0-dist.zip -d bazel_build
cd bazel_build
./compile.sh
# Move bazel out to the main directory
mv output/bazel ../bazel
# clear up needed space
cd ..
rm -rf bazel_build
rm -f bazel-0.15.0-dist.zip
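# bazel now sits next to this script; confirm it runs before building TF:
#   ./bazel version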
# Bazel is done, next we need tensorflow
git clone --single-branch -b v1.12.0 https://github.com/tensorflow/tensorflow
# Prepare the environment; the helperscript automagically probes the device
# and finds the libraries and CUDA information to pass to TF
source ./helperscript
# make sure the tensorflow build can find the bazel we just built
export PATH="$PATH:$(pwd)"
# configure the build of tensorflow
bash ./tensorflow/configure
# Move into Tensorflow and build it.... YAY
cd tensorflow
git apply ../tf_1.12.0_patch.patch
../bazel build --config=opt --config=cuda --config=monolithic //tensorflow/tools/pip_package:build_pip_package
../bazel build --config=opt --config=cuda --config=monolithic //tensorflow:libtensorflow_cc.so
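# Standard TF packaging step: once the first build target finishes, turn it
# into an installable wheel (run from inside the tensorflow directory):
#   bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
#   pip3 install /tmp/tensorflow_pkg/tensorflow-*.whl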
tf_1.12.0_patch.patch: drops -mfpu=neon (a 32-bit ARM flag that aarch64 gcc
rejects) and includes the AWS SDK linux-shared platform sources by default.
diff --git a/tensorflow/contrib/lite/kernels/internal/BUILD b/tensorflow/contrib/lite/kernels/internal/BUILD
index afb5ec0..4a45afe 100644
--- a/tensorflow/contrib/lite/kernels/internal/BUILD
+++ b/tensorflow/contrib/lite/kernels/internal/BUILD
@@ -21,7 +21,6 @@ HARD_FP_FLAGS_IF_APPLICABLE = select({
 NEON_FLAGS_IF_APPLICABLE = select({
     ":arm": [
         "-O3",
-        "-mfpu=neon",
     ],
     ":armeabi-v7a": [
         "-O3",
diff --git a/third_party/aws.BUILD b/third_party/aws.BUILD
index 5426f79..82d8a0d 100644
--- a/third_party/aws.BUILD
+++ b/third_party/aws.BUILD
@@ -24,7 +24,9 @@ cc_library(
         "@org_tensorflow//tensorflow:raspberry_pi_armeabi": glob([
             "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp",
         ]),
-        "//conditions:default": [],
+        "//conditions:default": glob([
+            "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp",
+        ]),
     }) + glob([
         "aws-cpp-sdk-core/include/**/*.h",
         "aws-cpp-sdk-core/source/*.cpp",