Jetson MXNet build recipe

arm.crosscompile.mk:
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#-------------------------------------------------------------------------------
# Template configuration for compiling mxnet
#
# If you want to change the configuration, please use the following
# steps. Assume you are in the root directory of mxnet. First copy this
# file so that any local changes will be ignored by git
#
# $ cp make/config.mk .
#
# Next, modify the corresponding entries, and then compile with
#
# $ make
#
# or build in parallel with 8 threads
#
# $ make -j8
#-------------------------------------------------------------------------------
#---------------------
# We do not assign compilers here. Often when cross-compiling these will already
# be set correctly.
#--------------------
export NVCC = nvcc
# whether to compile with options for MXNet developers
DEV = 0
# whether to compile with debug symbols
DEBUG = 0
# whether to compile with the profiler
USE_PROFILER =
# the additional link flags you want to add
# TODO: Move flags here
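# (-static-libstdc++ is presumably set here to avoid GLIBCXX version mismatches
# between the cross-toolchain's libstdc++ and the one shipped on the Jetson)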
ADD_LDFLAGS=-static-libstdc++
# the additional compile flags you want to add
ADD_CFLAGS =
#---------------------------------------------
# matrix computation libraries for CPU/GPU
#---------------------------------------------
# whether to use CUDA during compilation
USE_CUDA = 0
# path to the CUDA library, added to the link and compile flags
# if you have already added it to your environment variables, leave this as NONE
# USE_CUDA_PATH = /usr/local/cuda
USE_CUDA_PATH = NONE
# whether to use the cuDNN library
USE_CUDNN = 0
# whether to use CUDA runtime compilation for writing kernels from a host language (e.g. Python)
USE_NVRTC = 0
# whether to use OpenCV during compilation
# you can disable it, but then you will not be able to use
# the imbin iterator
USE_OPENCV = 0
# use openmp for parallelization
USE_OPENMP = 1
# MKL ML library for Intel CPUs / Xeon Phi
# Please refer to MKL_README.md for details
# MKL ML library folder; installing under /usr/local requires root
# (change it to your home directory as a standard user)
# Only used when USE_BLAS != mkl
MKLML_ROOT=/usr/local
# whether to use the MKL2017 library
USE_MKL2017 = 0
# whether to use MKL2017 experimental features for higher performance
# Prerequisite: USE_MKL2017 = 1
USE_MKL2017_EXPERIMENTAL = 0
# whether to use the NNPACK library
USE_NNPACK = 0
# For ARM builds we're using OpenBLAS
USE_BLAS = openblas
# whether to use LAPACK during compilation
# only effective when compiled with one of the BLAS versions openblas/apple/atlas/mkl
USE_LAPACK = 1
# path to lapack library in case of a non-standard installation
USE_LAPACK_PATH =
# path to the Intel libraries; you may need it for MKL if you did not add the path
# to your environment variables
USE_INTEL_PATH = NONE
# if MKL is used only for BLAS, choose static linking automatically so the Python wrapper works
ifeq ($(USE_MKL2017), 0)
ifeq ($(USE_BLAS), mkl)
USE_STATIC_MKL = 1
endif
else
USE_STATIC_MKL = NONE
endif
#----------------------------
# distributed computing
#----------------------------
# whether or not to enable multi-machine support
USE_DIST_KVSTORE = 0
# whether or not to allow reading and writing HDFS directly. If yes, Hadoop is
# required
USE_HDFS = 0
# path to libjvm.so. required if USE_HDFS=1
LIBJVM=$(JAVA_HOME)/jre/lib/amd64/server
# whether or not to allow reading and writing AWS S3 directly. If yes,
# libcurl4-openssl-dev is required; it can be installed on Ubuntu with
# sudo apt-get install -y libcurl4-openssl-dev
USE_S3 = 0
#----------------------------
# additional operators
#----------------------------
# path to folders containing project-specific operators that you don't want to put in src/operators
EXTRA_OPERATORS =
#----------------------------
# other features
#----------------------------
# Create C++ interface package
USE_CPP_PACKAGE = 0
#----------------------------
# plugins
#----------------------------
# whether to use caffe integration. This requires installing caffe.
# You also need to add CAFFE_PATH/build/lib to your LD_LIBRARY_PATH
# CAFFE_PATH = $(HOME)/caffe
# MXNET_PLUGINS += plugin/caffe/caffe.mk
# whether to use torch integration. This requires installing torch.
# You also need to add TORCH_PATH/install/lib to your LD_LIBRARY_PATH
# TORCH_PATH = $(HOME)/torch
# MXNET_PLUGINS += plugin/torch/torch.mk
# WARPCTC_PATH = $(HOME)/warp-ctc
# MXNET_PLUGINS += plugin/warpctc/warpctc.mk
# whether to use SFrame integration. This requires building SFrame from
# git@github.com:dato-code/SFrame.git
# SFRAME_PATH = $(HOME)/SFrame
# MXNET_PLUGINS += plugin/sframe/plugin.mk
Dockerfile.build.master.jetson:
# -*- mode: dockerfile -*-
# Work in progress; some of the manual steps below will be fixed in a subsequent release.
# Dockerfile to build libmxnet.so and a Python wheel for the Jetson TX1 and TX2.
# Builds from the GitHub MXNet master branch.
# Once complete, copy artifacts from /work/build to the target device.
# Install by running 'pip install name_of_wheel.whl' and copying the .so to a folder on your LD_LIBRARY_PATH.
FROM nvidia/cuda:8.0-cudnn5-devel as cudabuilder
FROM dockcross/linux-arm64
ENV ARCH aarch64
ENV NVCCFLAGS "-m64"
ENV CUDA_ARCH "-gencode arch=compute_53,code=sm_53 -gencode arch=compute_62,code=sm_62"
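# (sm_53 targets the Jetson TX1's Maxwell GPU; sm_62 targets the TX2's Pascal GPU)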
ENV BUILD_OPTS "USE_OPENCV=0 USE_BLAS=openblas USE_SSE=0 USE_CUDA=1 USE_CUDNN=1 ENABLE_CUDA_RTC=0 USE_NCCL=0 USE_CUDA_PATH=/usr/local/cuda/"
ENV CC /usr/bin/aarch64-linux-gnu-gcc
ENV CXX /usr/bin/aarch64-linux-gnu-g++
ENV FC /usr/bin/aarch64-linux-gnu-gfortran-4.9
ENV HOSTCC gcc
WORKDIR /work
# Build OpenBLAS
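# The ADD below fetches the current master ref from the GitHub API; when upstream
# OpenBLAS moves, the changed file busts Docker's build cache so the clone is re-run.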
ADD https://api.github.com/repos/xianyi/OpenBLAS/git/refs/heads/master /tmp/openblas_version.json
RUN git clone https://github.com/xianyi/OpenBLAS.git && \
cd OpenBLAS && \
make -j$(nproc) TARGET=ARMV8 && \
make install && \
ln -s /opt/OpenBLAS/lib/libopenblas.so /usr/lib/libopenblas.so && \
ln -s /opt/OpenBLAS/lib/libopenblas.a /usr/lib/libopenblas.a && \
ln -s /opt/OpenBLAS/lib/libopenblas.a /usr/lib/liblapack.a
ENV LD_LIBRARY_PATH $LD_LIBRARY_PATH:/opt/OpenBLAS/lib
ENV CPLUS_INCLUDE_PATH /opt/OpenBLAS/include
# Setup CUDA build env (including configuring and copying nvcc)
COPY --from=cudabuilder /usr/local/cuda /usr/local/cuda
COPY --from=cudabuilder /usr/include/cudnn.h /usr/include/cudnn.h
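# nvcc and the CUDA/cuDNN headers come from the x86_64 CUDA image (first stage);
# the matching aarch64 device libraries are installed below from the JetPack debs.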
ENV PATH $PATH:/usr/local/cuda/bin
ENV TARGET_ARCH aarch64
ENV TARGET_OS linux
# Install ARM dependencies based on JetPack 3.1
RUN wget http://developer.download.nvidia.com/devzone/devcenter/mobile/jetpack_l4t/013/linux-x64/cuda-repo-l4t-8-0-local_8.0.84-1_arm64.deb && \
wget http://developer.download.nvidia.com/devzone/devcenter/mobile/jetpack_l4t/013/linux-x64/libcudnn6_6.0.21-1+cuda8.0_arm64.deb && \
dpkg -i cuda-repo-l4t-8-0-local_8.0.84-1_arm64.deb && \
dpkg -i libcudnn6_6.0.21-1+cuda8.0_arm64.deb && \
apt update -y && \
apt install cuda-cudart-cross-aarch64-8-0 cuda-cublas-cross-aarch64-8-0 \
cuda-nvml-cross-aarch64-8-0 cuda-nvrtc-cross-aarch64-8-0 cuda-cufft-cross-aarch64-8-0 \
cuda-curand-cross-aarch64-8-0 cuda-cusolver-cross-aarch64-8-0 cuda-cusparse-cross-aarch64-8-0 \
cuda-misc-headers-cross-aarch64-8-0 cuda-npp-cross-aarch64-8-0 libcudnn6 -y && \
cp /usr/local/cuda-8.0/targets/aarch64-linux/lib/*.so /usr/local/cuda/lib64/ && \
cp /usr/local/cuda-8.0/targets/aarch64-linux/lib/stubs/*.so /usr/local/cuda/lib64/stubs/ && \
cp /usr/lib/aarch64-linux-gnu/libcudnn.* /usr/local/cuda/lib64/ && \
ln -s /usr/local/cuda/lib64/libcudnn.so.6 /usr/local/cuda/lib64/libcudnn.so && \
cp -r /usr/local/cuda-8.0/targets/aarch64-linux/include/ /usr/local/cuda/include/ && \
rm cuda-repo-l4t-8-0-local_8.0.84-1_arm64.deb && rm libcudnn6_6.0.21-1+cuda8.0_arm64.deb
# Build MXNet
RUN git clone --recursive https://github.com/apache/incubator-mxnet.git mxnet
WORKDIR /work/mxnet
# Add ARM specific settings
ADD arm.crosscompile.mk make/config.mk
# Build and link
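# BUILD_OPTS is passed on the make command line, so its entries (e.g. USE_CUDA=1,
# USE_OPENCV=0) override the corresponding defaults in config.mk.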
RUN make -j$(nproc) $BUILD_OPTS
# Create a binary wheel for easy installation.
# When using tool.py, output will be in the jetson folder.
# scp the .whl file to your target device, and install via
# pip install
WORKDIR /work/mxnet/python
RUN python setup.py bdist_wheel --universal
# Copy build artifacts to output folder for tool.py script
RUN mkdir -p /work/build && cp dist/*.whl /work/build && cp ../lib/* /work/build
Note: requires a recent version of Docker (17.05 or newer, for multi-stage build support).
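Check with:
docker --version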
HOST:
wget https://gist.githubusercontent.com/KellenSunderland/659f31c283a1ad2c04e9852eabed111c/raw/18f2fd0dc6f5539d94699de41a31b666dc432f9a/arm.crosscompile.mk
wget https://gist.githubusercontent.com/KellenSunderland/659f31c283a1ad2c04e9852eabed111c/raw/18f2fd0dc6f5539d94699de41a31b666dc432f9a/Dockerfile.build.master.jetson
docker build -f Dockerfile.build.master.jetson -t mxnet_jetson .
docker run --rm -v $(pwd)/build:/tmp mxnet_jetson:latest sh -c "cp /work/build/* /tmp"
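After the run, the build folder on the host should contain the Python wheel plus libmxnet.so (the exact wheel name depends on the MXNet version at build time):
ls build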
Copy build artifacts to device.
DEVICE:
pip install mxnet-1.0.1-py2.py3-none-any.whl
cp libmxnet.so /usr/lib64 (or any folder on your LD_LIBRARY_PATH, or the mxnet site-packages folder. This step will be fixed soon.)
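Quick smoke test on the device, assuming the wheel installed cleanly and libmxnet.so is on your LD_LIBRARY_PATH:
python -c "import mxnet as mx; print(mx.nd.ones((2,3), ctx=mx.gpu()))"
This should print a 2x3 array of ones computed on the Jetson GPU.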