# tdeboissiere/mxnet.sh

## mxnet.sh
####################################
# Apt libraries
####################################
# Refresh the package index, install the packages used by the rest of this
# script, then clean the apt caches and drop packages that are no longer needed.
# Every apt-get call that can prompt carries -y so the chain runs unattended.
sudo apt-get update && sudo apt-get install -y --no-install-recommends \
        build-essential \
        vim \
        numactl \
        git \
        curl \
        wget \
        unzip \
        ca-certificates \
        cmake \
        gcc \
        libatlas-base-dev \
        python3 \
        python3-pip \
        python3-dev \
        graphviz \
        python3-scipy \
        python3-sklearn \
        python3-setuptools \
        libopenblas-dev \
        liblapack-dev \
        libgfortran3 \
        locate \
        htop \
        less \
        cpio \
        autoconf \
        libtool \
        automake \
        locales && \
     sudo apt-get clean && \
     sudo apt-get autoclean && \
     sudo apt-get autoremove -y --purge

# MKL: https://software.intel.com/en-us/mkl
# Download MKL from Intel and copy the folder to home, then run the following
# (silent install driven by the silent.cfg shipped in that folder).
cd /home/ubuntu/l_mkl_2018.2.199 && sudo ./install.sh --silent ./silent.cfg

# Register the MKL library directory with the dynamic linker.
# The line is appended non-interactively instead of being typed into an editor,
# and only when it is not already present, so re-running this step is harmless.
grep -qxF '/opt/intel/mkl/lib/intel64' /etc/ld.so.conf.d/intel.conf 2>/dev/null \
    || echo '/opt/intel/mkl/lib/intel64' | sudo tee -a /etc/ld.so.conf.d/intel.conf > /dev/null
sudo ldconfig

# Locale setup: export en_US.UTF-8 for this shell, then reconfigure the
# locales package (accept the default when prompted).
LC_ALL='en_US.UTF-8'
LC_CTYPE='en_US.UTF-8'
export LC_ALL LC_CTYPE
sudo dpkg-reconfigure locales

# jemalloc and gperftools: clone each project into /home/ubuntu, build it and
# install it.
# The compile step runs as the invoking user (so the build tree is not left
# root-owned) with one job per available core, like the MXNet build; only the
# install targets need sudo.
cd /home/ubuntu && git clone https://github.com/jemalloc/jemalloc.git \
    && cd jemalloc \
    && ./autogen.sh \
    && make -j"$(nproc)" \
    && sudo make install_bin install_include install_lib

cd /home/ubuntu && git clone https://github.com/gperftools/gperftools.git \
    && cd gperftools \
    && ./autogen.sh \
    && ./configure \
    && make -j"$(nproc)" \
    && sudo make install

# MXNET
# Extra linker/compiler search paths pointing at the MKL install.
# Header directories must be passed with -I; -L only extends the linker's
# library search path. The intel64 subdirectory is the one this script
# registers with ldconfig, so it is added to the library search path as well.
export ADD_LDFLAGS='-L /opt/intel/mkl/lib -L /opt/intel/mkl/lib/intel64'
export ADD_CFLAGS='-I /opt/intel/mkl/include'

# Clone MXNet with its submodules into /home/ubuntu and build it with CUDA,
# cuDNN, NCCL and OpenCV switched off and MKL BLAS, MKL-DNN, jemalloc and
# gperftools switched on.
cd /home/ubuntu \
    && git clone --recursive https://github.com/apache/incubator-mxnet \
    && cd incubator-mxnet \
    && make -j"$(nproc)" \
        USE_BLAS=mkl \
        USE_MKLDNN=1 \
        USE_JEMALLOC=1 \
        USE_GPERFTOOLS=1 \
        USE_OPENCV=0 \
        USE_CUDA=0 \
        USE_CUDNN=0 \
        USE_NCCL=0

# Update ldconfig again: register the MKL-DNN libraries from the MXNet tree
# and /usr/local/lib with the dynamic linker.
# Each path is appended non-interactively (no editor session) and only when
# the exact line is missing, so re-running this step does not duplicate entries.
for lib_dir in \
    /home/ubuntu/incubator-mxnet/3rdparty/mkldnn/install/lib \
    /usr/local/lib; do
    grep -qxF "$lib_dir" /etc/ld.so.conf.d/intel.conf 2>/dev/null \
        || echo "$lib_dir" | sudo tee -a /etc/ld.so.conf.d/intel.conf > /dev/null
done
sudo ldconfig

# Install the MXNet Python package from the source tree, then the Python
# packages for the NMT benchmark: sockeye is installed without its
# dependencies, and pyyaml, typing and numpy are installed explicitly.
if cd /home/ubuntu/incubator-mxnet/python && python3 setup.py install; then
    pip3 install pyyaml \
        && pip3 install sockeye --no-deps \
        && pip3 install typing numpy
fi

# Download data to test NMT models (cf. https://github.com/awslabs/sockeye and https://mc.ai/lstm-inference-shoot-out-intel-skylake-vs-nvidia-v100/)
# For each language the preprocessed corpus is fetched and unpacked, and its
# first 1,000,000 lines become the training file. The dev archive is fetched
# and extracted only if both corpora were handled successfully.
wmt_url=http://data.statmt.org/wmt17/translation-task/preprocessed/de-en
if cd /home/ubuntu; then
    corpus_ok=yes
    for lang in de en; do
        if ! { wget "${wmt_url}/corpus.tc.${lang}.gz" \
                && gunzip "corpus.tc.${lang}.gz" \
                && head -n 1000000 "corpus.tc.${lang}" > "train.${lang}"; }; then
            corpus_ok=no
            break
        fi
    done
    if [ "$corpus_ok" = yes ]; then
        wget "${wmt_url}/dev.tgz" && tar xvzf dev.tgz
    fi
fi

# Train a model for 2 updates (checkpointing after every update) on CPU to
# obtain weights for the benchmarks that follow; output goes to ./wmt_model.
# All data paths are relative to the current directory.
python3 -m sockeye.train \
    -s train.de -t train.en \
    -vs newstest2016.tc.de -vt newstest2016.tc.en \
    --num-embed 256 --rnn-num-hidden 512 --rnn-attention-type dot \
    --max-seq-len 60 \
    --max-updates 2 --checkpoint-frequency 1 \
    --use-cpu \
    -o wmt_model

# MKL parameters
# Exported so the benchmark processes started after this point inherit them.
OMP_NUM_THREADS=36
KMP_AFFINITY='granularity=fine,noduplicates,compact,1,0'
export OMP_NUM_THREADS KMP_AFFINITY

# Run CPU CNN benchmark
# The MXNet checkout was cloned under /home/ubuntu, so the example directory
# is addressed there. The benchmark runs in a subshell so that the working
# directory of this script is not moved into the example directory.
echo 'Running CPU CNN benchmarks'
(cd /home/ubuntu/incubator-mxnet/example/image-classification && python3 benchmark_score.py)

# Run LSTM benchmarks
# wmt_model and newstest2016.tc.de are relative paths; the download and
# training steps produce them in /home/ubuntu, so translate from there.
echo 'Running NMT benchmarks'
cd /home/ubuntu && python3 -m sockeye.translate -m wmt_model -i newstest2016.tc.de -o my_2016.tc.en --batch-size 64 --output-type benchmark --use-cpu
	####################################
	# Apt libraries
	# NOTE(review): from this point the file repeats the un-indented section
	# above line for line — looks like a paste artifact; confirm and keep
	# only one copy.
	####################################
	# Refresh the package index, install the packages used by the rest of this
	# script, then clean the apt caches and drop packages that are no longer
	# needed. Every apt-get call that can prompt carries -y so the chain runs
	# unattended.
	sudo apt-get update && sudo apt-get install -y --no-install-recommends \
		build-essential \
		vim \
		numactl \
		git \
		curl \
		wget \
		unzip \
		ca-certificates \
		cmake \
		gcc \
		libatlas-base-dev \
		python3 \
		python3-pip \
		python3-dev \
		graphviz \
		python3-scipy \
		python3-sklearn \
		python3-setuptools \
		libopenblas-dev \
		liblapack-dev \
		libgfortran3 \
		locate \
		htop \
		less \
		cpio \
		autoconf \
		libtool \
		automake \
		locales && \
	sudo apt-get clean && \
	sudo apt-get autoclean && \
	sudo apt-get autoremove -y --purge

	# MKL: https://software.intel.com/en-us/mkl
	# Download MKL from Intel and copy the folder to home, then run the following
	# (silent install driven by the silent.cfg shipped in that folder).
	cd /home/ubuntu/l_mkl_2018.2.199 && sudo ./install.sh --silent ./silent.cfg

	# Register the MKL library directory with the dynamic linker.
	# The line is appended non-interactively instead of being typed into an
	# editor, and only when it is not already present, so re-running this step
	# is harmless.
	grep -qxF '/opt/intel/mkl/lib/intel64' /etc/ld.so.conf.d/intel.conf 2>/dev/null \
		|| echo '/opt/intel/mkl/lib/intel64' | sudo tee -a /etc/ld.so.conf.d/intel.conf > /dev/null
	sudo ldconfig

	# Locale setup: export en_US.UTF-8 for this shell, then reconfigure the
	# locales package (accept the default when prompted).
	LC_ALL='en_US.UTF-8'
	LC_CTYPE='en_US.UTF-8'
	export LC_ALL LC_CTYPE
	sudo dpkg-reconfigure locales

	# jemalloc and gperftools: clone each project into /home/ubuntu, build it
	# and install it.
	# The compile step runs as the invoking user (so the build tree is not left
	# root-owned) with one job per available core, like the MXNet build; only
	# the install targets need sudo.
	cd /home/ubuntu && git clone https://github.com/jemalloc/jemalloc.git \
		&& cd jemalloc \
		&& ./autogen.sh \
		&& make -j"$(nproc)" \
		&& sudo make install_bin install_include install_lib

	cd /home/ubuntu && git clone https://github.com/gperftools/gperftools.git \
		&& cd gperftools \
		&& ./autogen.sh \
		&& ./configure \
		&& make -j"$(nproc)" \
		&& sudo make install

	# MXNET
	# Extra linker/compiler search paths pointing at the MKL install.
	# Header directories must be passed with -I; -L only extends the linker's
	# library search path. The intel64 subdirectory is the one this script
	# registers with ldconfig, so it is added to the library search path as well.
	export ADD_LDFLAGS='-L /opt/intel/mkl/lib -L /opt/intel/mkl/lib/intel64'
	export ADD_CFLAGS='-I /opt/intel/mkl/include'

	# Clone MXNet with its submodules into /home/ubuntu and build it with CUDA,
	# cuDNN, NCCL and OpenCV switched off and MKL BLAS, MKL-DNN, jemalloc and
	# gperftools switched on.
	cd /home/ubuntu \
		&& git clone --recursive https://github.com/apache/incubator-mxnet \
		&& cd incubator-mxnet \
		&& make -j"$(nproc)" \
			USE_BLAS=mkl \
			USE_MKLDNN=1 \
			USE_JEMALLOC=1 \
			USE_GPERFTOOLS=1 \
			USE_OPENCV=0 \
			USE_CUDA=0 \
			USE_CUDNN=0 \
			USE_NCCL=0

	# Update ldconfig again: register the MKL-DNN libraries from the MXNet tree
	# and /usr/local/lib with the dynamic linker.
	# Each path is appended non-interactively (no editor session) and only when
	# the exact line is missing, so re-running this step does not duplicate
	# entries.
	for lib_dir in \
		/home/ubuntu/incubator-mxnet/3rdparty/mkldnn/install/lib \
		/usr/local/lib; do
		grep -qxF "$lib_dir" /etc/ld.so.conf.d/intel.conf 2>/dev/null \
			|| echo "$lib_dir" | sudo tee -a /etc/ld.so.conf.d/intel.conf > /dev/null
	done
	sudo ldconfig

	# Install the MXNet Python package from the source tree, then the Python
	# packages for the NMT benchmark: sockeye is installed without its
	# dependencies, and pyyaml, typing and numpy are installed explicitly.
	if cd /home/ubuntu/incubator-mxnet/python && python3 setup.py install; then
		pip3 install pyyaml \
			&& pip3 install sockeye --no-deps \
			&& pip3 install typing numpy
	fi

	# Download data to test NMT models (cf. https://github.com/awslabs/sockeye and https://mc.ai/lstm-inference-shoot-out-intel-skylake-vs-nvidia-v100/)
	# For each language the preprocessed corpus is fetched and unpacked, and its
	# first 1,000,000 lines become the training file. The dev archive is fetched
	# and extracted only if both corpora were handled successfully.
	wmt_url=http://data.statmt.org/wmt17/translation-task/preprocessed/de-en
	if cd /home/ubuntu; then
		corpus_ok=yes
		for lang in de en; do
			if ! { wget "${wmt_url}/corpus.tc.${lang}.gz" \
					&& gunzip "corpus.tc.${lang}.gz" \
					&& head -n 1000000 "corpus.tc.${lang}" > "train.${lang}"; }; then
				corpus_ok=no
				break
			fi
		done
		if [ "$corpus_ok" = yes ]; then
			wget "${wmt_url}/dev.tgz" && tar xvzf dev.tgz
		fi
	fi

	# Train a model for 2 updates (checkpointing after every update) on CPU to
	# obtain weights for the benchmarks that follow; output goes to ./wmt_model.
	# All data paths are relative to the current directory.
	python3 -m sockeye.train \
		-s train.de -t train.en \
		-vs newstest2016.tc.de -vt newstest2016.tc.en \
		--num-embed 256 --rnn-num-hidden 512 --rnn-attention-type dot \
		--max-seq-len 60 \
		--max-updates 2 --checkpoint-frequency 1 \
		--use-cpu \
		-o wmt_model

	# MKL parameters
	# Exported so the benchmark processes started after this point inherit them.
	OMP_NUM_THREADS=36
	KMP_AFFINITY='granularity=fine,noduplicates,compact,1,0'
	export OMP_NUM_THREADS KMP_AFFINITY

	# Run CPU CNN benchmark
	# The MXNet checkout was cloned under /home/ubuntu, so the example directory
	# is addressed there. The benchmark runs in a subshell so that the working
	# directory of this script is not moved into the example directory.
	echo 'Running CPU CNN benchmarks'
	(cd /home/ubuntu/incubator-mxnet/example/image-classification && python3 benchmark_score.py)

	# Run LSTM benchmarks
	# wmt_model and newstest2016.tc.de are relative paths; the download and
	# training steps produce them in /home/ubuntu, so translate from there.
	echo 'Running NMT benchmarks'
	cd /home/ubuntu && python3 -m sockeye.translate -m wmt_model -i newstest2016.tc.de -o my_2016.tc.en --batch-size 64 --output-type benchmark --use-cpu