Skip to content

Instantly share code, notes, and snippets.

@xingjinglu
Last active July 7, 2018 17:46
Show Gist options
  • Save xingjinglu/d72474b6067fdc5c57a42f0b499a669c to your computer and use it in GitHub Desktop.
Save xingjinglu/d72474b6067fdc5c57a42f0b499a669c to your computer and use it in GitHub Desktop.
#!/bin/bash
# Installs Docker CE 18.03 and nvidia-docker2, then CUDA 9.0 with cuDNN and
# the NVIDIA GPU driver when the detected versions are too old.
#
# The script relies on bash features (`local`, `[[ ... ]]`), so it must be run
# with bash rather than a strict POSIX sh.
#
# Usage: sudo bash install_docker_nvidia_docker_cuda.sh

# -x: trace every command as it runs; -e: abort on the first unguarded failure.
set -xe
#######################################
# Run a command with the corporate proxy set in its environment only.
# Arguments: the command to run followed by its arguments.
# Returns:   the exit status of the command.
#######################################
outnet() {
  local proxy_val=10.130.14.129:8080
  local httpProxy=http://$proxy_val
  # The space before the backslash matters: without it the continuation
  # fuses "$httpProxy" with "RSYNC_PROXY" into a single (unset) variable
  # name, which corrupts ftp_proxy and never sets RSYNC_PROXY.
  # "$@" keeps every argument as its own word, even if it contains spaces.
  http_proxy=$httpProxy https_proxy=$httpProxy ftp_proxy=$httpProxy \
    RSYNC_PROXY=$proxy_val "$@"
}
# Export the proxy for every child process, in both the lowercase and the
# uppercase spelling, since tools differ in which one they read.
export http_proxy=http://10.130.14.129:8080
export https_proxy="$http_proxy"
export HTTP_PROXY="$http_proxy" HTTPS_PROXY="$http_proxy"
# Every step below is best-effort: "|| true" keeps `set -e` from aborting the
# script when a step fails. (Piping into `true` instead would throw away the
# command's output and close its stdout while it is still running.)
yum makecache || true
# pre-requisite, may fail.
#outnet sudo yum --enablerepo=* install -y container-selinux || true
#outnet sudo yum --enablerepo=* install -y libseccomp || true
# if pre fail, try again.
outnet sudo yum install -y container-selinux || true
outnet sudo yum install -y libseccomp || true

# Download the Docker CE package only when it is not already in the current
# directory, then install it from the local file.
docker_rpm=docker-ce-18.03.1.ce-1.el7.centos.x86_64.rpm
if [ -e "$docker_rpm" ]; then
  echo "$docker_rpm exist"
else
  outnet wget "https://download.docker.com/linux/centos/7/x86_64/stable/Packages/$docker_rpm"
fi
outnet yum install -y "$docker_rpm" || true
# Overwrite the docker systemd unit with one that adds the proxy settings and
# the insecure private registry.
# The heredoc delimiter is quoted so that $DOCKER_NETWORK_OPTIONS and $MAINPID
# are written literally for systemd to expand; with an unquoted delimiter the
# shell would replace them with empty strings while generating the file.
cat > /usr/lib/systemd/system/docker.service <<'EOF'
[Unit]
Description=Docker Application Container Engine
Documentation=http://docs.docker.io
[Service]
Environment="HTTP_PROXY=http://10.130.14.129:8080/" "NO_PROXY=localhost,127.0.0.1,10.142.104.73"
Environment="PATH=/usr/local/bin:/bin:/sbin:/usr/bin:/usr/sbin"
EnvironmentFile=-/run/flannel/docker
ExecStart=/usr/bin/dockerd --insecure-registry=10.142.104.73:8043 --log-level=error $DOCKER_NETWORK_OPTIONS
ExecReload=/bin/kill -s HUP $MAINPID
Restart=on-failure
RestartSec=5
LimitNOFILE=infinity
LimitNPROC=infinity
LimitCORE=infinity
Delegate=yes
KillMode=process
[Install]
WantedBy=multi-user.target
EOF
# Make systemd re-read unit files before docker is (re)started, so the start
# uses the current on-disk definition; then enable docker at boot and restart
# it (restart starts the service if it is not running yet).
systemctl daemon-reload
systemctl enable docker
systemctl restart docker
# install nvidia-docker
# Fetch the nvidia-docker yum repo definition through the proxy and store it
# under /etc/yum.repos.d.
outnet curl -sL \
  https://nvidia.github.io/nvidia-docker/centos7/nvidia-docker.repo \
  | sudo tee /etc/yum.repos.d/nvidia-docker.repo
#yum install -y nvidia-docker2
# The proxy is passed via `env` inside sudo for the yum call.
sudo env \
  http_proxy=http://10.130.14.129:8080 \
  https_proxy=http://10.130.14.129:8080 \
  yum install -y nvidia-docker2
# Send SIGHUP to dockerd (presumably so it reloads its configuration).
sudo pkill -SIGHUP dockerd
# Register the nvidia container runtime with docker and make it the default.
# The delimiter is quoted so the JSON is written exactly as shown.
cat > /etc/docker/daemon.json <<'EOF'
{
"default-runtime": "nvidia",
"runtimes": {
"nvidia": {
"path": "/usr/bin/nvidia-container-runtime",
"runtimeArgs": []
}
}
}
EOF
# Apply the new daemon.json: reload systemd's unit definitions first, keep
# docker enabled at boot, then restart the daemon once (restart also starts
# it when it is not running).
systemctl daemon-reload
systemctl enable docker
systemctl restart docker
# Put the default CUDA toolkit location on the search paths so that an
# installed nvcc is found by the version check that follows.
export PATH=/usr/local/cuda/bin:$PATH
# Add the ':' separator only when LD_LIBRARY_PATH already has a value;
# otherwise the new directory would be fused with the first existing entry
# (or a trailing ':' would add an empty entry).
export LD_LIBRARY_PATH=/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
# CUDA installed?
#######################################
# Print the major CUDA version reported by `nvcc --version`.
# Parses the "Cuda compilation tools, release X.Y, ..." line and prints X.
# Outputs: the major version on stdout, or nothing if it cannot be parsed.
#######################################
nvcc_major_version() {
  nvcc --version 2>/dev/null \
    | sed -n 's/^Cuda.* \([0-9][0-9]*\)\.[0-9][0-9]*,.*/\1/p'
}

# Determine cuda_ver (major version). Prefer what nvcc reports; if nvcc is
# missing or its output cannot be parsed, fall back to looking for known
# CUDA 9 install directories, and finally assume version 8.
cuda_ver=
if command -v nvcc >/dev/null 2>&1; then
  echo "CUDA installed"
  cuda_ver=$(nvcc_major_version)
fi
if [[ $cuda_ver =~ ^[0-9]+$ ]]; then
  if [ "$cuda_ver" -ge 8 ]; then
    echo "$cuda_ver"
    echo "cuda gpu version >= 8"
  fi
elif [[ -d /usr/local/cuda-9.0 || -d /usr/local/cuda-9.1 ]]; then
  cuda_ver=9
elif [[ -d /opt/cuda-9.0 || -d /opt/cuda-9.1 ]]; then
  cuda_ver=9
else
  cuda_ver=8
fi
# Install cuda 9.0
# Runs only when the detected CUDA major version is below 9. The variable
# must be expanded with '$' (a bare word makes the test fail with "integer
# expression expected", so the install would never run); an empty/unset value
# is treated as 8.
# Steps marked "|| true" are best-effort and must not abort under `set -e`.
if [ "${cuda_ver:-8}" -lt 9 ]; then
  echo "nvcc is not of version 9.0, installing nvcc of version 9.0 to build"
  # Get more space for /usr: move a real (non-symlink) cuda-8.0 directory to
  # /opt and leave a symlink behind.
  if [[ -d /usr/local/cuda-8.0 && ! -L /usr/local/cuda-8.0 ]]; then
    echo "mv cuda-8.0 --> /opt/"
    mv /usr/local/cuda-8.0 /opt/ || true
    ln -s /opt/cuda-8.0 /usr/local/ || true
  fi
  # Install the local CUDA 9.0 repo package, then the cuda meta-package.
  wget -c http://eva.sogou-inc.com/cache/nvidia/cuda-repo-rhel7-9-0-local-9.0.176-1.x86_64.rpm
  sudo rpm -i cuda-repo-rhel7-9-0-local-9.0.176-1.x86_64.rpm || true
  #sudo yum makecache || true
  sudo env http_proxy=http://10.130.14.129:8080 https_proxy=http://10.130.14.129:8080 yum install -y cuda
  # If /usr is short on disk space, install cuda as below.
  #wget -c http://10.142.104.73/cache/nvidia/cuda_9.0.176_384.81_linux-run || true
  #sudo bash cuda_9.0.176_384.81_linux-run --toolkit --toolkitpath=/opt/cuda-9.0/ --silent
  #sudo ln /opt/cuda-9.0 /usr/local/ -s
  # install cudnn.
  if [ ! -e cudnn-9.0-linux-x64-v7.1.tar.gz ]; then
    wget -c http://10.142.104.73/cache/nvidia/cudnn-9.0-linux-x64-v7.1.tar.gz || true
  fi
  # Remove a stale extraction directory so old files are not mixed in.
  if [ -d cuda ]; then
    rm -rf cuda
  fi
  tar xzf cudnn-9.0-linux-x64-v7.1.tar.gz || true
  sudo cp cuda/include/cudnn.h /usr/local/cuda-9.0/include || true
  sudo cp -rf cuda/lib64/libcudnn* /usr/local/cuda-9.0/lib64/ || true
  # Relocate the toolkit to /opt and keep /usr/local/cuda-9.0 as a symlink.
  sudo mv /usr/local/cuda-9.0 /opt/ || true
  sudo ln -s /opt/cuda-9.0 /usr/local/ || true
fi
# Get nvidia gpu driver version.
#######################################
# Print the major version of the nvidia kernel module.
# Reads the module's "version" field (e.g. 390.42) and prints the part before
# the first dot.
# Outputs: the major version on stdout.
# Returns: non-zero if the module is unknown or the version is not numeric.
#######################################
nvidia_driver_major() {
  local full major
  full=$(modinfo -F version nvidia 2>/dev/null) || return 1
  major=${full%%.*}
  [[ $major =~ ^[0-9]+$ ]] || return 1
  printf '%s\n' "$major"
}

# Fall back to 380 when no usable driver version is found.
if ! gpu_driver_ver=$(nvidia_driver_major); then
  gpu_driver_ver=380
fi
# Reinstall the NVIDIA driver (390.42) when the detected major version is
# below 384. An empty/unset value is treated as 380 so the test never sees a
# missing operand.
# NOTE(review): 384 is presumably the minimum driver for CUDA 9.0 — confirm.
if [ "${gpu_driver_ver:-380}" -lt 384 ]; then
  echo "$gpu_driver_ver"
  echo "nvidia-gpu version not satisfy"
  #Install nvidia-gpu driver.
  # Best-effort download; a failure here must not abort under `set -e`.
  wget -c http://10.142.104.73/cache/nvidia/NVIDIA-Linux-x86_64-390.42.run || true
  # Proxy variables are set via `env` inside sudo, matching the other sudo
  # calls in this script; variables set outside sudo may be dropped by its
  # environment reset.
  sudo env http_proxy=http://10.130.14.129:8080 https_proxy=http://10.130.14.129:8080 bash NVIDIA-Linux-x86_64-390.42.run --uninstall
  #Choose DKMS, not choose 32bit, new pkg-xconfig.
  sudo env http_proxy=http://10.130.14.129:8080 https_proxy=http://10.130.14.129:8080 bash NVIDIA-Linux-x86_64-390.42.run
fi
# Smoke test: run nvidia-smi inside the nvidia/cuda image using the nvidia
# runtime; --rm removes the container once the command exits.
docker run --runtime=nvidia --rm nvidia/cuda nvidia-smi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment