Skip to content

Instantly share code, notes, and snippets.

@tsuchm
Last active April 7, 2021 16:22
Show Gist options
  • Save tsuchm/bf61370ccb2fac639c811d77d0c61fa7 to your computer and use it in GitHub Desktop.
Save tsuchm/bf61370ccb2fac639c811d77d0c61fa7 to your computer and use it in GitHub Desktop.
Ansible Playbook to install nvidia-docker2
Explanation: Pin libnccl-dev to versions matching *cuda9.1.
Package: libnccl-dev
Pin: version *cuda9.1
Pin-Priority: 999

Explanation: Pin libnccl-dev to versions matching *cuda9.0.
Package: libnccl-dev
Pin: version *cuda9.0
Pin-Priority: 999

Explanation: Pin libnccl2 to versions matching *cuda9.1.
Package: libnccl2
Pin: version *cuda9.1
Pin-Priority: 999

Explanation: Pin libnccl2 to versions matching *cuda9.0.
Package: libnccl2
Pin: version *cuda9.0
Pin-Priority: 999

Explanation: Pin libcudnn7 to versions matching *cuda9.1.
Package: libcudnn7
Pin: version *cuda9.1
Pin-Priority: 999

Explanation: Pin libcudnn7 to versions matching *cuda9.0.
Package: libcudnn7
Pin: version *cuda9.0
Pin-Priority: 999

Explanation: Pin libcudnn7-dev to versions matching *cuda9.1.
Package: libcudnn7-dev
Pin: version *cuda9.1
Pin-Priority: 999

Explanation: Pin libcudnn7-dev to versions matching *cuda9.0.
Package: libcudnn7-dev
Pin: version *cuda9.0
Pin-Priority: 999

Explanation: Pin libcudnn6 to versions matching *cuda8.0.
Package: libcudnn6
Pin: version *cuda8.0
Pin-Priority: 999

Explanation: Pin libcudnn6-dev to versions matching *cuda8.0.
Package: libcudnn6-dev
Pin: version *cuda8.0
Pin-Priority: 999

Explanation: Pin libcudnn5 to versions matching *cuda8.0.
Package: libcudnn5
Pin: version *cuda8.0
Pin-Priority: 999

Explanation: Pin libcudnn5-dev to versions matching *cuda8.0.
Package: libcudnn5-dev
Pin: version *cuda8.0
Pin-Priority: 999
# Register the stretch-backports APT source (only on hosts whose release is
# 'stretch') and refresh the package cache afterwards.
- name: Enable Backports repository
  when: ansible_distribution_release == 'stretch'
  apt_repository:
    filename: stretch-backports
    repo: deb http://httpredir.debian.org/debian stretch-backports main contrib non-free
    state: present
    update_cache: true
# Install the NVIDIA driver packages from stretch-backports in one apt call.
# The package list is passed directly to `name` instead of looping, so apt
# resolves all three packages together. The result is registered so that the
# reboot and driver-check tasks can run only when something actually changed.
- name: Install NVidia driver
  apt:
    name:
      - nvidia-egl-icd
      - nvidia-driver
      - nvidia-smi
    state: latest
    default_release: stretch-backports
    update_cache: true
  register: nvidia_driver_apt
# Reboot only when the driver installation reported a change. The command is
# launched asynchronously with poll 0, so Ansible does not wait for it to
# finish; the short sleep delays the reboot itself.
- name: Reboot system to refresh NVidia driver
  when: nvidia_driver_apt.changed
  shell: sleep 2 && reboot
  async: 1
  poll: 0
# From the control machine (no privilege escalation), wait 30 seconds and then
# block until port 22 on the target host accepts connections again.
- name: Wait system resume
  when: nvidia_driver_apt.changed
  become: false
  delegate_to: localhost
  wait_for:
    host: "{{ inventory_hostname }}"
    port: 22
    delay: 30
# Run nvidia-smi after a driver change and fail the play if its output
# contains 'has failed'. `|| true` keeps the shell exit status at 0 so the
# verdict comes solely from `failed_when`. The command only inspects state,
# so it is marked as never changing anything.
- name: Check NVidia driver
  shell: nvidia-smi || true
  register: nvidia_smi_output
  changed_when: false
  failed_when: "'has failed' in nvidia_smi_output.stdout"
  when: nvidia_driver_apt.changed
# On stretch hosts, add the Jessie main, updates and security sources to a
# single 'jessie' list file. The package cache is not refreshed here.
- name: Enable Jessie repositories
  when: ansible_distribution_release == 'stretch'
  apt_repository:
    repo: "{{ item }}"
    filename: jessie
    state: present
    update_cache: false
  with_items:
    - deb http://ftp.jp.debian.org/debian/ jessie main contrib non-free
    - deb http://ftp.jp.debian.org/debian/ jessie-updates main contrib non-free
    - deb http://security.debian.org/debian-security jessie/updates main contrib non-free
# Install the GCC/G++ 4.9 toolchain in a single apt call.
# The Jessie sources are added by the "Enable Jessie repositories" task
# without refreshing the package cache, so the cache is updated here;
# otherwise these packages may not be found on a first run.
# NOTE(review): presumably gcc-4.9/g++-4.9 come from the Jessie sources - confirm.
- name: Install GCC-4.9
  apt:
    name:
      - gcc-4.9
      - g++-4.9
    state: latest
    update_cache: true
# Install (or upgrade to the latest) nvidia-cuda-toolkit, preferring
# stretch-backports as the default release.
- name: Install CUDA
  apt:
    name: nvidia-cuda-toolkit
    state: latest
    default_release: stretch-backports
# Trust the signing key of the NVIDIA Machine Learning repository.
# The key is downloaded over HTTPS: it is the trust anchor for every package
# installed from that repository, so it must not be fetched over plain HTTP.
- name: Add Nvidia Machine Learning repository key
  apt_key:
    url: https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/7fa2af80.pub
    state: present
# Add the NVIDIA Machine Learning APT source to its own list file and refresh
# the package cache.
- name: Enable Nvidia Machine Learning repository
  apt_repository:
    filename: nvidia-machine-learning
    repo: deb http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 /
    state: present
    update_cache: true
# Copy the local 'apt_preferences' file to /etc/apt/preferences.d/nvidia,
# owned by root:root with mode 0644.
- name: Put preferences for Nvidia Machine Learning repository.
  copy:
    src: apt_preferences
    dest: /etc/apt/preferences.d/nvidia
    owner: root
    group: root
    mode: "0644"
# Install the cuDNN 5/6/7 and NCCL runtime and development packages after
# refreshing the package cache. The list is passed directly to `name` so apt
# handles all packages in one call instead of one call per loop item.
- name: Install cuDNN and NCCL
  apt:
    name:
      - libcudnn7
      - libcudnn7-dev
      - libcudnn6
      - libcudnn6-dev
      - libcudnn5
      - libcudnn5-dev
      - libnccl2
      - libnccl-dev
    state: latest
    update_cache: true
# Install the CUPTI packages from stretch-backports after refreshing the
# package cache. Both packages are passed to `name` as a list so apt handles
# them in a single call rather than once per loop item.
- name: Install cupti libraries, which is necessary to build TensorFlow.
  apt:
    name:
      - libcupti-dev
      - libcupti9.1
    state: latest
    default_release: stretch-backports
    update_cache: true
# Both the Docker repository and the NVidia-Docker repository are served over
# HTTPS, so the apt-transport-https package must be present first.
- name: Install apt-transport-https
  apt:
    name: apt-transport-https
    state: present
# Trust the signing key published at the Docker download site.
- name: Add Docker repository key
  apt_key:
    state: present
    url: https://download.docker.com/linux/debian/gpg
# Add the Docker 'stable' APT source for the host's distribution release
# (amd64 only) to its own list file. The package cache is not refreshed here.
- name: Add Docker repository source
  apt_repository:
    filename: docker
    repo: "deb [arch=amd64] https://download.docker.com/linux/debian {{ ansible_distribution_release }} stable"
    state: present
    update_cache: false
# Trust the signing key published for the nvidia-docker repository.
- name: Add NVidia-Docker repository key
  apt_key:
    state: present
    url: https://nvidia.github.io/nvidia-docker/gpgkey
# Download the nvidia-docker APT source list that matches the host's Debian
# major version into /etc/apt/sources.list.d, owned by root:root, mode 0644.
- name: Add NVidia-Docker repository source
  get_url:
    url: "https://nvidia.github.io/nvidia-docker/debian{{ ansible_distribution_major_version }}/nvidia-docker.list"
    dest: /etc/apt/sources.list.d/nvidia-docker.list
    owner: root
    group: root
    mode: "0644"
# Refresh the package cache and install (or upgrade to the latest)
# nvidia-docker2. The result is registered so Docker is reloaded only when
# the package actually changed.
- name: Install nvidia-docker2
  apt:
    name: nvidia-docker2
    state: latest
    update_cache: true
  register: nvidia_docker_apt
# Ask the docker service to reload its configuration, but only when the
# nvidia-docker2 installation reported a change.
- name: Reload Docker configuration
  when: nvidia_docker_apt.changed
  service:
    name: docker
    state: reloaded
## Test nvidia-smi with the latest official CUDA image:
# docker run --runtime=nvidia --rm nvidia/cuda nvidia-smi
## When using Debian GNU/Linux 9.4 (Stretch) as your host machine,
## a CUDA version mismatch causes an error. To avoid this
## problem, specify the tag of the CUDA image as follows:
# docker run --runtime=nvidia --rm nvidia/cuda:8.0-devel nvidia-smi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment