Add an AWS GPU instance to a K8s cluster. Tested with instance type g3 on Ubuntu 20.04.
# Start an interactive CUDA container; it gets committed as an image at the end
docker run --gpus all -it nvidia/cuda:10.2-devel-ubuntu18.04 /bin/bash
# Inside the container: build samtools from source and install bonito + CuPy
apt update && apt install -y python3 build-essential python3-pip libz-dev wget libncurses5 libncurses5-dev libbz2-dev liblzma-dev liblzma5
wget https://github.com/samtools/samtools/releases/download/1.11/samtools-1.11.tar.bz2
tar -xf samtools-1.11.tar.bz2
cd samtools-1.11
./configure --prefix=/usr/local/
make
make install
pip3 install ont-bonito cupy-cuda102
# Back on the host: commit the configured container as an image
docker commit <container id> galaxyworks/samtools-bonito:0.3.0
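To sanity-check the committed image before using it, something along these lines should work (a minimal sketch; the image tag matches the commit above, and cupy.cuda.runtime.getDeviceCount() is standard CuPy API):

# Confirm samtools was installed into /usr/local and is on the PATH
docker run --rm --gpus all galaxyworks/samtools-bonito:0.3.0 samtools --version
# Confirm CuPy can reach the CUDA runtime and sees at least one device
docker run --rm --gpus all galaxyworks/samtools-bonito:0.3.0 python3 -c "import cupy; print(cupy.cuda.runtime.getDeviceCount())"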
<job_conf>
    <plugins>
        <plugin id="local" type="runner" load="galaxy.jobs.runners.local:LocalJobRunner" workers="4" />
        <plugin id="k8s" type="runner" load="galaxy.jobs.runners.kubernetes:KubernetesJobRunner">
            <param id="k8s_use_service_account">true</param>
            <param id="k8s_persistent_volume_claims">galaxy-1604089776-galaxy-pvc:/galaxy/server/database,galaxy-1604089776-cvmfs-gxy-data-pvc:/cvmfs/data.galaxyproject.org,galaxy-1604089776-cvmfs-gxy-main-pvc:/cvmfs/main.galaxyproject.org,initial-project-data-pvc:/gvl/projects/current,initial-public-data-pvc:/gvl/public</param>
            <param id="k8s_namespace">initial</param>
            <!-- Must be DNS friendly and less than 20 characters -->
            <param id="k8s_galaxy_instance_id">galaxy-1604089776</param>
            <param id="k8s_run_as_user_id">101</param>
            <param id="k8s_run_as_group_id">101</param>
            <param id="k8s_fs_group_id">101</param>
            <param id="k8s_supplemental_group_id">101</param>
            <param id="k8s_pull_policy">IfNotPresent</param>
            <param id="k8s_cleanup_job">onsuccess</param>
            <param id="k8s_pod_priority_class">galaxy-1604089776-job-priority</param>
        </plugin>
        <plugin id="gpu" type="runner" load="galaxy.jobs.runners.kubernetes:KubernetesJobRunner">
            <param id="k8s_use_service_account">true</param>
            <param id="k8s_persistent_volume_claims">galaxy-1604089776-galaxy-pvc:/galaxy/server/database,galaxy-1604089776-cvmfs-gxy-data-pvc:/cvmfs/data.galaxyproject.org,galaxy-1604089776-cvmfs-gxy-main-pvc:/cvmfs/main.galaxyproject.org</param>
            <param id="k8s_namespace">initial</param>
            <!-- Must be DNS friendly and less than 20 characters -->
            <param id="k8s_galaxy_instance_id">galaxy-1604089776</param>
            <param id="k8s_run_as_user_id">101</param>
            <param id="k8s_run_as_group_id">101</param>
            <param id="k8s_fs_group_id">101</param>
            <param id="k8s_supplemental_group_id">101</param>
            <param id="k8s_pull_policy">IfNotPresent</param>
            <param id="k8s_cleanup_job">onsuccess</param>
            <param id="k8s_pod_priority_class">galaxy-1604089776-job-priority</param>
            <param id="k8s_affinity">
                nodeAffinity:
                  requiredDuringSchedulingIgnoredDuringExecution:
                    nodeSelectorTerms:
                      - matchExpressions:
                          - key: nvidia.com/gpu
                            operator: In
                            values:
                              - "true"
            </param>
        </plugin>
    </plugins>
    <handlers assign_with="db-skip-locked" />
    <destinations default="dynamic-k8s-dispatcher">
        <destination id="local" runner="local"/>
        <destination id="dynamic-k8s-dispatcher" runner="dynamic">
            <param id="type">python</param>
            <param id="function">k8s_container_mapper</param>
            <param id="docker_default_container_id">galaxy/galaxy-k8s:20.05</param>
            <param id="docker_enabled">true</param>
        </destination>
        <destination id="dynamic-gpu-dispatcher" runner="dynamic">
            <param id="type">python</param>
            <param id="function">k8s_container_mapper</param>
            <param id="k8s_runner_id">gpu</param>
            <param id="docker_default_container_id">galaxyworks/bonito:0.3.0</param>
            <param id="docker_enabled">true</param>
        </destination>
    </destinations>
    <limits>
        <limit type="registered_user_concurrent_jobs">5</limit>
        <limit type="anonymous_user_concurrent_jobs">2</limit>
    </limits>
    <tools>
        <tool id="bonito_basecaller" destination="dynamic-gpu-dispatcher" />
    </tools>
</job_conf>
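With this config, only bonito_basecaller is routed to dynamic-gpu-dispatcher (and thus to the gpu runner with its node affinity); every other tool falls through to the default dynamic-k8s-dispatcher. To confirm a bonito job actually lands on the GPU node, something like the following can be run after launching a job in Galaxy (a sketch; the pod name is a placeholder to fill in from the first command's output):

# The -o wide output includes the node each job pod was scheduled on
kubectl get pods -n initial -o wide
# The bonito job pod's spec should carry the nodeAffinity block from the gpu runner above
kubectl get pod <bonito job pod> -n initial -o yaml | grep -A 10 affinity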
apt update && apt install -y ubuntu-drivers-common apt-transport-https ca-certificates curl gnupg-agent software-properties-common nvidia-cuda-toolkit linux-headers-generic build-essential
ubuntu-drivers autoinstall
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
add-apt-repository \
   "deb [arch=amd64] https://download.docker.com/linux/ubuntu \
   $(lsb_release -cs) \
   stable"
apt update && apt install -y docker-ce docker-ce-cli containerd.io
reboot
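After the reboot, the driver picked by ubuntu-drivers autoinstall should be loaded; nvidia-smi (installed with the driver) is a quick check:

# Should list the instance's GPU (a Tesla M60 on g3 instances) along with the driver version
nvidia-smi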
distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
apt update && apt install -y nvidia-container-toolkit nvidia-container-runtime nvidia-docker2
# Make sure the `default-runtime` is set in /etc/docker/daemon.json
vi /etc/docker/daemon.json
{
    "default-runtime": "nvidia",
    "runtimes": {
        "nvidia": {
            "path": "/usr/bin/nvidia-container-runtime",
            "runtimeArgs": []
        }
    }
}
sudo pkill -SIGHUP dockerd
sudo systemctl daemon-reload
systemctl restart docker
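To confirm the daemon actually picked up the new default runtime, docker info should now report it (a quick check; DefaultRuntime is a standard field in the docker info output):

# Expect this to print "nvidia"
docker info --format '{{.DefaultRuntime}}'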
# Test with a demo container to see if the GPU is discovered by a container
docker run --gpus all nvidia/cuda nvidia-smi
# <add node to the cluster using the command from Rancher>
# Run once per cluster; it installs a daemonset
kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.7.0/nvidia-device-plugin.yml
kubectl label node <ip> nvidia.com/gpu=true
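Once the device plugin daemonset has a pod running on the new node, the GPU should appear in the node's resources; a couple of checks (using the same <ip> placeholder as the label command above):

# The device plugin pod for the node should be Running in kube-system
kubectl get pods -n kube-system | grep nvidia-device-plugin
# Capacity/Allocatable should now include nvidia.com/gpu: 1
kubectl describe node <ip> | grep nvidia.com/gpu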
vi cuda.pod
apiVersion: v1
kind: Pod
metadata:
  name: cuda-vector-add
spec:
  restartPolicy: OnFailure
  containers:
    - name: cuda-vector-add
      # https://github.com/kubernetes/kubernetes/blob/v1.7.11/test/images/nvidia-cuda/Dockerfile
      image: "k8s.gcr.io/cuda-vector-add:v0.1"
      resources:
        limits:
          nvidia.com/gpu: 1
  nodeSelector:
    nvidia.com/gpu: 'true'
kubectl apply -f cuda.pod
# There are no logs available via `kubectl logs` but `docker logs` shows process output
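Since the vector-add container runs to completion, success shows up as the pod's status rather than in kubectl logs; a quick check using the pod name from the spec above:

# Expect STATUS to reach Completed once the CUDA test has run
kubectl get pod cuda-vector-add
# Scheduling problems (e.g. 'Insufficient nvidia.com/gpu') show up under Events
kubectl describe pod cuda-vector-add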