Skip to content

Instantly share code, notes, and snippets.

@solesensei
Last active May 17, 2024 15:58
Show Gist options
  • Save solesensei/05de2460d0d85e8ddcc173d2d1d49d31 to your computer and use it in GitHub Desktop.
Save solesensei/05de2460d0d85e8ddcc173d2d1d49d31 to your computer and use it in GitHub Desktop.
Init VM (docker, cuda, tools)
#!/bin/bash
# Create a GPU VM in Azure, bootstrap it through cloud-init, and SSH into it.
#
# Prompts for the VM name, creates the VM in the devpod-rg resource group
# without a public IP, and hands Azure a small cloud-init script that downloads
# and runs vm-init.sh on first boot (output goes to /tmp/vm-init.log on the VM).
# Afterwards it looks up the VM's private IP and opens an SSH session to it.
set -e

PUBLIC_KEY=~/.ssh/id_rsa.pub
SIZE=Standard_NC24ads_A100_v4

# -r keeps any backslash typed by the user literal.
read -r -p 'VM Name: ' VM_NAME
if [ -z "$VM_NAME" ]; then
  echo "VM name is required"
  exit 1
fi

# Create the cloud-init payload in a private temp file. The EXIT trap removes
# it on every exit path, including an `az vm create` failure under `set -e`.
CLOUD_INIT=$(mktemp)
trap 'rm -f -- "$CLOUD_INIT"' EXIT
# Quoted delimiter: the payload is written verbatim, with no local expansion.
cat <<'EOF' > "$CLOUD_INIT"
#!/bin/sh
curl -s https://gist.githubusercontent.com/solesensei/05de2460d0d85e8ddcc173d2d1d49d31/raw/vm-init.sh | bash > /tmp/vm-init.log 2>&1
EOF

echo "Select subscription..."
az account set --subscription ah-dev
SUBSCRIPTION_ID=$(az account show --query id --output tsv)

echo "Creating VM $VM_NAME..."
az vm create \
  --resource-group devpod-rg \
  --name "$VM_NAME" \
  --image Ubuntu2204 \
  --admin-username devpod \
  --ssh-key-values "$PUBLIC_KEY" \
  --custom-data "$CLOUD_INIT" \
  --os-disk-delete-option delete \
  --nic-delete-option delete \
  --storage-sku Premium_LRS \
  --os-disk-size-gb 50 \
  --subnet "/subscriptions/$SUBSCRIPTION_ID/resourceGroups/ah-dev-vnet-rg/providers/Microsoft.Network/virtualNetworks/ah-dev-vnet/subnets/main" \
  --size "$SIZE" \
  --public-ip-address '' \
  --tags "user=$USER"
rm -f -- "$CLOUD_INIT"

# Get the private IP address of the VM (it is created without a public IP)
# and connect to it.
echo "Connecting to VM $VM_NAME..."
IP=$(az vm show \
  --resource-group devpod-rg \
  --name "$VM_NAME" \
  --show-details \
  --query privateIps \
  --output tsv)
if [ -z "$IP" ]; then
  echo "Could not determine the private IP of VM $VM_NAME" >&2
  exit 1
fi

echo "ssh devpod@$IP"
# Capture the ssh status instead of letting `set -e` abort here, so the
# restart hint below is printed even when the session ends non-zero.
ssh_status=0
ssh "devpod@$IP" || ssh_status=$?
echo "Restart VM with 'az vm restart --resource-group devpod-rg --name $VM_NAME'"
exit "$ssh_status"
#!/bin/bash
# vm-init.sh: provisions a fresh VM with packages, Python, NVIDIA drivers,
# Docker and CLI tools. Aborts on the first failing command.
set -e

# Version reported in the banner and in the final "Installed" message.
export VM_INIT_VERSION="0.0.6"
# Pin the identity to the devpod user for everything that follows.
# NOTE(review): presumably needed because the script is not started from a
# devpod login session; confirm against how it is launched.
export HOME=/home/devpod USER=devpod

# Opening banner, then a short pause before the work starts.
printf '%s\n' \
  "######################################################################" \
  "# VM INIT SCRIPT v${VM_INIT_VERSION} #"
printf '%s\n\n\n' "######################################################################"
sleep 1
echo -e "######################################################################"
echo -e "# PACKAGES INSTALLATION #"
echo -e "######################################################################\n\n"

# Base command-line tooling installed from the distribution repositories.
echo "1. Install packages"
sudo apt-get update
sudo apt-get install -y curl \
  wget \
  git \
  jq \
  golang-go

# Download the latest devpod release binary and install it system-wide.
echo "Install devpod"
# The steps run as separate commands rather than one `a && b && c` list:
# `set -e` ignores failures of every command in such a list except the last,
# so a failed download used to be skipped silently. -f makes curl fail on an
# HTTP error instead of saving the error page as the "binary".
curl -fL -o devpod "https://github.com/loft-sh/devpod/releases/latest/download/devpod-linux-amd64"
sudo install -c -m 0755 devpod /usr/local/bin
rm -f devpod
echo -e "######################################################################"
echo -e "# PYTHON INSTALLATION #"
echo -e "######################################################################\n\n"

# Install Python 3.10, expose it as `python`, then bootstrap pip and IPython.
echo "Install python"
sudo apt-get install -y python3.10
sudo mkdir -p /usr/local/bin
# -sfn replaces an existing link instead of failing, so a second run of the
# script does not abort here under `set -e`.
sudo ln -sfn "$(command -v python3)" /usr/local/bin/python
curl -sS https://bootstrap.pypa.io/get-pip.py | python
pip install ipython -U
echo -e "######################################################################"
echo -e "# DRIVER INSTALLATION #"
echo -e "######################################################################\n\n"

# see for details:
# https://learn.microsoft.com/en-us/azure/virtual-machines/linux/n-series-driver-setup#install-cuda-driver-on-ubuntu-with-secure-boot-enabled
#
# All status messages below use `echo -e`: bash's builtin echo prints "\n"
# literally unless -e is given.

echo "0. Remove installed cuda drivers"
sudo rm -rf /etc/apt/sources.list.d/cuda*
sudo apt-get remove -y --autoremove nvidia-cuda-toolkit
# Quote the pattern so apt matches package names; unquoted, the shell could
# expand it against files in the current directory first.
sudo apt-get remove -y --autoremove 'nvidia-*'

echo "1. Install pre-built Azure Linux kernel based NVIDIA modules and CUDA drivers"
sudo apt-get update
sudo apt-get install -y linux-modules-nvidia-525-azure nvidia-driver-525

echo -e "\n\n------------------------------------------------------"
echo -e "2. Change preference of NVIDIA packages to prefer NVIDIA repository\n\n"
# apt preferences records must be separated by blank lines, so each stanza
# below is its own paragraph. The quoted delimiter keeps the text verbatim.
sudo tee /etc/apt/preferences.d/cuda-repository-pin-600 > /dev/null <<'EOL'
Package: nsight-compute
Pin: origin *ubuntu.com*
Pin-Priority: -1

Package: nsight-systems
Pin: origin *ubuntu.com*
Pin-Priority: -1

Package: nvidia-modprobe
Pin: release l=NVIDIA CUDA
Pin-Priority: 600

Package: nvidia-settings
Pin: release l=NVIDIA CUDA
Pin-Priority: 600

Package: *
Pin: release l=NVIDIA CUDA
Pin-Priority: 100
EOL

echo -e "\n\n------------------------------------------------------"
echo -e "3. Add CUDA repository\n\n"
distro=ubuntu2204
arch=x86_64
sudo apt-key adv --fetch-keys "https://developer.download.nvidia.com/compute/cuda/repos/$distro/$arch/3bf863cc.pub"
sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/$distro/$arch/ /"

echo -e "\n\n------------------------------------------------------"
echo -e "4. Install kernel headers and development packages, and remove outdated signing key\n\n"
# -y: without it apt-get stops to ask for confirmation.
sudo apt-get install -y "linux-headers-$(uname -r)"
sudo apt-key del 7fa2af80

echo -e "\n\n------------------------------------------------------"
echo -e "5. Install the new cuda-keyring package\n\n"
wget "https://developer.download.nvidia.com/compute/cuda/repos/$distro/$arch/cuda-keyring_1.1-1_all.deb"
# `yes` answers any dpkg prompt affirmatively.
yes | sudo dpkg -i cuda-keyring_1.1-1_all.deb

echo -e "------------------------------------------------------\n\n"
# NOTE(review): step 6 only prints its banner and the Secure Boot hint; no
# apt-get update / GPUDirect Storage install command follows in this script.
# Confirm whether the step was dropped on purpose.
echo -e "6. Update APT repository cache and install NVIDIA GPUDirect Storage\n\n"
echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
echo "ACTION REQUIRED: When asked for password for Secure Boot, enter a password you will remember. "
echo -e "Simple password like qwerty123 will work just fine.\n\n"
echo -e "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n"
echo -e "######################################################################"
echo -e "# DOCKER INSTALLATION #"
echo -e "######################################################################\n\n"

# Fetch Docker's convenience installer and run it as root.
curl -fsSL https://get.docker.com -o get-docker.sh
sudo sh get-docker.sh

# Add the devpod user to the docker group so it can use Docker without sudo.
sudo usermod -aG docker devpod
# `newgrp docker` is deliberately not run here. It starts a new shell, which
# in a non-interactive script either stalls or takes over the script's stdin
# (the script arrives via `curl | bash`). The group membership applies at the
# next login, and this script ends by asking for a reboot.

sudo apt-get install -y nvidia-container-toolkit
echo -e "######################################################################"
echo -e "# TOOLS INSTALLATION #"
echo -e "######################################################################\n\n"

echo "Install Azure CLI"
curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash

echo "Install Kubernetes CLI and Kubelogin"
sudo az aks install-cli

echo "Install FZF"
# Clone only when the checkout is missing, and run the installer as its own
# command. In the former `clone && install` list a failed clone was ignored
# by `set -e` and the installer was silently skipped.
if [ ! -d /home/devpod/.fzf ]; then
  git clone --depth 1 https://github.com/junegunn/fzf.git /home/devpod/.fzf
fi
/home/devpod/.fzf/install --all

echo "Setup DNS"
# -y: without it the install stops at the confirmation prompt.
sudo apt-get install -y resolvconf
sudo systemctl enable --now resolvconf.service
# Append the nameserver only once so repeated runs do not duplicate the line.
if ! sudo grep -qxF 'nameserver 20.73.136.231' /etc/resolvconf/resolv.conf.d/head 2>/dev/null; then
  echo 'nameserver 20.73.136.231' | sudo tee -a /etc/resolvconf/resolv.conf.d/head
fi
sudo resolvconf -u
echo -e "######################################################################"
echo -e "# ALIASES #"
echo -e "######################################################################\n\n"

# Append the shell aliases and the Nebius registry login helper to devpod's
# .bashrc. The quoted delimiter writes the text verbatim (no expansion here),
# producing the same lines as appending them one by one.
cat >> /home/devpod/.bashrc <<'BASHRC'
# ----------- Aliases -----------
# Kubernetes
alias k=kubectl
alias kdevpod="kpods -n devpod --show-labels"
# Docker
_docker_login_nebius() {
 if [ -f /home/devpod/key.json ]; then
 cat /home/devpod/key.json | docker login --username json_key --password-stdin cr.ai.nebius.cloud
 else
 echo 'File key.json not found'
 echo 'See https://nebius.ai/docs/container-registry/operations/authentication#sa-json'
 fi
}
alias docker-login-nebius=_docker_login_nebius
export NEBIUS_DR=cr.ai.nebius.cloud/crnirqboqtatgotcga10
BASHRC

# -e so the trailing "\n\n" becomes blank lines instead of literal text.
echo -e "------------------------------------------------------\n\n"
# Post-install instructions for the operator.
echo "Now reboot and check that the driver and docker are installed correctly"
echo "nvidia-smi"
echo "docker run hello-world"
echo "Installed ver ${VM_INIT_VERSION} !"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment