Skip to content

Instantly share code, notes, and snippets.

@troykelly
Last active April 27, 2024 05:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save troykelly/0a44b4a966c44845e445f3db5cca9f1d to your computer and use it in GitHub Desktop.
Save troykelly/0a44b4a966c44845e445f3db5cca9f1d to your computer and use it in GitHub Desktop.
Build a docker machine with rclone
# Kernel tunable installed by this guide as /etc/sysctl.d/90-elasticsearch.conf.
vm.max_map_count=262144
# Kernel tunable installed by this guide as /etc/sysctl.d/90-quic.conf
# (maximum socket receive buffer size, in bytes).
net.core.rmem_max=2500000

Build an rclone Docker machine with GPU support

Build

Set up key access

# Grant the `operations` user passwordless sudo, write the two sysctl drop-ins
# and authorise the operations SSH public key, then log out.
#
# - tee truncates the sudoers drop-in, so no separate rm is needed; the file is
#   set to mode 0440 and syntax-checked with visudo before the chain goes on.
# - ~/.ssh is created with mode 700 and authorized_keys with mode 600 so the
#   result does not depend on the caller's umask.
# - The key is appended only when it is not already present, so the snippet
#   can be re-run without piling up duplicate entries.
operations_pubkey="ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPbpCjkiYBPlx34WIDY2er5BuFT4BFWmTSGFNJCHoxo7 operations@aperim.com" && \
  echo "operations ALL=(ALL) NOPASSWD: ALL" | sudo tee /etc/sudoers.d/operations && \
  sudo chmod 0440 /etc/sudoers.d/operations && \
  sudo visudo -cf /etc/sudoers.d/operations && \
  echo "vm.max_map_count=262144" | sudo tee /etc/sysctl.d/90-elasticsearch.conf && \
  echo "net.core.rmem_max=2500000" | sudo tee /etc/sysctl.d/90-quic.conf && \
  sudo install -d -m 700 -o operations -g operations /home/operations/.ssh && \
  sudo touch /home/operations/.ssh/authorized_keys && \
  sudo chown -R operations:operations /home/operations/.ssh && \
  sudo chmod 600 /home/operations/.ssh/authorized_keys && \
  { sudo grep -qxF "$operations_pubkey" /home/operations/.ssh/authorized_keys || \
    echo "$operations_pubkey" | sudo tee -a /home/operations/.ssh/authorized_keys; }
  exit

DNS, zsh, logging, etc

# Base OS preparation, run as one chain so any failure stops before the reboot:
#   1. Turn off the systemd-resolved stub listener and link /etc/resolv.conf
#      to /run/systemd/resolve/resolv.conf.
#   2. Install the base packages, make zsh the login shell, install oh-my-zsh.
#   3. Forward all syslog messages to log001.public-servers.sy3.aperim.net:1514.
#   4. Apply pending upgrades, fetch the sysctl drop-ins and reboot.
#
# curl is installed explicitly because the oh-my-zsh and sysctl steps use it.
# The sysctl downloads use --fail so an HTTP error page is never written into
# /etc/sysctl.d, and DNSStubListener is appended only once across re-runs.
{ grep -qx "DNSStubListener=no" /etc/systemd/resolved.conf || \
    echo "DNSStubListener=no" | sudo tee -a /etc/systemd/resolved.conf; } && \
  sudo systemctl disable systemd-resolved.service && \
  sudo systemctl stop systemd-resolved && \
  sudo rm -f /etc/resolv.conf && \
  sudo ln -sf /run/systemd/resolve/resolv.conf /etc/resolv.conf && \
  sudo systemctl enable systemd-resolved && \
  sudo systemctl start systemd-resolved && \
  sudo apt-get update && \
  sudo DEBIAN_FRONTEND=noninteractive apt-get -y install build-essential curl rsyslog zsh vim libarchive-tools git open-vm-tools unzip wget htop nvtop && \
  sudo chsh -s "$(command -v zsh)" "$(whoami)" && \
  sh -c "$(curl -fsSL https://raw.githubusercontent.com/ohmyzsh/ohmyzsh/master/tools/install.sh)" "" --unattended && \
  sudo rm -f /etc/rsyslog.d/log001_sy3* && \
  echo "*.* @log001.public-servers.sy3.aperim.net:1514;RSYSLOG_SyslogProtocol23Format" | sudo tee /etc/rsyslog.d/log001_sy3.conf > /dev/null && \
  sudo chmod 440 /etc/rsyslog.d/log001_sy3.conf && \
  sudo systemctl enable rsyslog && \
  sudo systemctl start rsyslog && \
  sudo DEBIAN_FRONTEND=noninteractive apt-get -y full-upgrade && \
  sudo curl -fsSL "https://gist.githubusercontent.com/troykelly/0a44b4a966c44845e445f3db5cca9f1d/raw/90-elasticsearch.conf" -o /etc/sysctl.d/90-elasticsearch.conf && \
  sudo curl -fsSL "https://gist.githubusercontent.com/troykelly/0a44b4a966c44845e445f3db5cca9f1d/raw/90-quic.conf" -o /etc/sysctl.d/90-quic.conf && \
  sudo /sbin/reboot

Rclone

# Install the rclone beta build and the systemd units for its Docker volume
# plugin.
#
# NOTE(review): rclone's install.sh appears to exit with status 3 when the
# requested version is already installed; that status is tolerated here so a
# re-run still refreshes the unit files. Confirm against the current script.
# The unit downloads use --fail so an HTTP error page is never saved as a unit
# file, and systemd is reloaded so it sees the new units.
sudo -v
{ curl -fsSL https://rclone.org/install.sh | sudo bash -s beta || [ "$?" -eq 3 ]; } && \
sudo curl -fsSL "https://gist.githubusercontent.com/troykelly/0a44b4a966c44845e445f3db5cca9f1d/raw/docker-volume-rclone.service" -o /etc/systemd/system/docker-volume-rclone.service && \
sudo curl -fsSL "https://gist.githubusercontent.com/troykelly/0a44b4a966c44845e445f3db5cca9f1d/raw/docker-volume-rclone.socket" -o /etc/systemd/system/docker-volume-rclone.socket && \
sudo systemctl daemon-reload

Docker

See https://docs.docker.com/engine/install/ubuntu/

# Install Docker Engine from Docker's apt repository, add the current user to
# the docker group and enable the Docker and containerd services at boot.
#
# The existing keyring is removed first so gpg --dearmor can write the file
# again on a re-run. The repository line is built inside one quoted string so
# the command substitutions are not subject to word splitting.
sudo DEBIAN_FRONTEND=noninteractive apt-get -y install ca-certificates curl gnupg && \
sudo install -m 0755 -d /etc/apt/keyrings && \
sudo rm -f /etc/apt/keyrings/docker.gpg && \
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg && \
sudo chmod a+r /etc/apt/keyrings/docker.gpg && \
echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null && \
sudo apt-get update && \
sudo DEBIAN_FRONTEND=noninteractive apt-get -y install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin && \
sudo usermod -aG docker "${USER}" && \
sudo systemctl enable docker.service && \
sudo systemctl enable containerd.service

GlusterFS

# Install the GlusterFS client from the gluster/glusterfs-11 PPA.
# Each continuation backslash must be the last character on its line: a
# trailing space after it ends the command there and breaks the && chain.
sudo DEBIAN_FRONTEND=noninteractive apt-get -y install software-properties-common && \
  sudo add-apt-repository -y ppa:gluster/glusterfs-11 && \
  sudo apt-get update && \
  sudo DEBIAN_FRONTEND=noninteractive apt-get -y install glusterfs-client

Config

Rclone

Put the config files in /var/lib/docker-plugins/rclone/config/

For example:

# Copy the rclone configuration from ~/rclone into the plugin's config
# directory and hand both plugin directories to root:docker.
sudo mkdir -p /var/lib/docker-plugins/rclone/config /var/lib/docker-plugins/rclone/cache &&
  sudo cp -R ~/rclone/. /var/lib/docker-plugins/rclone/config/ &&
  sudo chown -R root:docker /var/lib/docker-plugins/rclone/config /var/lib/docker-plugins/rclone/cache &&
  sudo rm -Rf /var/lib/docker-plugins/rclone/config/config.log
# The permission fix runs whether or not the chain above succeeded.
sudo chmod 640 /var/lib/docker-plugins/rclone/config/*
# Enable and start the systemd service, then install the managed Docker plugin.
sudo systemctl enable docker-volume-rclone.service &&
  sudo systemctl start docker-volume-rclone.service &&
  docker plugin install rclone/docker-volume-rclone:amd64 --alias rclone --grant-all-permissions args="--vfs-cache-mode=full --vfs-read-ahead=512M --allow-other"

GlusterFS

Replace the example server names below with your own GlusterFS servers.

# Install the GlusterFS volume plugin in the disabled state, set its SERVERS
# list, then enable it.
docker plugin install --alias glusterfs mochoa/glusterfs-volume-plugin --grant-all-permissions --disable &&
  docker plugin set glusterfs SERVERS=gluster-a.example.com,gluster-b.example.com &&
  docker plugin enable glusterfs

GPU

Clean up docker

# Stop Docker, remove the nvidia-docker2 and nvidia-container-toolkit packages
# together with anything left unused, then reboot.
sudo systemctl stop docker &&
  sudo DEBIAN_FRONTEND=noninteractive apt-get -y remove nvidia-docker2 nvidia-container-toolkit &&
  sudo DEBIAN_FRONTEND=noninteractive apt-get -y autoremove &&
  sudo /sbin/reboot

Upgrade all the things

# Stop Docker, bring every package up to date, clean up unused packages and
# reboot.
sudo systemctl stop docker &&
  sudo apt-get update &&
  sudo DEBIAN_FRONTEND=noninteractive apt-get -y full-upgrade &&
  sudo apt-get -y autoremove &&
  sudo /sbin/reboot

Remove existing CUDA

# Stop Docker, then run the uninstaller of every CUDA toolkit found under
# /usr/local. The glob may match several installs, so each uninstaller is run
# on its own instead of passing the extra paths as arguments to the first one.
sudo systemctl stop docker && \
for cuda_uninstaller in /usr/local/cuda-*/bin/cuda-uninstaller; do
  # Skip the unexpanded pattern when no CUDA install is present.
  [ -x "$cuda_uninstaller" ] || continue
  sudo "$cuda_uninstaller" || break
done

Disable Secure Boot validation

# Request that validation be disabled via mokutil and, if that succeeded,
# reboot.
sudo mokutil --disable-validation && sudo /sbin/reboot

Remove existing NVIDIA

# Stop Docker, remove every installed nvidia-* and cuda-* package, rebuild the
# initramfs and reboot.
# -y is passed to the remove step like every other apt-get call in this guide;
# without it apt-get stops at a confirmation prompt in the middle of the chain.
sudo systemctl stop docker && \
sudo DEBIAN_FRONTEND=noninteractive apt-get -y remove "nvidia-*" "cuda-*" && \
sudo DEBIAN_FRONTEND=noninteractive apt-get -y autoremove && \
sudo update-initramfs -u && \
sudo /sbin/reboot

Add needed modprobe

# Recreate the two modprobe drop-ins from scratch: one blacklists nouveau and
# sets its modeset option to 0, the other sets the nvidia module's
# NVreg_OpenRmEnableUnsupportedGpus option. Then rebuild the initramfs and
# reboot.
sudo rm -Rf /etc/modprobe.d/blacklist-nvidia-nouveau.conf
sudo rm -Rf /etc/modprobe.d/nvidia-unsupported-gpu.conf
echo "blacklist nouveau" | sudo tee /etc/modprobe.d/blacklist-nvidia-nouveau.conf &&
  echo "options nouveau modeset=0" | sudo tee -a /etc/modprobe.d/blacklist-nvidia-nouveau.conf &&
  echo "options nvidia NVreg_OpenRmEnableUnsupportedGpus=1" | sudo tee /etc/modprobe.d/nvidia-unsupported-gpu.conf &&
  sudo update-initramfs -u &&
  sudo /sbin/reboot

Install CUDA

# Fetch the nvidia-patch repository and the CUDA 12.4.1 runfile, run the
# installer with -m=kernel-open, then reboot.
#
# NOTE(review): the first command empties /tmp for every user and service on
# the machine; it is kept as written, but confirm that this is intended.
# curl runs with --fail so an HTTP error page is never saved, marked executable
# and run as root in place of the installer.
sudo rm -Rf /tmp/*; \
sudo rm -Rf ~/cuda_*.run ~/nvidia-patch /opt/nvidia/libnvidia-encode-backup /opt/nvidia/libnvidia-fbc-backup; \
git clone --depth=1 https://github.com/keylase/nvidia-patch.git ~/nvidia-patch && \
curl -fL https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda_12.4.1_550.54.15_linux.run -o ~/cuda_linux.run && \
chmod +x ~/cuda_linux.run && \
sudo ~/cuda_linux.run -m=kernel-open && \
sudo /sbin/reboot

Check the driver worked

# Run after the reboot; nvidia-smi should run without error if the driver install worked.
nvidia-smi

Patch NVIDIA driver

# From the nvidia-patch checkout, run both patch scripts as root and reboot
# once they have succeeded.
cd ~/nvidia-patch &&
  sudo ./patch.sh &&
  sudo ./patch-fbc.sh &&
  sudo /sbin/reboot

Docker & NVIDIA

# Set up the NVIDIA Container Toolkit apt repository and register the NVIDIA
# runtime with Docker.
#
# check_version walks Ubuntu releases downward from the host's major version
# until it finds one for which libnvidia-container publishes a repository
# list. That release is then used to install the toolkit. Docker is always
# started again at the end, but the "setup complete" message is printed only
# when every install step succeeded.
sudo rm -f /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg &&
distribution=$(. /etc/os-release; echo "$ID$VERSION_ID") &&
repository_base_url="https://nvidia.github.io/libnvidia-container" &&
check_version() {
  # Usage: check_version MAJOR
  # Prints "ubuntuNN.04" or "ubuntuNN.10" for the newest release <= MAJOR
  # (and >= 16) whose libnvidia-container.list exists under
  # $repository_base_url. Prints an empty line when nothing matches or when
  # MAJOR is empty or not a number.
  local ver=$1
  case "$ver" in
    '' | *[!0-9]*)
      echo ""
      return
      ;;
  esac
  while [ "$ver" -ge 16 ]; do
    # HEAD requests only: probe for the list without downloading it.
    if curl --output /dev/null --silent --head --fail "$repository_base_url/ubuntu${ver}.04/libnvidia-container.list"; then
      echo "ubuntu${ver}.04"
      return
    elif curl --output /dev/null --silent --head --fail "$repository_base_url/ubuntu${ver}.10/libnvidia-container.list"; then
      echo "ubuntu${ver}.10"
      return
    fi
    ver=$((ver - 1))
  done
  echo ""
} &&
fallback_version=$(check_version "$(echo "$distribution" | grep -oP '(\d{2})\.(\d{2})' | cut -d. -f1)") &&
if [ -n "$fallback_version" ]; then
  distribution=$fallback_version
  echo "Using fallback version $distribution"
  # Run the install steps in a subshell with pipefail so that a failed
  # download is not masked by the command at the end of its pipeline, and so
  # the option does not leak into the calling shell.
  if (
    set -o pipefail
    curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg &&
      curl -fsSL "$repository_base_url/$distribution/libnvidia-container.list" | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list &&
      sudo systemctl stop docker &&
      sudo apt-get update &&
      sudo DEBIAN_FRONTEND=noninteractive apt-get install -y nvidia-container-toolkit &&
      sudo nvidia-ctk runtime configure --runtime=docker
  ); then
    sudo systemctl start docker &&
      echo "NVIDIA Docker repository setup complete for distribution: $distribution"
  else
    echo "NVIDIA Container Toolkit setup failed for distribution: $distribution" >&2
    # Do not leave Docker stopped after a failed attempt.
    sudo systemctl start docker
  fi
else
  echo "No supported NVIDIA Docker repository could be found."
fi

IPv6 in Docker

Follow the instructions in this gist:

https://gist.github.com/troykelly/749b48712d0d777030e7dd5d64916744

# systemd service unit that runs "rclone serve docker" as a Docker volume plugin.
[Unit]
Description=Docker Volume Plugin for rclone
# Declares a dependency on docker.service while ordering this unit before it
# and after network.target.
Requires=docker.service
Before=docker.service
After=network.target
# The plugin's socket unit is required and ordered before this service.
Requires=docker-volume-rclone.socket
After=docker-volume-rclone.socket
[Service]
ExecStart=/usr/bin/rclone serve docker
# Create the volume, config and cache directories before rclone starts.
ExecStartPre=/bin/mkdir -p /var/lib/docker-volumes/rclone
ExecStartPre=/bin/mkdir -p /var/lib/docker-plugins/rclone/config
ExecStartPre=/bin/mkdir -p /var/lib/docker-plugins/rclone/cache
# rclone settings passed through the environment: config file, cache directory
# and verbosity.
Environment=RCLONE_CONFIG=/var/lib/docker-plugins/rclone/config/rclone.conf
Environment=RCLONE_CACHE_DIR=/var/lib/docker-plugins/rclone/cache
Environment=RCLONE_VERBOSE=1
[Install]
WantedBy=multi-user.target
# systemd socket unit for the rclone Docker volume plugin
# (required by docker-volume-rclone.service).
[Unit]
Description=Docker Volume Plugin for rclone
[Socket]
# Unix socket path, under /run/docker/plugins, on which the plugin listens.
ListenStream=/run/docker/plugins/rclone.sock
[Install]
WantedBy=sockets.target
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment