Skip to content

Instantly share code, notes, and snippets.

@alexandrnikitin
Last active April 16, 2022 13:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save alexandrnikitin/4acabec89f0f709af2e82b7da5c6def7 to your computer and use it in GitHub Desktop.
How to launch Colab on a GCE spot instance
# Create a spot (preemptible) GCE VM from the public Colab marketplace image.
# Fill in the placeholders before running:
#   <INSERT_PROJECT_ID> - GCP project that owns the default network/subnetwork
#   <INSERT_REGION>     - region of the subnetwork (e.g. us-central1)
#   <INSERT_ACCOUNT>    - project number of the default compute service account
# --provisioning-model=SPOT with --instance-termination-action=DELETE deletes
# the VM when it is preempted, and --no-restart-on-failure keeps GCE from
# resurrecting it.
# --metadata-from-file=user-data=cloud_init.conf passes the #cloud-config
# document below to cloud-init on first boot.
gcloud beta compute instances create colab-1-vm \
--machine-type=e2-highmem-4 \
--provisioning-model=SPOT \
--boot-disk-size=200GB \
--boot-disk-type=pd-balanced \
--instance-termination-action=DELETE \
--image=https://www.googleapis.com/compute/v1/projects/colab-marketplace-image-public/global/images/colab-mp-20220110 \
--network=https://www.googleapis.com/compute/v1/projects/<INSERT_PROJECT_ID>/global/networks/default \
--subnet=https://www.googleapis.com/compute/v1/projects/<INSERT_PROJECT_ID>/regions/<INSERT_REGION>/subnetworks/default \
--no-restart-on-failure \
--metadata-from-file=user-data=cloud_init.conf \
--service-account=<INSERT_ACCOUNT>@developer.gserviceaccount.com \
--scopes=https://www.googleapis.com/auth/cloud.useraccounts.readonly,https://www.googleapis.com/auth/devstorage.read_only,https://www.googleapis.com/auth/logging.write,https://www.googleapis.com/auth/monitoring.write,https://www.googleapis.com/auth/cloudruntimeconfig,https://www.googleapis.com/auth/compute
#cloud-config
# cloud-init user-data for the Colab marketplace VM (cloud_init.conf).
# Creates the three service users the Colab containers run under; each HOME is
# referenced by the matching systemd unit below, and all are in the "docker"
# group so they can talk to the docker daemon.
users:
- name: datalab    # HOME for the k_default (kernel) container unit
  uid: 2000
  groups: docker
- name: tunnelbe   # HOME for the tunnelbe (tunnel backend) container unit
  uid: 2001
  groups: docker
- name: kmp_user   # HOME for the kmp_default (kernel manager proxy) unit
  uid: 2002
  groups: docker
write_files:
# One-shot boot script (run from runcmd at the bottom of this file): grows the
# docker backing disk, configures the memory cgroup, networking and apparmor,
# then (re)starts every Colab container/service.
- path: /etc/systemd/system/startup.sh
  permissions: '0755'
  owner: root
  content: |
    #!/bin/bash -ex
    truncate -s 190G /var/lib/docker/colab-vmdisk
    # NB: Corrupted filesystems are not supported. fsck will return a non-zero
    # exit status even if it fixed issues, and we exit on errors (-e above).
    # The reason for the fsck is solely to satisfy a resize2fs requirement.
    e2fsck -pf /var/lib/docker/colab-vmdisk
    resize2fs /var/lib/docker/colab-vmdisk
    mkdir -p /sys/fs/cgroup/jupyter-children
    # Cap the jupyter-children cgroup at free memory minus ~2GB of headroom.
    bytes=$(free -b | awk '/^Mem:/{print $4}') ; adj_bytes=$(expr "${bytes}" - 2000000000 ); echo "${adj_bytes}"> /sys/fs/cgroup/jupyter-children/memory.max
    SHMSZ=$(expr "${adj_bytes}" / 2000000)
    echo -e "SHMSZ=${SHMSZ}m\n" >| /var/kernel-docker-disk-limits.env
    /etc/systemd/system/configure-docker.sh
    /etc/systemd/system/prep-var-lib-docker-mount.sh
    sysctl -w vm.dirty_ratio=2 vm.dirty_background_ratio=1
    apparmor_parser --remove /etc/apparmor.d/kernel || true
    apparmor_parser -av /etc/apparmor.d/kernel
    /sbin/iptables-restore < /etc/iptables.cfg
    mkdir -p /tmp/var-colab && /bin/hostname > /tmp/var-colab/hostname && date --rfc-3339=ns >> /tmp/colab-docker-starts
    # Check for the existence of all Colab containers.
    k_default_exists=$(docker ps -q -a -f name=k_default)
    kmp_default_exists=$(docker ps -q -a -f name=kmp_default)
    tunnelbevm_exists=$(docker ps -q -a -f name=tunnelbevm)
    systemctl daemon-reload
    if [[ -z "${k_default_exists}" ]]; then
      systemctl start k_default.service
    else
      docker restart k_default
    fi
    systemctl start colab-kmsg-dumper.service
    systemctl start move-jupyter-children-to-memory-cgroup.service
    if [[ -z "${tunnelbevm_exists}" ]]; then
      systemctl start tunnelbe.service
    else
      docker restart tunnelbevm
    fi
    if [[ -z "${kmp_default_exists}" ]]; then
      systemctl start kmp_default.service
    else
      docker restart kmp_default
    fi
    systemctl start node-problem-detector.service
# Poller that relocates user-spawned processes into the jupyter-children
# memory cgroup so an OOM kill is more likely to hit them than the notebook
# server. Driven by the .service unit of the same name below.
- path: /etc/systemd/system/move-jupyter-children-to-memory-cgroup.sh
  permissions: '0755'
  owner: root
  content: |
    #!/bin/bash -eu
    # Relocate user-run processes into a memory cgroup so that if oom-killer is triggered by one of
    # them, only they are eligible for killing. This increases the chances that jupyter-notebook
    # survives the event, maintaining connectivity to the front-end.
    # NOTE: It would be nicer to poll on a notification mechanism for new processes (e.g. forkstat)
    # but none of the tools that use the Process Events Connector (https://lwn.net/Articles/157150/)
    # appear to be present on COS, and the events aren't made available via an FS mount like sysfs
    # or procfs. The body of the loop below takes ~7ms to run, though, so not worrying about it.
    CGROUPPROCS=/sys/fs/cgroup/jupyter-children/cgroup.procs
    OOM_SCORE_ADJ_MAX=1000
    while :; do
      # Suppress unnecessary noise by ignoring already-moved PIDs.
      # This needs to be done in each iteration to deal with PID reuse.
      declare -A handledPIDs
      while read pid ; do
        handledPIDs[$pid]=1
      done < ${CGROUPPROCS}
      # This outer loop will have at most one iteration but do it this way to avoid special-casing
      # zero iterations.
      for ppid in $(pidof -x jupyter-notebook); do
        for pid in $(ps -o pid= --ppid $ppid); do
          [[ -v handledPIDs[$pid] ]] && continue
          echo "$pid"
        done
      done | tee ${CGROUPPROCS}
      # LSP receives special treatment per b/203467697.
      for pid in $(pgrep -f pyright-langserver.js); do
        [[ -v handledPIDs[$pid] ]] && continue
        echo "$pid" | tee ${CGROUPPROCS}
        echo "$OOM_SCORE_ADJ_MAX" > /proc/${pid}/oom_score_adj || true
      done
      unset handledPIDs
      sleep 1
    done
# GPU setup hook; delegates to the script baked into the image.
# (Not invoked by startup.sh here — presumably used on GPU machine types;
# TODO confirm against the image.)
- path: /etc/systemd/system/load-nvidia-modules.sh
  permissions: '0755'
  owner: root
  content: |
    #!/bin/bash -ex
    bash /var/lib/nvidia/setup_gpu.sh
# systemd unit wrapping the cgroup-mover poller above; restarts forever.
- path: /etc/systemd/system/move-jupyter-children-to-memory-cgroup.service
  permissions: '0644'
  owner: root
  content: |
    [Unit]
    Description=Jupyter children cgroup mover
    [Service]
    ExecStart=/etc/systemd/system/move-jupyter-children-to-memory-cgroup.sh
    Restart=always
    RestartSec=1
# AppArmor profile applied to the k_default container (see its
# --security-opt apparmor=kernel below): broadly permissive, but blocks
# cgroup (re)mounts and the classic release_agent container-escape vector.
- path: /etc/apparmor.d/kernel
  permissions: '0755'
  owner: root
  content: |
    #include <tunables/global>
    profile kernel flags=(attach_disconnected) {
      #include <abstractions/base>
      capability,
      dbus,
      file,
      mount,
      network,
      pivot_root,
      ptrace,
      remount,
      signal,
      umount,
      unix,
      deny mount fstype = cgroup,
      deny mount options in (bind) /sys/**,
      deny /sys/fs/cgroup/**/release_agent w,
    }
# Creates the br0 bridge network the containers attach to; idempotent
# (|| true tolerates the network already existing on reboot).
- path: /etc/systemd/system/configure-docker.sh
  permissions: '0755'
  owner: root
  content: |
    #!/bin/bash -ex
    /usr/bin/docker network create -d bridge \
      --subnet=172.28.0.0/16 \
      --gateway=172.28.0.1 \
      --ip-range=172.28.0.0/24 \
      -o "com.docker.network.bridge.name"="br0" \
      br0 || true
# Loop-mounts the resized colab-vmdisk file as docker's overlay2 store and
# disables docker-managed iptables/logging while dockerd is stopped.
- path: /etc/systemd/system/prep-var-lib-docker-mount.sh
  permissions: '0755'
  owner: root
  content: |
    #!/bin/bash -ex
    FILE=/var/lib/docker/colab-vmdisk
    OVERLAY2=/var/lib/docker/overlay2
    # Stop docker before unmounting its overlay2 directory to avoid confusing it.
    systemctl stop docker
    # Be robust to overlay2 already being remounted by COS startup scripts.
    umount "$OVERLAY2" || true
    mount -vo nosuid,nodev "$FILE" "$OVERLAY2"
    # Update this setting while dockerd is stopped to have it take effect on restart.
    /bin/sed -i -e '/"storage-driver/i"iptables": false,\n"log-driver": "none",' /etc/docker/daemon.json
    # Start docker after mounting its overlay2 directory to have it create needed subdirectories (e.g. 'l').
    systemctl start docker
# Hand-written ruleset loaded by startup.sh via iptables-restore (docker's own
# iptables management is disabled in prep-var-lib-docker-mount.sh). Allows
# SSH and intra-bridge traffic; only 172.28.0.3/.13 may reach the link-local
# metadata range on port 80.
- path: /etc/iptables.cfg
  permissions: '0755'
  owner: root
  content: |
    *nat
    :PREROUTING ACCEPT
    :INPUT ACCEPT
    :OUTPUT ACCEPT
    :POSTROUTING ACCEPT
    :DOCKER -
    -A PREROUTING -m addrtype --dst-type LOCAL -j DOCKER
    -A OUTPUT ! -d 127.0.0.0/8 -m addrtype --dst-type LOCAL -j DOCKER
    -A POSTROUTING -s 172.28.0.0/16 ! -o br0 -j MASQUERADE
    -A DOCKER -i br0 -j RETURN
    COMMIT
    *mangle
    :PREROUTING ACCEPT
    :INPUT ACCEPT
    :FORWARD ACCEPT
    :OUTPUT ACCEPT
    :POSTROUTING ACCEPT
    COMMIT
    *filter
    :INPUT ACCEPT
    :FORWARD DROP
    :OUTPUT ACCEPT
    :DOCKER -
    -A INPUT -m state --state RELATED,ESTABLISHED -j ACCEPT
    -A INPUT -i lo -j ACCEPT
    -A INPUT -p icmp -j ACCEPT
    -A INPUT -p tcp -m tcp --dport 22 -j ACCEPT
    -A INPUT -d 172.28.0.1 -i br0 -p tcp -m tcp --dport 8008 -j ACCEPT
    -A FORWARD -s 172.28.0.3 -d 169.254.0.0/16 -i br0 -p tcp -m tcp --dport 80 -j ACCEPT
    -A FORWARD -s 172.28.0.13 -d 169.254.0.0/16 -i br0 -p tcp -m tcp --dport 80 -j ACCEPT
    -A FORWARD -o br0 -j DOCKER
    -A FORWARD -o br0 -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
    -A FORWARD -i br0 ! -o br0 -j ACCEPT
    -A FORWARD -i br0 -o br0 -j ACCEPT
    -A OUTPUT -m state --state NEW,RELATED,ESTABLISHED -j ACCEPT
    -A OUTPUT -o lo -j ACCEPT
    COMMIT
# Dumps the full GCE metadata tree, then long-polls for the next change using
# the ETag-based wait_for_change API.
- path: /etc/systemd/system/colab-metadata-monitor.sh
  permissions: '0700'
  owner: root
  content: |
    #!/bin/bash -eu
    # Why? b/78509777.
    # https://cloud.google.com/compute/docs/storing-retrieving-metadata#aggcontents
    ORIGINAL="$(curl -isS -m 2 -H "Metadata-Flavor: Google" "http://metadata/computeMetadata/v1/?recursive=true" | tr -d '\015')"
    ETAG="$(echo "$ORIGINAL" | sed -n -e 's/^ETag: \(.*\)/\1/p')"
    echo "$ORIGINAL"
    # https://cloud.google.com/compute/docs/storing-retrieving-metadata#etags
    curl -isS -H "Metadata-Flavor: Google" "http://metadata/computeMetadata/v1/?recursive=true&wait_for_change=true&last_etag=$ETAG"
# Streams OOM-killer and segfault lines from the kernel log into
# /tmp/var-colab/ooms, each prefixed with a unix timestamp.
- path: /etc/systemd/system/colab-kmsg-dumper.sh
  permissions: '0700'
  owner: root
  content: |
    #!/bin/bash -eu
    echo >>/tmp/var-colab/ooms
    grep --line-buffered -e " invoked oom-killer" -e "segfault at" /dev/kmsg | awk -W interactive '{ print systime() "," $0 }' >>/tmp/var-colab/ooms
# systemd unit wrapping the kmsg dumper above; restarts forever.
- path: /etc/systemd/system/colab-kmsg-dumper.service
  permissions: '0644'
  owner: root
  content: |
    [Unit]
    Description=Kernel Log Dumper
    [Service]
    ExecStart=/etc/systemd/system/colab-kmsg-dumper.sh
    Restart=always
    RestartSec=1
# Tunnel backend container: privileged, host network/PID, connects the VM to
# colab.research.google.com and proxies to the kernel containers on br0.
- path: /etc/systemd/system/tunnelbe.service
  permissions: '0644'
  owner: root
  content: |
    [Unit]
    Description=tunnelbe docker container
    Requires=network-online.target
    After=network-online.target
    Requires=docker.service
    After=docker.service
    [Service]
    Environment="HOME=/home/tunnelbe"
    # This ensures that credentials are in place for docker pull commands.
    ExecStartPre=/usr/bin/docker-credential-gcr configure-docker
    ExecStop=/usr/bin/docker stop tunnelbevm
    # Pass -t below to prevent unfortunate buffering in log-watching through journalctl.
    ExecStart=/usr/bin/docker -D run --net=host -t -u 0 \
      --pid=host \
      --privileged \
      --device=/dev/loop0 \
      --name=tunnelbevm \
      -v /tmp/colab-vm:/tmp/colab-vm \
      -v /var/lib/docker/colab-vmdisk:/var/lib/docker/colab-vmdisk \
      gcr.io/colab-datalab/tunnelbackend_binary:baked \
      --backend_url_map='{"default":{"local_target":"http://172.28.0.2:8080","kernel_manager":"http://172.28.0.2:9000","kernel_manager_proxy_debug_vars":"http://172.28.0.3:6000/debug/vars"}}' \
      --enable_jwt \
      --debug_port=4000 \
      --min_pending_requests=2 \
      --tunnel_url=https://colab.research.google.com/tun/u \
      --request_rate_limit=0 \
      --manual_tunneled_request_port=0
    # The docker pull 403's sometimes, as if the ExecStartPre command above
    # didn't run, even though it did, and even though a subsequent interactive
    # [sudo systemctl start tunnelbe.service] works just fine. Work around
    # this by restarting on apparent failure.
    Restart=always
    RestartSec=1
# Kernel manager proxy container: sits at 172.28.0.3 on br0 and proxies
# kernel-management traffic to the kernel container at 172.28.0.2.
- path: /etc/systemd/system/kmp_default.service
  permissions: '0644'
  owner: root
  content: |
    [Unit]
    Description=kernel_manager_proxy default docker container
    Requires=network-online.target
    After=network-online.target
    Requires=docker.service
    After=docker.service
    [Service]
    Environment="HOME=/home/kmp_user"
    # This ensures that credentials are in place for docker pull commands.
    ExecStartPre=/usr/bin/docker-credential-gcr configure-docker
    ExecStop=/usr/bin/docker stop kmp_default
    # Pass -t below to prevent unfortunate buffering in log-watching through journalctl.
    ExecStart=/usr/bin/docker -D run -t -u 0 \
      --net br0 \
      --ip 172.28.0.3 \
      --name=kmp_default \
      gcr.io/colab-datalab/kernel_manager_proxy:baked \
      --listen_host=172.28.0.3 \
      --target_host=172.28.0.2 \
      --listen_port=6000 \
      --target_port=9000 \
      --enable_output_coalescing=true \
      --output_coalescing_required=true
    # Mirror the docker pull 403 work-around applied to the TBE.
    Restart=always
    RestartSec=1
# The Colab kernel container itself, at 172.28.0.2 on br0, confined by the
# "kernel" AppArmor profile defined above. SHMSZ comes from the env file
# written by startup.sh.
- path: /etc/systemd/system/k_default.service
  permissions: '0644'
  owner: root
  content: |
    [Unit]
    Description=kernel default docker container
    Requires=network-online.target
    After=network-online.target
    Requires=docker.service
    After=docker.service
    [Service]
    Environment="HOME=/home/datalab"
    EnvironmentFile=/var/kernel-docker-disk-limits.env
    # This ensures that credentials are in place for docker pull commands.
    ExecStartPre=/usr/bin/docker-credential-gcr configure-docker
    # Add the marker for the marketplace image.
    ExecStartPre=/bin/bash -c 'mkdir -p /tmp/var-colab && touch /tmp/var-colab/mp'
    ExecStop=/usr/bin/docker stop k_default
    # NOTE(review): the trailing backslash after the image name (and the bare
    # "\" continuation mid-command) look like leftovers from removed lines;
    # systemd skips comment lines while joining continuations, so the comment
    # below may be folded into ExecStart — confirm intended.
    ExecStart=/usr/bin/docker run -u 0 \
      --net br0 \
      --ip 172.28.0.2 \
      --name=k_default \
      --cap-add SYS_ADMIN \
      --cap-add SYS_PTRACE \
      --device /dev/fuse \
      --security-opt apparmor=kernel \
      --volume /tmp/var-colab:/var/colab \
      \
      --volume /tmp/colab-shared:/var/colab/shared \
      --shm-size=${SHMSZ} \
      --env='GCS_READ_CACHE_BLOCK_SIZE_MB=16' \
      --env='GCE_METADATA_TIMEOUT=0' \
      --env='PYTHONWARNINGS=ignore:::pip._internal.cli.base_command' \
      --env='NO_GCE_CHECK=True' \
      --env='DATALAB_SETTINGS_OVERRIDES={"kernelManagerProxyPort":6000,"kernelManagerProxyHost":"172.28.0.3","jupyterArgs":["--ip=\\\"172.28.0.2\\\""],"debugAdapterMultiplexerPath":"/usr/local/bin/dap_multiplexer","enableLsp":true}' \
      gcr.io/colab-datalab/datalab:baked \
    # If a user causes the container to exit (say by killing the node app), we
    # prefer to restart rather than make the user wait for the VM to go unhealthy.
    Restart=always
    RestartSec=1
# SSH login banner. <TODO> is a placeholder left in the published gist.
- path: /etc/motd
  permissions: '0644'
  owner: root
  content: |
    To use this VM in the Colab UI, navigate to the URL:
    https://colab.research.google.com/<TODO>
# Boot-time commands: run the one-shot startup script written above, then
# stop COS auto-updates.
runcmd:
- /etc/systemd/system/startup.sh
# See go/iwsdy.
- systemctl stop update-engine.service
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment