Created
November 14, 2018 21:27
-
-
Save jlebar/1a37e0dc99c96b29d0610ba970b0e3f6 to your computer and use it in GitHub Desktop.
Create a GCE instance with 8 V100 GPUs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# --- Configuration -----------------------------------------------------------
# Name/image/project for the benchmark VM. Replace PROJECT_NAME with your
# actual GCP project before running.
export INSTANCE_NAME="xla-benchmark-8xV100"
export IMAGE_FAMILY="tf-1-12-cu100"
export PROJECT_NAME="<your project name>"

# Create an n1-standard-64 VM with 8x V100 GPUs and 4 local NVMe SSDs from the
# Deep Learning VM image family. GPU instances cannot live-migrate, so the
# maintenance policy must be TERMINATE. install-nvidia-driver=True makes the
# image install the NVIDIA driver on first boot.
gcloud beta compute instances create "$INSTANCE_NAME" \
  --project="$PROJECT_NAME" \
  --machine-type=n1-standard-64 \
  --maintenance-policy=TERMINATE \
  --accelerator=type=nvidia-tesla-v100,count=8 \
  --tags=http-server,https-server \
  --image-family="$IMAGE_FAMILY" \
  --image-project=deeplearning-platform-release \
  --boot-disk-size=100GB \
  --boot-disk-type=pd-ssd \
  --local-ssd=interface=nvme \
  --local-ssd=interface=nvme \
  --local-ssd=interface=nvme \
  --local-ssd=interface=nvme \
  --metadata=install-nvidia-driver=True
## Combine the 4 local NVMe SSD drives into a single RAID 0 array.
# Install the RAID management tool non-interactively (-y: no prompt when
# run from a provisioning script).
sudo apt-get update && sudo apt-get install -y mdadm --no-install-recommends

# Create the RAID 0 array from the four local-SSD NVMe namespaces.
sudo mdadm --create /dev/md0 --level=0 --raid-devices=4 \
  /dev/nvme0n1 /dev/nvme0n2 /dev/nvme0n3 /dev/nvme0n4

# Format and mount the array. NOTE: the mount point is created first and the
# imagenet directory is created AFTER mounting — creating it before the mount
# would put it on the root filesystem, where the mount would then hide it.
sudo mkfs.ext4 -F /dev/md0
sudo mkdir -p /data
sudo mount /dev/md0 /data
sudo mkdir -p /data/imagenet
sudo chmod a+w /data
# Install a custom TensorFlow 1.12 binary compiled with AVX2. The binary
# shipped on the image already has XLA, but the custom wheel adds AVX2.
# --force-reinstall replaces the preinstalled TensorFlow from the image.
sudo pip install --force-reinstall \
  https://storage.googleapis.com/tf-performance/tf_binary/tensorflow-1.12.0.a6d8ffa.AVX2.CUDA10-cp27-cp27mu-linux_x86_64.whl
Sign up for free to join this conversation on GitHub.
Already have an account?
Sign in to comment