Last active
October 15, 2020 09:57
-
-
Save anj-s/6186a140e25a282badea85ee1d92f98c to your computer and use it in GitHub Desktop.
Commands required to set up a GCE instance to run distributed TensorFlow
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Commands to prepare a GCE instance (Ubuntu 16.04, x86_64) for TensorFlow on
# GPUs and to run the ResNet50 ImageNet benchmark. Run them in order, from the
# directory where the `models` repository should be cloned.

# Install pip
wget https://bootstrap.pypa.io/get-pip.py
sudo python get-pip.py

# Install CUDA 9
# The repository package URL must stay on a single line; splitting it makes
# curl fetch a truncated URL.
curl -O http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/cuda-repo-ubuntu1604_9.0.176-1_amd64.deb
sudo dpkg -i cuda-repo-ubuntu1604_9.0.176-1_amd64.deb
sudo apt-get update
# Install the versioned 9.0 package: the unversioned `cuda` metapackage
# resolves to the newest toolkit the repository offers, which may not be 9.x.
sudo apt-get install cuda-9-0

# Install cuDNN 7 library
# The driver has been uploaded to a GCS bucket for easy access
# NOTE(review): the source URL names only the bucket; presumably it should
# point at the libcudnn7 .deb object installed below -- confirm the object path.
gsutil cp gs://lib-driver .
sudo dpkg -i libcudnn7_7.0.3.11-1+cuda9.0_amd64.deb

# Install TensorFlow 1.8
# Pinned so the wheel matches the CUDA 9.0 / cuDNN 7 libraries installed above.
pip install tensorflow-gpu==1.8.0

# Run the ResNet50 model
git clone https://github.com/tensorflow/models.git
# Add the freshly cloned checkout to the import path. Using $PWD avoids
# hard-coding one user's home directory, and the ${VAR:+...} form avoids a
# leading empty entry when PYTHONPATH was previously unset.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${PWD}/models"
sudo pip install -r models/official/requirements.txt
cd models/official/resnet

# GPU = 8
python imagenet_main.py \
  --batch_size=1024 \
  --model_dir=gs://singlenode-gcp-benchmarks/demo-v100-numgpus8_r2 \
  --dtype=fp16 \
  --data_dir=/mnt/disks/imagenet/imagenet/combined \
  --num_gpus=8

# Spin up TensorBoard using the Google Cloud shell
# NOTE(review): this log directory differs from the --model_dir used for the
# training run above; confirm which run is meant to be visualized.
tensorboard --logdir=gs://singlenode-gcp-benchmarks/demo_model_dir_numgpus8
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment