Last active
July 14, 2022 04:11
-
-
Save rsrini7/294d67def611da193dadf746e53de0c4 to your computer and use it in GitHub Desktop.
AWS Deep Racer Ubuntu setup with NVIDIA setup
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# --- NVIDIA driver setup -----------------------------------------------------
# Refresh the package index and bring the system up to date first.
sudo apt update
sudo apt upgrade

# nvidia-smi is run after each step purely as a diagnostic: it is expected to
# fail until the driver is installed and loaded.
nvidia-smi
sudo apt install nvidia-utils-515
nvidia-smi
sudo apt install nvidia-driver-515

# Select the NVIDIA GPU profile (presumably only relevant on hybrid-graphics
# machines where prime-select is available).
sudo prime-select nvidia
nvidia-smi

# Reboot, then re-run nvidia-smi in the new session to confirm that the GPU is
# visible.
sudo reboot
nvidia-smi
# --- Fetch deepracer-for-cloud -----------------------------------------------
# git has to be installed before the repository can be cloned; clone only once,
# because a second clone into the same directory fails.
sudo apt install git
git clone https://github.com/aws-deepracer-community/deepracer-for-cloud
# --- Docker Engine installation ----------------------------------------------
# Check whether docker is already present before installing it.
command -v docker >/dev/null || echo "docker is not installed yet"

sudo apt update
sudo apt-get install ca-certificates curl gnupg lsb-release

# Download Docker's signing key into the keyring that the apt source below
# refers to via signed-by=; without it apt cannot verify the repository.
sudo mkdir -p /etc/apt/keyrings
curl -fsSL https://download.docker.com/linux/ubuntu/gpg \
  | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg
sudo chmod a+r /etc/apt/keyrings/docker.gpg

# Register the Docker apt repository for this architecture and Ubuntu release.
echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \
  $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
sudo apt update
sudo apt-get install docker-ce docker-ce-cli containerd.io docker-compose-plugin

# Allow the current user to talk to the Docker daemon without sudo. The new
# group membership only applies to sessions started after this point, so
# `docker ps` may still be refused until after the reboot below.
sudo usermod -aG docker "$USER"
sudo apt install docker-compose
docker ps

# Start Docker and containerd automatically at boot.
sudo systemctl enable docker.service
sudo systemctl enable containerd.service

# Reboot, then confirm that docker works without sudo in the new session.
sudo reboot
docker ps
# --- NVIDIA container toolkit ------------------------------------------------
# Build the distribution identifier from /etc/os-release (ID followed by
# VERSION_ID); it selects the matching repository list below.
distribution=$(. /etc/os-release; echo "${ID}${VERSION_ID}")

# NOTE(review): apt-key is deprecated on current Ubuntu releases; consider
# switching to a keyring file referenced with signed-by= - confirm against the
# current NVIDIA install guide.
curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
curl -s -L "https://nvidia.github.io/nvidia-docker/${distribution}/nvidia-docker.list" \
  | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit

# Restart Docker after installing the toolkit.
sudo systemctl restart docker

# Smoke test: nvidia-smi inside a container should list the host GPU(s).
# --rm removes the test container once it exits.
# NOTE(review): the nvidia/cuda:11.0-base tag may no longer be published on
# Docker Hub; if the pull fails, pick a currently available CUDA base tag.
sudo docker run --rm --gpus all nvidia/cuda:11.0-base nvidia-smi

# Pre-pull the GPU build of the DeepRacer SageMaker image.
docker pull awsdeepracercommunity/deepracer-sagemaker:5.0.0-gpu
# --- deepracer-for-cloud initialisation --------------------------------------
# Enter the checkout; the bin/ paths below are relative to the repository root.
ls
cd deepracer-for-cloud/
ls

# Install jq before the first init run (the original log ran bin/init.sh,
# installed jq, and then ran it again - presumably init.sh needs jq).
sudo apt install jq
bin/init.sh
source bin/activate.sh

# AWS tooling used to configure the "minio" profile.
sudo apt install awscli
pip install boto3
pip install --upgrade awscli
aws configure --profile minio
source bin/activate.sh

# Only needed if swarm initialisation fails on a machine with multiple network
# interfaces: pin the advertise address, then re-run init and activate.
docker swarm init --advertise-addr 127.0.0.1
source bin/activate.sh
bin/init.sh
source bin/activate.sh

# Check the running containers, then (re)configure the minio profile.
docker ps
aws configure --profile minio
# -> Edit the configuration inside deepracer-for-cloud:
# -> in system.env, change the SageMaker image to the GPU variant
# -> adjust hyperparameters.json, model_metadata.json and reward_function.py to get a better model
# Upload the custom files first; otherwise training fails with
# "404 HeadObject operation: Not Found".
dr-upload-custom-files

# If training appears stuck, do not stop the Docker containers directly.
# Instead, open a new terminal, run `source bin/activate.sh` there, and issue
# dr-stop-training.
dr-start-training
# Once training has started, open localhost:8080 and click kvs_stream to watch live training and evaluation.
# -> To run policy training, update /etc/docker/daemon.json with the settings below:
{ | |
"runtimes": { | |
"nvidia": { | |
"path": "nvidia-container-runtime", | |
"runtimeArgs": [] | |
} | |
}, | |
"default-runtime": "nvidia" | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
How can I stop a model and then restart it with a slightly different reward function without going all the way back to iteration 0?
You can also just update the run.env files manually:
DR_LOCAL_S3_MODEL_PREFIX=Experiment11c
DR_LOCAL_S3_PRETRAINED=True
DR_LOCAL_S3_PRETRAINED_PREFIX=Experiment11b
I like using a different python file for each reward function I am testing...
I am constantly updating the file manually. Here you can change the name of the reward function to whatever you want (assuming you have an actual file with that same name in your custom_files folder)
DR_LOCAL_S3_REWARD_KEY=$DR_LOCAL_S3_CUSTOM_FILES_PREFIX/reward_function_returnspeed_exp11b.py
dr-update
dr-increment-training