Last active
April 19, 2023 15:08
-
-
Save wrgoldstein/1e805d9478c4c42260ec69c192c53c74 to your computer and use it in GitHub Desktop.
Getting set up to do GPU accelerated machine learning in Elixir on a fresh Google Cloud Platform VM
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# So you want to do machine learning in Elixir on a fresh GCP VM with a GPU? | |
=== set up a VM === | |
1. choose a GPU (T4); make sure you use Debian 11 (bullseye) and a region where that GPU is available | |
2. don't use the default boot disk | |
3. do change the boot disk to have 50 GB of space | |
4. give the VM plenty of RAM (for re-training BERT on 250 text samples, 15 GB wasn't enough) | |
5. ssh in: | |
gcloud compute ssh instance-6 | |
=== install the NVIDIA driver === | |
<<< need to do this even though it gets uninstalled later, apparently >>> | |
sudo apt upgrade | |
sudo apt install build-essential -y | |
sudo apt-get install linux-headers-`uname -r` | |
BASE_URL=https://us.download.nvidia.com/tesla | |
DRIVER_VERSION=525.105.17 | |
curl -fSsl -O $BASE_URL/$DRIVER_VERSION/NVIDIA-Linux-x86_64-$DRIVER_VERSION.run | |
sudo sh NVIDIA-Linux-x86_64-$DRIVER_VERSION.run | |
* guessing is ok | |
* don't need the 32-bit compatibility libraries | |
* don't need Vulkan | |
nvidia-smi # check that it works! | |
=== install CUDA 11.8 === | |
# (this is the slowest part) | |
curl -O https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda-repo-debian11-11-8-local_11.8.0-520.61.05-1_amd64.deb | |
sudo dpkg -i cuda-repo-debian11-11-8-local_11.8.0-520.61.05-1_amd64.deb | |
sudo cp /var/cuda-repo-debian11-11-8-local/cuda-*-keyring.gpg /usr/share/keyrings/ | |
sudo apt install software-properties-common -y | |
sudo add-apt-repository contrib | |
sudo apt-get update | |
sudo apt-get -y install cuda | |
# * choose English when prompted | |
# * yes, uninstall the previously installed driver when asked | |
=== install cuDNN === | |
# go to https://developer.nvidia.com/rdp/cudnn-download | |
# open the browser inspector's network tab and make sure recording is enabled | |
# start downloading the right package for Debian 11 | |
# cancel the download | |
# right-click the final request (after all the redirects) and choose "copy as curl" | |
# run that curl command, adding -o with the filename (drop the ? and everything after it) | |
sudo dpkg -i cudnn-local-repo-debian11-8.9.0.131_1.0-1_amd64.deb | |
sudo cp /var/cudnn-local-repo-debian11-8.9.0.131/cudnn-local-EDAE65B7-keyring.gpg /usr/share/keyrings/ | |
=== install Erlang (esl-erlang package) from Erlang Solutions === | |
curl -O https://packages.erlang-solutions.com/erlang/debian/pool/esl-erlang_25.3-1~debian~bullseye_amd64.deb | |
sudo dpkg -i esl-erlang_25.3-1~debian~bullseye_amd64.deb | |
sudo apt --fix-broken install -y | |
=== install Elixir from the release zip (built for OTP 25) === | |
mkdir elixir | |
cd elixir | |
curl -LO https://github.com/elixir-lang/elixir/releases/download/v1.14.4/elixir-otp-25.zip | |
sudo apt install unzip | |
unzip elixir-otp-25.zip | |
export PATH="/home/williamgoldstein/elixir/bin:$PATH" | |
=== set EXLA flags === | |
export XLA_TARGET=cuda118 | |
=== create an elixir project === | |
cd ~ | |
mix new proj | |
cd proj | |
vim mix.exs | |
# add dependencies: | |
{:bumblebee, "~> 0.3.0"}, | |
{:axon, "~> 0.5.1"}, | |
{:nx, "~> 0.5.1"}, | |
{:exla, "~> 0.5.1"}, | |
{:explorer, "~> 0.5.0"} | |
mix local.hex --force | |
mix deps.get | |
mix local.rebar --force |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment