Petals Server Installation for Ubuntu Server 22.04 Minimum
# petals.service -- systemd unit referenced by the install steps below
[Unit]
Description=Petals server (petals-team/StableBeluga2)
Wants=network-online.target
After=network-online.target

[Service]
Type=simple
Restart=always
RestartSec=1
User=llamaLab
Environment=PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128
Environment=CUDA_VISIBLE_DEVICES=0
ExecStartPre=/bin/sleep 30
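# The flags below are sized for this particular host: tune --num_blocks and
# --attn_cache_tokens to your GPU's VRAM, and point --cache_dir at a disk
# with room for the cached model blocks (capped by --max_disk_space).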
ExecStart=/home/llamaLab/anaconda3/bin/python -m petals.cli.run_server petals-team/StableBeluga2 --cache_dir /mnt/BigCache/.pCache --attn_cache_tokens 12328 --max_disk_space 1000GB --num_blocks 16 --public_name redcap3000 --port 31331
KillSignal=SIGINT
[Install]
WantedBy=multi-user.target
## Installation script/instructions for Petals with Ubuntu 22.04 Server Minimum
sudo apt-get update
sudo apt-get upgrade
## required for cuda install; not included with minimum ubuntu
sudo apt-get install gcc
## install CUDA as per developer.nvidia using nvidia keyring and apt-get
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt-get update
sudo apt-get -y install cuda
sudo apt-get install nvidia-gds
export PATH=/usr/local/cuda-12.2/bin${PATH:+:${PATH}}
## per the CUDA post-install instructions, append to any existing LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/usr/local/cuda-12.2/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
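## optional: persist the CUDA paths across logins (a sketch; adjust the
## version directory if your CUDA install is not 12.2)
echo 'export PATH=/usr/local/cuda-12.2/bin${PATH:+:${PATH}}' >> ~/.bashrc
echo 'export LD_LIBRARY_PATH=/usr/local/cuda-12.2/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}' >> ~/.bashrc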
## if the exports above fail, reboot and re-run them
sudo reboot
sudo apt update
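## verify the driver and toolkit after the reboot (re-run the exports above
## first if nvcc is not found)
nvidia-smi
nvcc --version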
cd /tmp/ && curl --output anaconda.sh https://repo.anaconda.com/archive/Anaconda3-2023.07-2-Linux-x86_64.sh
sha256sum anaconda.sh
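## optional: verify the download; a sketch where EXPECTED_SHA256 stands in
## for the hash published on repo.anaconda.com for this installer
EXPECTED_SHA256='<published hash>'
echo "${EXPECTED_SHA256}  anaconda.sh" | sha256sum --check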
bash anaconda.sh
## use conda to install pytorch for cuda
conda install pytorch pytorch-cuda=11.7 -c pytorch -c nvidia
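## sanity check: PyTorch should report the GPU
python -c "import torch; print(torch.cuda.is_available(), torch.cuda.device_count())"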
## install git, download/install petals repo
sudo apt-get install git
pip install git+https://github.com/bigscience-workshop/petals
sudo reboot
## test to see it all works
python -m petals.cli.run_server petals-team/StableBeluga2 --public_name redcap3000
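## once it connects and starts serving blocks, stop it with Ctrl+C before
## installing the service below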
## create the service, copy it into place, reload systemd, enable, and restart
## don't forget: if the service can't find python, give the full path to the
## binary (find it with `whereis python`)
touch petals.service
## paste the unit file contents from the top of this gist into petals.service
chmod 644 petals.service
sudo cp petals.service /etc/systemd/system
sudo systemctl daemon-reload
sudo systemctl enable petals.service
sudo systemctl restart petals
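## confirm the service is up and watch its logs
systemctl status petals
journalctl -u petals -f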
## Stagger process startup inside services to avoid connection errors as best
## as possible by using 'ExecStartPre=/bin/sleep 30'. You cannot rely on this
## past 90 seconds (systemd's default start timeout), so keep sleeps under 90.
## It's difficult to stagger more than 4 GPUs properly on a Celeron; perhaps
## this can be addressed in future releases of petals.
## other cleanup tasks - clean up apt installer caches
sudo du -sh /var/cache/apt
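## then clear them
sudo apt-get clean
sudo apt-get autoremove -y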
## Other notes:
## Minimum HD size 120 GB; 500 GB to 1 TB recommended
## Enable OpenSSH for remote ssh login (from GUI)
## CUDA Install for ubuntu server 22.04 and cuda
## https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#ubuntu
## https://developer.nvidia.com/cuda-downloads?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu&target_version=22.04&target_type=deb_network
## https://github.com/bigscience-workshop/petals/wiki/FAQ:-Frequently-asked-questions#running-a-server
## probably add ssh identities and disable password login: three keys
## (admin, system, user) to create a passwordless login workflow for ubuntu
## desktop, collaborators, etc.
## Extras: properly initialize the RAID array created in the ubuntu installer, named 'BigCache'
## show disks
sudo lsblk -e7 -o +FSTYPE
## create file system
sudo mkfs.ext4 -F /dev/md127
sudo mkdir -p /mnt/BigCache
sudo mount /dev/md127 /mnt/BigCache
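## confirm the array is mounted where --cache_dir in the service expects it
df -h /mnt/BigCache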
## update permissions
sudo chmod ugo+rwx /mnt/BigCache
## add to /etc/fstab; use this at your own risk! (a plain `sudo echo >>`
## fails because the redirection runs as the unprivileged user, so use tee)
echo '/dev/md127 /mnt/BigCache ext4 defaults 0 1' | sudo tee -a /etc/fstab
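## verify the fstab entry parses before relying on it at boot
sudo mount -a
findmnt /mnt/BigCache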