Created
September 18, 2024 13:17
-
-
Save engineervix/44e153ee5db2ad0192f12c391f1216bb to your computer and use it in GitHub Desktop.
Run Ollama on vast.ai
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# ============================================================================
# Description: This script bootstraps a new vast.ai GPU instance, installs
# ollama and sets up a custom model for inference.
#
# Requires: vastai CLI, ssh, ssh-keygen, rsync, jq, and a .env file in the
# current directory (OLLAMA_DIR, LLM_DIR, GGUF_FILE, MODEL_NAME are read from
# it later in the script).
#
# author: Victor Miti <https://github.com/engineervix>
# ============================================================================

# Exit immediately if any command fails
set -e

# Source the .env file so we can retrieve some environment variables
# shellcheck source=/dev/null
source .env

DATESTRING=$(date +"%Y%m%d%H%M%S")
START_TIME=$(date +%s)
KEY_NAME="vast_ssh_$DATESTRING"
PASSPHRASE=""

# Search for available instances based on specified criteria, sorted by price
# (dph), then pick the 1st instance from the results (row 2; row 1 is the
# table header). awk is used instead of `cut -d ' '` so that a row starting
# with padding spaces still yields the ID column.
machine_id=$(
  vastai search offers -d 'num_gpus=1 disk_space>=40 gpu_ram>=20 gpu_arch=nvidia dph<1 dlperf>30' -o 'dph' \
    | awk 'NR==2 {print $1}'
)

# Refuse to continue without a usable offer ID; otherwise the create call
# below would be issued with an empty or garbage argument.
if [[ ! "$machine_id" =~ ^[0-9]+$ ]]; then
  echo "No matching vast.ai offer found (got: '$machine_id')." >&2
  exit 1
fi
echo "The ID of the first result is: $machine_id"

# Create a new instance with the specified configuration.
# The output is captured (and echoed back) so the new instance ID can be read
# from it.
create_output=$(
  vastai create instance "$machine_id" \
    --image ollama/ollama:0.3.6 \
    --env '-p 11434:11434 -e TZ=Europe/London' \
    --disk 40 \
    --ssh \
    --direct
)
echo "$create_output"

# Get the instance id of the newly created instance.
# Preferred source: the `new_contract` field reported by the create call
# (matches both the Python-dict and the JSON rendering of that output).
instance_id=$(
  printf '%s\n' "$create_output" \
    | sed -nE "s/.*new_contract[\"']?: *([0-9]+).*/\1/p" \
    | head -n 1
)

# Fallback: first row of the instance listing. This can pick an unrelated
# instance when the account already has others, hence only a fallback.
if [[ -z "$instance_id" ]]; then
  instance_id=$(vastai show instances | awk 'NR==2 {print $1}')
fi

if [[ -z "$instance_id" ]]; then
  echo "Could not determine the ID of the newly created instance." >&2
  exit 1
fi
#######################################
# Print the status of the instance being bootstrapped.
# Globals:   instance_id (read)
# Arguments: none
# Outputs:   third whitespace-separated column of the second line printed by
#            `vastai show instance` (the first line is the table header);
#            prints nothing if there is no second line.
#######################################
get_status() {
  vastai show instance "$instance_id" | awk 'NR==2 {print $3}'
}
#######################################
# Report a failure, optionally destroy the instance, and abort the script.
# Globals:   instance_id (read), START_TIME (read),
#            END_TIME, DURATION (written)
# Arguments: $1 - "true" (default) to destroy the instance before exiting;
#                 any other value leaves it running.
# Outputs:   diagnostics on stderr
# Returns:   never returns; always exits with status 1
#######################################
handle_vastai_error() {
  local destroy_server=${1:-true}

  echo "An error occurred." >&2

  # Only attempt the destroy when there is an instance ID to act on, and do
  # not let a failing destroy call hide the timing report / exit code below.
  if [ "$destroy_server" = true ] && [ -n "$instance_id" ]; then
    vastai destroy instance "$instance_id" \
      || echo "Warning: could not destroy instance $instance_id." >&2
  fi

  END_TIME=$(date +%s)
  DURATION=$(( END_TIME - START_TIME ))
  echo "Script failed after $DURATION seconds." >&2
  exit 1
}
# Poll the instance status until it reports "running" or the timeout expires.
# elapsed_time only counts the sleeps, not the time spent inside get_status,
# so the real wall-clock wait can be somewhat longer than max_time.
instance_status=$(get_status)
elapsed_time=0
max_time=300 # 5 minutes in seconds

# Check the status every 5 seconds until it is "running" or until the timeout is reached
while [[ "$instance_status" != "running" ]] && (( elapsed_time < max_time )); do
  echo "Current status: $instance_status. Waiting for 'running' status..."
  sleep 5
  elapsed_time=$((elapsed_time + 5))
  instance_status=$(get_status)
done
# $OLLAMA_DIR & $LLM_DIR are defined in the .env file.
# Only their last path component is used for the directories created in the
# remote home directory.
ollama_directory=$(basename "$OLLAMA_DIR")
llm_directory=$(basename "$LLM_DIR")

# if status is "running", let's SSH into the instance
if [[ "$instance_status" == "running" ]]; then
  echo "Instance is now running!"

  # We need to generate an SSH key and add it to the machine
  ## but first, delete any that may exist
  rm -fv -- vast_ssh_*
  ssh-keygen -t ed25519 -f "$KEY_NAME" -N "$PASSPHRASE" -C "vast-box-$DATESTRING@$(hostname)"
  vastai attach ssh "$instance_id" "$(cat "$KEY_NAME.pub")"

  # wait for a few seconds before attempting to ssh into the instance
  sleep 20

  # disable tmux and create directories for ollama.
  # The two paths are spelled out (rather than a {a,b} brace list) so the
  # command does not depend on the remote login shell supporting brace
  # expansion; the names are quoted for the remote shell.
  ssh -o StrictHostKeyChecking=no -i "$KEY_NAME" "$(vastai ssh-url "$instance_id")" \
    "touch ~/.no_auto_tmux; mkdir -p ~/\"$ollama_directory\" ~/\"$llm_directory\""
else
  echo "Timeout reached. Instance did not change to 'running' within 5 minutes." >&2
  # The instance still exists (and may still be billed): tell the user how
  # to get rid of it instead of leaving it behind silently.
  echo "Instance $instance_id was NOT destroyed. To remove it, run:" >&2
  echo "  vastai destroy instance $instance_id" >&2
  exit 1
fi
# Copy the custom model to the instance
# this may take a while depending on your internet connection.
# A better approach is probably to host your model files securely somewhere,
# then ssh into the vast.ai machine and download from there.
ssh_url=$(vastai ssh-url "$instance_id")

# The URL is split on ':' (scheme / user@host / port), so it is expected to
# look like ssh://user@host:port.
port=${ssh_url##*:}
remote_host=${ssh_url##*/}      # drop the leading "ssh://"
remote_host=${remote_host%%:*}  # drop the trailing ":port"

if [[ ! "$port" =~ ^[0-9]+$ || -z "$remote_host" ]]; then
  echo "Unexpected SSH URL for instance $instance_id: '$ssh_url'" >&2
  exit 1
fi

# Transport command shared by both transfers.
rsync_ssh="ssh -o StrictHostKeyChecking=no -p $port -i $KEY_NAME"

rsync -chavzP -e "$rsync_ssh" "$OLLAMA_DIR" "$remote_host":~/"$ollama_directory"
rsync -chavzP -e "$rsync_ssh" "$GGUF_FILE" "$remote_host":~/"$llm_directory/"
# now you can login and run ollama on the GPU instance.
# A failure in this step is routed to handle_vastai_error; with
# DESTROY_INSTANCE=false the instance is kept so it can be inspected.
DESTROY_INSTANCE=false
{
  # The here-document delimiter is deliberately unquoted: the ollama directory
  # and model name are expanded locally before the script is sent. Anything
  # that must be evaluated on the remote side is escaped with a backslash.
  # shellcheck disable=SC2087
  ssh -o StrictHostKeyChecking=no -i "$KEY_NAME" "$(vastai ssh-url "$instance_id")" << EOF
# Exit immediately if a command exits with a non-zero status
set -e
OLLAMA_HOST=0.0.0.0 ollama serve > ollama.log 2>&1 &
# The server was only just started in the background, so wait (up to about
# 30 seconds) until it answers before asking it to create the model.
tries=0
until ollama list > /dev/null 2>&1; do
  tries=\$((tries + 1))
  if [ "\$tries" -ge 30 ]; then
    echo "ollama server did not become ready; see ollama.log" >&2
    exit 1
  fi
  sleep 1
done
cd ~/"$ollama_directory"
ollama create "$MODEL_NAME" -f "$MODEL_NAME/Modelfile"
EOF
} || handle_vastai_error "$DESTROY_INSTANCE"
# get details of the running instance.
json_output=$(vastai show instance "$instance_id" --raw)

# get the IP address and host port of the instance using jq
# NOTE: The output JSON appears to be improperly formatted due to line breaks or
# trailing commas, and I had trouble parsing it with jq. The tr & sed commands
# clean up the JSON before parsing it: newlines are removed FIRST (sed works
# one line at a time, so a comma at the end of a line could otherwise never be
# matched together with the brace on the next line), then any comma followed
# by optional whitespace and a closing brace or bracket is dropped.
# TODO: raise an issue with vast.ai to fix this.
clean_json=$(printf '%s' "$json_output" | tr -d '\n' | sed -E 's/,[[:space:]]*([]}])/\1/g')

ip_address=$(printf '%s' "$clean_json" | jq -r '.public_ipaddr')
# Several bindings can match; only the first one is used.
http_port=$(
  printf '%s' "$clean_json" \
    | jq -r '.ports["11434/tcp"][] | select(.HostIp == "0.0.0.0") | .HostPort' \
    | head -n 1
)

echo "IP Address: $ip_address"
echo "Host Port: $http_port"

# Do not write a broken URL into .env when either value could not be read.
if [[ -z "$ip_address" || "$ip_address" == "null" || ! "$http_port" =~ ^[0-9]+$ ]]; then
  echo "Could not determine the public address of instance $instance_id; .env was not modified." >&2
  echo "The instance is still running. To remove it, run: vastai destroy instance $instance_id" >&2
  exit 1
fi

# update OLLAMA_API_BASE in .env with the new IP address and port
# ('|' is used as the sed delimiter so the slashes in the URL need no escaping)
if grep -q '^OLLAMA_API_BASE=' .env; then
  sed -i "s|^OLLAMA_API_BASE=.*|OLLAMA_API_BASE=http://$ip_address:$http_port|" .env
else
  echo "Warning: no OLLAMA_API_BASE entry found in .env; set it to http://$ip_address:$http_port" >&2
fi

printf '\033[35m%s\033[0m\n' "Now you should be able to use the ollama API in your wagtail project! 🚀"
printf '\033[35m%s\033[0m\n' "Remember to destroy the instance when you're done"
# Print the real instance ID: the shell variable does not exist in the
# user's own shell once this script has finished.
printf ' \033[31mvastai destroy instance %s\033[0m\n' "$instance_id"
printf '\033[35m\n%s\033[0m\n' "and cleanup 🗑️"
printf ' \033[31m%s\033[0m\n' "rm -fv vast_ssh_* ssh_*.json"

# destroy the instance when you're done
# vastai destroy instance "$instance_id"

# cleanup
# rm -fv vast_ssh_* ssh_*.json
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment