@engineervix · Created September 18, 2024 13:17
Run Ollama on vast.ai
#!/bin/bash
# ============================================================================
# Description: This script bootstraps a new vast.ai GPU instance, installs
# ollama and sets up a custom model for inference.
#
# author: Victor Miti <https://github.com/engineervix>
# ============================================================================
# Exit immediately if any command fails
set -e
# Source the .env file so we can retrieve some environment variables
# shellcheck source=/dev/null
source .env
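# A sketch of the variables this script expects in .env (values here are
# illustrative placeholders; adjust paths and names to your setup):
#
#   OLLAMA_DIR=/path/to/my-model                 # local dir containing the Modelfile
#                                                # (its basename should match MODEL_NAME,
#                                                # given how the create step below resolves paths)
#   LLM_DIR=/path/to/llm                         # local dir holding the model weights
#   GGUF_FILE=/path/to/llm/my-model.Q4_K_M.gguf  # the GGUF weights to upload
#   MODEL_NAME=my-model
#   OLLAMA_API_BASE=http://localhost:11434       # rewritten near the end of this script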
DATESTRING=$(date +"%Y%m%d%H%M%S")
START_TIME=$(date +%s)
KEY_NAME="vast_ssh_$DATESTRING"
PASSPHRASE=""
# Search for available instances based on specified criteria
# then pick the 1st instance from the results, and launch it
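# Criteria, roughly: one NVIDIA GPU with >= 20 GB VRAM, >= 40 GB of disk,
# under $1/hour (dph = dollars per hour), and a DLPerf score above 30;
# sorting by 'dph' means the first row should be the cheapest match.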
machine_id=$(vastai search offers -d 'num_gpus=1 disk_space>=40 gpu_ram>=20 gpu_arch=nvidia dph<1 dlperf>30' -o 'dph' | sed -n '2p' | cut -d ' ' -f 1)
echo "The ID of the first result is: $machine_id"
# Create a new instance with the specified configuration
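# (--env here passes Docker options through to the container: -p 11434:11434
# publishes ollama's API port, and -e TZ=... just sets the timezone)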
vastai create instance "$machine_id" \
  --image ollama/ollama:0.3.6 \
  --env '-p 11434:11434 -e TZ=Europe/London' \
  --disk 40 \
  --ssh \
  --direct
# get the instance ID of the newly created instance
# (this grabs the first row of `vastai show instances`, so it assumes you have
# no other instances running)
instance_id=$(vastai show instances | sed -n '2p' | cut -d ' ' -f 1)
get_status() {
  vastai show instance "$instance_id" | awk 'NR==2 {print $3}'
}
handle_vastai_error() {
  local destroy_server=${1:-true}
  echo "An error occurred."
  if [ "$destroy_server" = true ]; then
    vastai destroy instance "$instance_id"
  fi
  END_TIME=$(date +%s)
  DURATION=$(( END_TIME - START_TIME ))
  echo "Script failed after $DURATION seconds."
  exit 1
}
instance_status=$(get_status)
elapsed_time=0
max_time=300 # 5 minutes in seconds
# Check the status every 5 seconds until it is "running" or until the timeout is reached
while [ "$instance_status" != "running" ] && [ "$elapsed_time" -lt "$max_time" ]; do
  echo "Current status: $instance_status. Waiting for 'running' status..."
  sleep 5
  elapsed_time=$((elapsed_time + 5))
  instance_status=$(get_status)
done
# $OLLAMA_DIR & $LLM_DIR are defined in the .env file
ollama_directory=$(basename "$OLLAMA_DIR")
llm_directory=$(basename "$LLM_DIR")
# if status is "running", let's SSH into the instance
if [ "$instance_status" == "running" ]; then
  echo "Instance is now running!"
  # We need to generate an SSH key and add it to the machine
  ## but first, delete any that may exist
  rm -fv vast_ssh_*
  ssh-keygen -t ed25519 -f "$KEY_NAME" -N "$PASSPHRASE" -C "vast-box-$DATESTRING@$(hostname)"
  vastai attach ssh "$instance_id" "$(cat "$KEY_NAME.pub")"
  # wait for a few seconds before attempting to ssh into the instance
  sleep 20
  # disable tmux and create directories for ollama
  ssh "$(vastai ssh-url "$instance_id")" -o StrictHostKeyChecking=no -i "$KEY_NAME" "touch ~/.no_auto_tmux; mkdir -p {~/$ollama_directory,~/$llm_directory}"
else
  echo "Timeout reached. Instance did not change to 'running' within 5 minutes."
  exit 1
fi
# Copy the custom model to the instance.
# This may take a while depending on your internet connection.
# A better approach is probably to host your model files securely somewhere,
# then SSH into the vast.ai machine and download them from there (see the
# commented-out sketch after the rsync commands below).
ssh_url=$(vastai ssh-url "$instance_id")
port=$(echo "$ssh_url" | cut -d ':' -f 3)
rsync -chavzP -e "ssh -o StrictHostKeyChecking=no -p $port -i $KEY_NAME" "$OLLAMA_DIR" "$(basename "$ssh_url" | cut -d ':' -f 1)":~/"$ollama_directory"
rsync -chavzP -e "ssh -o StrictHostKeyChecking=no -p $port -i $KEY_NAME" "$GGUF_FILE" "$(basename "$ssh_url" | cut -d ':' -f 1)":~/"$llm_directory/"
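# A sketch of the "download from the instance" alternative mentioned above
# (hypothetical URL; assumes your model file is reachable from the instance,
# e.g. via a presigned link):
#
#   ssh "$(vastai ssh-url "$instance_id")" -o StrictHostKeyChecking=no -i "$KEY_NAME" \
#     "curl -fL -o ~/$llm_directory/model.gguf 'https://example.com/my-model.gguf'"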
# now you can log in and run ollama on the GPU instance
DESTROY_INSTANCE=false
{
  # shellcheck disable=SC2087
  ssh "$(vastai ssh-url "$instance_id")" -o StrictHostKeyChecking=no -i "$KEY_NAME" << EOF
# Exit immediately if a command exits with a non-zero status
set -e
OLLAMA_HOST=0.0.0.0 ollama serve > ollama.log 2>&1 &
cd ~/"$ollama_directory"
ollama create "$MODEL_NAME" -f "$MODEL_NAME/Modelfile"
EOF
} || handle_vastai_error "$DESTROY_INSTANCE"
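# For reference, a minimal Modelfile might look something like this (illustrative
# only; the path and parameters are placeholders, see the ollama Modelfile docs):
#
#   FROM /root/llm/my-model.Q4_K_M.gguf
#   PARAMETER temperature 0.7
#   SYSTEM "You are a helpful assistant."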
# get details of the running instance.
json_output=$(vastai show instance "$instance_id" --raw)
# get the IP address and host port of the instance using jq
# NOTE: The output JSON appears to be improperly formatted due to line breaks and
# trailing commas, and I had trouble parsing it with jq. The sed & tr commands
# clean up the JSON before parsing: sed looks for a comma followed by any amount
# of whitespace and a closing brace, and replaces it with just the closing brace,
# effectively removing trailing commas before closing braces. This regex won't
# handle trailing commas in arrays (],), so adjust it if you run into those.
# TODO: raise an issue with vast.ai to fix this.
ip_address=$(echo "$json_output" | sed 's/,\s*}/}/g' | tr -d '\n' | jq -r '.public_ipaddr')
http_port=$(echo "$json_output" | sed 's/,\s*}/}/g' | tr -d '\n' | jq -r '.ports["11434/tcp"][] | select(.HostIp == "0.0.0.0") | .HostPort')
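# The fields we rely on look roughly like this in the raw output (shape inferred
# from the jq queries above; all other fields omitted):
#
#   {
#     "public_ipaddr": "203.0.113.10",
#     "ports": { "11434/tcp": [ { "HostIp": "0.0.0.0", "HostPort": "40123" } ] }
#   }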
echo "IP Address: $ip_address"
echo "Host Port: $http_port"
# update OLLAMA_API_BASE in .env with the new IP address and port
sed -i "s/^OLLAMA_API_BASE=.*/OLLAMA_API_BASE=http:\/\/$ip_address:$http_port/" .env
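# Quick smoke test once the model has been created (commented out here; this
# uses ollama's standard /api/generate endpoint):
#
#   curl "http://$ip_address:$http_port/api/generate" \
#     -d "{\"model\": \"$MODEL_NAME\", \"prompt\": \"Hello!\", \"stream\": false}"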
echo -e "\033[35mNow you should be able to use the ollama API in your wagtail project! 🚀\033[0m"
echo -e "\033[35mRemember to destroy the instance when you're done\033[0m"
echo -e " \033[31mvastai destroy instance \"\$instance_id\"\033[0m"
echo -e "\033[35m\nand cleanup 🗑️\033[0m"
echo -e " \033[31mrm -fv vast_ssh_* ssh_*.json\033[0m"
# destroy the instance when you're done
# vastai destroy instance "$instance_id"
# cleanup
# rm -fv vast_ssh_* ssh_*.json