Skip to content

Instantly share code, notes, and snippets.

@dcbark01
Last active May 9, 2024 12:13
Show Gist options
  • Save dcbark01/5ac0dfaa86302282840d0eff4025bd37 to your computer and use it in GitHub Desktop.
Save dcbark01/5ac0dfaa86302282840d0eff4025bd37 to your computer and use it in GitHub Desktop.
Huggingface Text Generation Inference SLURM
#!/bin/bash
#SBATCH --job-name=llm-swarm
#SBATCH --partition hopper-prod
#SBATCH --gpus={{gpus}}
#SBATCH --cpus-per-task=12
#SBATCH --mem-per-cpu=11G
#SBATCH -o slurm/logs/%x_%j.out
# See original source here:
# https://github.com/huggingface/llm-swarm/blob/main/templates/tgi_h100.template.slurm
# Choose the host directory that is bind-mounted into the container as /data:
# use the shared /fsx/.cache when that directory exists (HF-internal cluster),
# otherwise fall back to a .cache directory relative to the working directory.
if [[ -d /fsx/.cache ]]; then
  volume=/fsx/.cache
else
  volume=.cache
fi
export volume
# Model id and revision; the {{...}} placeholders are expected to be
# substituted before the script is submitted.
export model={{model}} revision={{revision}}
#######################################
# Print randomly chosen TCP ports that are not currently in use.
# Candidates are 1025-65535 minus every local port reported by `ss -Htan`.
# Arguments: $1 - how many ports to print (default 1)
# Outputs:   up to $1 port numbers on stdout, one per line, in random order
#######################################
function unused_port() {
  local count=${1:-1}
  # comm needs both inputs sorted with the same collation, so pin it to C.
  LC_ALL=C comm -23 \
    <(seq 1025 65535 | LC_ALL=C sort) \
    <(ss -Htan |
      # Column 4 is "local-address:port". Strip everything up to the LAST
      # colon so IPv6 forms such as "[::]:8080" yield the port as well
      # (splitting on the first colon returned an empty field for those).
      awk '{ sub(/.*:/, "", $4); print $4 }' |
      LC_ALL=C sort -u) |
    shuf -n "$count"
}
# Pick a random free TCP port for the TGI server. Assign and export separately
# so a failure of unused_port is not masked, and stop early if no port was
# found instead of advertising a malformed URL below.
PORT=$(unused_port)
if [ -z "$PORT" ]; then
  echo "error: could not find an unused port" >&2
  exit 1
fi
export PORT

if [ -z "${HUGGING_FACE_HUB_TOKEN:-}" ]; then
  # try reading from file
  token_file="$HOME/.cache/huggingface/token"
  if [ -r "$token_file" ]; then
    HUGGING_FACE_HUB_TOKEN=$(cat "$token_file")
  else
    echo "warning: HUGGING_FACE_HUB_TOKEN is unset and $token_file is not readable" >&2
  fi
  export HUGGING_FACE_HUB_TOKEN
fi

echo "Starting TGI container port $PORT"
# Append this job's endpoint (first address from `hostname -I`) to the hosts file.
echo "http://$(hostname -I | awk '{print $1}'):$PORT" >> {{slurm_hosts_path}}

# unset cache dirs to avoid pyxis having host env var somehow get into the container
unset HF_HUB_CACHE HF_ASSETS_CACHE HF_DATASETS_CACHE HF_MODULES_CACHE

# NOTE: the last argument line must NOT end with a backslash; otherwise the
# following `echo` is joined onto the srun command line and handed to
# text-generation-launcher as extra arguments.
srun --container-image='ghcr.io#huggingface/text-generation-inference' \
  --container-env=HUGGING_FACE_HUB_TOKEN,PORT \
  --container-mounts="$volume:/data" \
  --no-container-mount-home \
  --qos normal \
  /usr/local/bin/text-generation-launcher \
  --model-id "$model" \
  --revision "$revision" \
  --max-concurrent-requests 2000 \
  --max-total-tokens {{model_max_length}} \
  --max-input-length {{model_input_length}} \
  --max-batch-prefill-tokens {{model_max_length}}

echo "End of job"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment