Last active: October 2, 2023, 15:55
-
-
Save Delaunay/816072ba6caef1dbecef4439eeb94b02 to your computer and use it in GitHub Desktop.
Run LLAMA2 on a Slurm cluster
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Run a LLAMA2 chat-completion example on a Slurm cluster.
#
# Assumes conda is installed. Clones the LLAMA code into the job's
# scratch space, creates a fresh environment, and launches the demo
# with one torchrun process per GPU.
#
# Usage:
#
#   sbatch --ntasks-per-node=1 --mem=32G  llama2.sh llama-2-7b-chat
#   sbatch --ntasks-per-node=2 --mem=64G  llama2.sh llama-2-13b-chat
#   sbatch --ntasks-per-node=8 --mem=192G llama2.sh llama-2-70b-chat
#
#SBATCH --gpus-per-task=rtx8000:1
#SBATCH --cpus-per-task=4
#SBATCH --time=00:15:00
# Defaults
#SBATCH --ntasks-per-node=2
#SBATCH --mem=64G

# Abort on the first failed command or failed pipeline stage.
# NOTE: -u is deliberately omitted — conda's activation scripts are known
# to reference unset variables and would trip it.
set -eo pipefail

# Shared filesystem location of the pre-downloaded model weights.
WEIGHTS="/network/weights/llama.var/llama2"

# Model name is the single required positional argument.
MODEL="${1:?usage: sbatch llama2.sh <model-name> (e.g. llama-2-7b-chat)}"

# GPUs required per model size; must equal --ntasks-per-node so that
# torchrun spawns exactly one process per GPU.
declare -A GPUS
GPUS["llama-2-7b-chat"]=1
GPUS["llama-2-7b"]=1
GPUS["llama-2-13b-chat"]=2
GPUS["llama-2-13b"]=2
GPUS["llama-2-70b-chat"]=8
GPUS["llama-2-70b"]=8

# Default to 0 when not running under Slurm so the check below fails cleanly.
NTASKS_PER_NODE="${SLURM_NTASKS_PER_NODE:-0}"

# ${GPUS[$MODEL]+_} expands to "_" only when the key exists.
if [[ -n "${GPUS[$MODEL]+_}" && "${GPUS[$MODEL]}" -eq "$NTASKS_PER_NODE" ]]; then
    # Build in node-local scratch; it is wiped when the job ends.
    cd "$SLURM_TMPDIR"
    git clone https://github.com/facebookresearch/llama.git
    cd llama

    # Locate conda and source its shell hook so `conda activate` works
    # inside a non-interactive batch script.
    CONDA_EXEC="$(command -v conda)"
    CONDA_BASE="$(dirname "$CONDA_EXEC")"
    # shellcheck disable=SC1091  # path resolved at runtime
    source "$CONDA_BASE/../etc/profile.d/conda.sh"

    conda create --prefix ./env python=3.9 -y
    conda activate ./env
    pip install -e .

    torchrun --nproc_per_node "${GPUS[$MODEL]}" \
        example_chat_completion.py \
        --ckpt_dir "$WEIGHTS/$MODEL" \
        --tokenizer_path "$WEIGHTS/tokenizer.model" \
        --max_seq_len 512 \
        --max_batch_size 6
else
    # Fail loudly (stderr + non-zero status) so Slurm marks the job FAILED
    # instead of silently exiting 0 as the original did.
    echo "Error: model '$MODEL' is unknown, or --ntasks-per-node ($NTASKS_PER_NODE) does not match its required GPU count" >&2
    exit 1
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.