Skip to content

Instantly share code, notes, and snippets.

@binarycrayon
Last active February 24, 2025 20:48
job.yaml
# SkyPilot task example: QLoRA SFT training with alignment-handbook on 4x A100-80GB (GCP)
# Environment variables exported to both the `setup` and `run` scripts.
# Every value below is a placeholder; replace it (or override it at launch
# time, e.g. `sky launch --env HF_TOKEN=...`) before running the job.
envs:
  # Credentials -- never commit real values to version control.
  COMET_API_KEY: 'xxxxxxxxxxxxxxxxx'
  HF_TOKEN: 'hf_xxxxxxxxxxxxxxxx'

  # Comet experiment-tracking settings.
  COMET_PROJECT_NAME: 'example-project'
  COMET_WORKSPACE: 'your-workspace'
  COMET_MODE: 'ONLINE'
# Hardware request: one GCP node with four A100-80GB GPUs.
resources:
  cloud: gcp
  # Uncomment to pin the job to a single region.
  # region: us-east4
  # Format is <accelerator name>:<count>; quoted so it is always read as a string.
  accelerators: 'A100-80GB:4'
  disk_size: 256
# Create a reproducible micromamba environment named `env` and install the
# training dependencies into it. The expensive part (env creation, clone and
# pip installs) only runs when `env` cannot be activated, i.e. on first setup.
setup: |
  # If a conda installation is present, stop it from auto-activating `base`
  # and remove its shell hooks so it does not shadow the micromamba env.
  if command -v conda &> /dev/null; then
    conda config --set auto_activate_base false
    conda init --reverse --all
    conda deactivate
    source "$HOME/.bashrc"
  fi

  # Check if micromamba is installed
  if ! command -v micromamba &> /dev/null; then
    echo "micromamba not found, installing..."
    curl -fsSL https://raw.githubusercontent.com/mamba-org/micromamba-releases/main/install.sh -o install.sh
    # Modify install.sh script for customized installation: replace each
    # interactive `read` prompt with a fixed answer so it runs unattended.
    sed -i 's/read BIN_FOLDER/BIN_FOLDER="${HOME}\/.local\/bin"/g' install.sh
    sed -i 's/read INIT_YES/INIT_YES="yes"/g' install.sh
    sed -i 's/read CONDA_FORGE_YES/CONDA_FORGE_YES="yes"/g' install.sh
    sed -i 's/read PREFIX_LOCATION/PREFIX_LOCATION="${HOME}\/micromamba"/g' install.sh
    # Run the installation script
    ${SHELL} install.sh
    # Reload the shell init so the freshly installed micromamba is usable.
    source "$HOME/.bashrc"
  else
    echo "micromamba is already installed"
  fi

  # Activation fails when `env` does not exist yet; build it in that case.
  if ! micromamba activate env; then
    # Install an interpreter and pip into the env itself. Without them the
    # `python -m pip` calls below would resolve to a Python outside the env
    # and nothing would actually be installed into `env`.
    micromamba create -n env -c pytorch -c nvidia -c xformers -c conda-forge -y python=3.10 pip
    micromamba activate env

    # A fresh env has no activate.d directory; create it before writing the
    # hook, otherwise the redirect below fails.
    mkdir -p "$CONDA_PREFIX/etc/conda/activate.d"
    # Single quotes are deliberate: $CONDA_PREFIX must be expanded when the
    # env is activated, not now.
    echo 'export CUDA_HOME=$CONDA_PREFIX' > "$CONDA_PREFIX/etc/conda/activate.d/cuda_home.sh"
    chmod +x "$CONDA_PREFIX/etc/conda/activate.d/cuda_home.sh"

    # Skip the clone if a checkout is already present (e.g. from a previous
    # setup attempt that failed part-way) so the step is re-runnable.
    if [ ! -d alignment-handbook ]; then
      git clone https://github.com/huggingface/alignment-handbook
    fi
    cd alignment-handbook
    python -m pip install -e .
    python -m pip install huggingface_hub
    python -m pip install comet_ml
    python -m pip install liger-kernel
    # MAX_JOBS caps parallel compile jobs during the flash-attn build.
    MAX_JOBS=2 python -m pip install flash-attn --no-build-isolation
    git config --global credential.helper store
  fi
# Sync the current local directory to the cluster as the task working directory.
workdir: .

# Storage attached to the cluster.
file_mounts:
  # Training script and config: the local directory is uploaded to the named
  # bucket and copied onto the node at /config.
  /config:
    name: your-s3-bucket
    source: .
    mode: COPY
    persistent: false

  # Existing GCS bucket mounted at /data.
  /data:
    source: gs://your-training-checkpoints
    mode: MOUNT
# Launch QLoRA SFT training with accelerate, using the script and config
# copied to /config and the alignment-handbook checkout created by `setup`.
run: |
  # Dump the environment for debugging, but redact the credentials declared
  # under `envs` so they do not end up in the job logs.
  env | sed -E 's/^(COMET_API_KEY|HF_TOKEN)=.*/\1=<redacted>/'
  micromamba activate env
  # Stop early if setup did not produce the checkout; the relative
  # --config_file path below depends on this directory.
  cd alignment-handbook || exit 1
  # One process per GPU: use the per-node GPU count exported by SkyPilot and
  # fall back to 4 (the count requested under `resources`) if it is unset.
  ACCELERATE_LOG_LEVEL=info accelerate launch \
    --config_file recipes/accelerate_configs/fsdp_qlora.yaml \
    --num_processes="${SKYPILOT_NUM_GPUS_PER_NODE:-4}" \
    /config/run_sft.py /config/config_qlora.yaml
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment