Last active
February 24, 2025 20:48
job.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# skypilot training script example | |
envs: | |
COMET_API_KEY: xxxxxxxxxxxxxxxxx | |
COMET_PROJECT_NAME: example-project | |
COMET_WORKSPACE: your-workspace | |
COMET_MODE: ONLINE | |
HF_TOKEN: hf_xxxxxxxxxxxxxxxx | |
resources: | |
cloud: gcp | |
accelerators: A100-80GB:4 | |
# region: us-east4 | |
disk_size: 256 | |
# Create reproducible conda environment | |
setup: |
  # One-time provisioning script: makes sure micromamba is available, then
  # (only when the `env` environment cannot be activated) creates it and
  # installs alignment-handbook plus the extra training dependencies.

  # If conda is installed, disable base auto-activation and revert the shell
  # changes made by `conda init` before micromamba is set up.
  if command -v conda &> /dev/null; then
    conda config --set auto_activate_base false
    conda init --reverse --all
    conda deactivate
    source "$HOME/.bashrc"
  fi

  # Check if micromamba is installed
  if ! command -v micromamba &> /dev/null; then
    echo "micromamba not found, installing..."
    # Stop here on a failed download instead of running sed and the shell
    # against a missing or partial installer.
    curl -fsSL https://raw.githubusercontent.com/mamba-org/micromamba-releases/main/install.sh -o install.sh \
      || { echo "failed to download the micromamba installer" >&2; exit 1; }
    # Modify install.sh script for customized installation: replace each
    # interactive `read` prompt with a fixed answer so the installer can run
    # unattended.
    sed -i 's/read BIN_FOLDER/BIN_FOLDER="${HOME}\/.local\/bin"/g' install.sh
    sed -i 's/read INIT_YES/INIT_YES="yes"/g' install.sh
    sed -i 's/read CONDA_FORGE_YES/CONDA_FORGE_YES="yes"/g' install.sh
    sed -i 's/read PREFIX_LOCATION/PREFIX_LOCATION="${HOME}\/micromamba"/g' install.sh
    # Run the installation script
    "${SHELL:-bash}" install.sh
    # Reload the shell configuration that the installer just modified.
    source "$HOME/.bashrc"
  else
    echo "micromamba is already installed"
  fi

  # A failed activation is taken to mean the environment does not exist yet,
  # so it is created and populated below.
  if ! micromamba activate env; then
    # NOTE(review): no package specs are passed to `micromamba create`, so the
    # environment appears to be created empty and `python` below resolves to
    # whatever interpreter is already on PATH. Confirm whether specs such as
    # `python=...` were intended here.
    micromamba create -n env -c pytorch -c nvidia -c xformers -y
    micromamba activate env || exit 1

    # The activate.d directory is not guaranteed to exist in a new
    # environment; create it before writing the hook script into it.
    mkdir -p "$CONDA_PREFIX/etc/conda/activate.d"
    # Single quotes are deliberate: $CONDA_PREFIX must be expanded each time
    # the environment is activated, not now.
    echo 'export CUDA_HOME=$CONDA_PREFIX' > "$CONDA_PREFIX/etc/conda/activate.d/cuda_home.sh"
    chmod +x "$CONDA_PREFIX/etc/conda/activate.d/cuda_home.sh"

    # Reuse an existing checkout instead of failing on a second clone.
    if [ ! -d alignment-handbook ]; then
      git clone https://github.com/huggingface/alignment-handbook
    fi
    cd alignment-handbook || exit 1

    python -m pip install -e .
    python -m pip install huggingface_hub
    python -m pip install comet_ml
    python -m pip install liger-kernel
    # MAX_JOBS caps the number of parallel compile jobs for the flash-attn
    # build.
    MAX_JOBS=2 python -m pip install flash-attn --no-build-isolation
    git config --global credential.helper store
  fi
workdir: . | |
file_mounts: | |
/config: | |
name: your-s3-bucket | |
source: . | |
persistent: False | |
mode: COPY | |
/data: | |
source: gs://your-training-checkpoints | |
mode: MOUNT | |
run: |
  # Job entry point: activates the `env` environment prepared by `setup` and
  # launches the SFT script from /config through `accelerate`.

  # Dump the environment for debugging, but mask the credentials declared in
  # `envs` so they do not end up in the job logs.
  env | sed -E 's/^(COMET_API_KEY|HF_TOKEN)=.*/\1=<redacted>/'

  # Abort early rather than launching training with the wrong interpreter or
  # from the wrong directory.
  micromamba activate env || exit 1
  cd alignment-handbook || exit 1

  # Use the per-node GPU count exported by SkyPilot so the process count
  # follows `resources.accelerators`; fall back to the previous fixed value
  # of 4 when the variable is not set.
  ACCELERATE_LOG_LEVEL=info accelerate launch \
    --config_file recipes/accelerate_configs/fsdp_qlora.yaml \
    --num_processes="${SKYPILOT_NUM_GPUS_PER_NODE:-4}" \
    /config/run_sft.py /config/config_qlora.yaml
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment