Skip to content

Instantly share code, notes, and snippets.

@sirodoht
Created April 19, 2023 16:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sirodoht/5a64889be2384b05e6617478482391c1 to your computer and use it in GitHub Desktop.
# --- System setup ---
# Refresh the package index and install build/dev tooling:
# compilers (gcc/g++), make, Python dev headers, venv support for
# Python 3.10, plus convenience tools (vim, tree, iftop, ripgrep).
sudo apt update
sudo apt install -y vim git python3-dev gcc g++ make unzip tree python3.10-venv iftop ripgrep
# Install rclone via the official install script.
# NOTE(review): piping curl straight into `sudo bash` executes unverified
# remote code as root — consider downloading and inspecting the script first.
sudo -v ; curl https://rclone.org/install.sh | sudo bash
# Create the rclone config directory and write the config by hand
# (the expected file contents follow below).
mkdir -p ~/.config/rclone/
vim ~/.config/rclone/rclone.conf
# ~/.config/rclone/rclone.conf
# Defines an rclone remote named "r2": Cloudflare R2 accessed through its
# S3-compatible API. Referenced later as `r2:` in rclone commands.
[r2]
type = s3
provider = Cloudflare
# Replace the xxx placeholders with real R2 API credentials and the
# account-specific endpoint hostname before use.
access_key_id = xxx
secret_access_key = xxx
endpoint = https://xxx.r2.cloudflarestorage.com
acl = private
# --- Fetch model weights from R2 ---
# List the remote's contents to confirm the config works, then download
# the LLaMA 7B weights and the shared tokenizer into ./models/.
rclone tree r2:
rclone copy r2:/llama/models/7B ./models/7B
rclone copy r2:/llama/models/tokenizer.model ./models/tokenizer.model
# --- Python environment ---
# Clone transformers (needed for the LLaMA weight-conversion script) and
# create/activate a virtualenv for all subsequent pip installs.
git clone https://github.com/huggingface/transformers
python3 -m venv .venv
source .venv/bin/activate
# Pinned versions: torch 1.13.x, tokenizers 0.13.3, protobuf 3.20.x.
# NOTE(review): pins presumably chosen for mutual compatibility at the time
# of writing — confirm against current transformers requirements if reusing.
pip install torch~=1.13
pip install tokenizers==0.13.3
pip install protobuf~=3.20
pip install accelerate sentencepiece
# --- Stanford Alpaca fine-tuning code ---
# Clone the training repo and install its dependencies into the venv.
git clone https://github.com/tatsu-lab/stanford_alpaca
cd stanford_alpaca
pip install -r requirements.txt
# on host:
# Run this scp from your LOCAL machine (not the GPU server): it uploads the
# instruction-tuning dataset (output.json) to the server's home directory.
scp output.json ubuntu@xxx.coreweave.cloud:/home/ubuntu/
# --- Convert raw LLaMA weights to Hugging Face format ---
cd transformers
# NOTE(review): the next two lines are two alternative invocations of the
# same converter with different relative paths (run from inside transformers/
# vs. with a local copy of the script) — only one should be executed.
python src/transformers/models/llama/convert_llama_weights_to_hf.py --input_dir ../models --model_size 7B --output_dir ../hf-output
python convert_llama_weights_to_hf.py --input_dir ./models/ --model_size 7B --output_dir ./hf-output
# --- Fine-tune with Alpaca's train.py via torchrun ---
# Launches 4 processes on one node (4 GPUs), sharding the model with FSDP
# ("full_shard auto_wrap", wrapping at LlamaDecoderLayer granularity).
# Effective batch size = 4 procs * 4 per-device * 8 grad-accum = 128.
# bf16 + tf32 require Ampere-or-newer GPUs. Evaluation and checkpoint
# saving are disabled ("no"); logging_steps 100000 effectively silences
# step logging for a 3-epoch run.
# (Comments cannot go inside the backslash-continued command below.)
torchrun \
--nproc_per_node=4 \
--master_port=55444 \
train.py \
--model_name_or_path ../hf-output/ \
--data_path ../output.json \
--bf16 True \
--output_dir ../training-out/ \
--num_train_epochs 3 \
--per_device_train_batch_size 4 \
--per_device_eval_batch_size 4 \
--gradient_accumulation_steps 8 \
--evaluation_strategy "no" \
--save_strategy "no" \
--learning_rate 2e-5 \
--weight_decay 0. \
--warmup_ratio 0.03 \
--lr_scheduler_type "cosine" \
--logging_steps 100000 \
--fsdp "full_shard auto_wrap" \
--fsdp_transformer_layer_cls_to_wrap 'LlamaDecoderLayer' \
--tf32 True
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment