# Create an isolated environment and install a PyTorch nightly built for CUDA 11.8.
conda create --name llama -c conda-forge python=3.8
conda activate llama
pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu118
python -c 'import torch; print(torch.cuda.is_available())'
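# Optional sanity check (not in the original gist): the wheel's CUDA build
# should match the toolkit installed below, i.e. this should print 11.8.
python -c 'import torch; print(torch.version.cuda)'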
# Toolchain: a GCC compatible with the CUDA 11.8 toolkit from the NVIDIA channel.
conda install -c conda-forge gxx_linux-64=10.4.0
conda install cuda -c nvidia/label/cuda-11.8.0
#pip install packaging flash-attn
#conda install -c conda-forge cudatoolkit-dev
pip install transformers==4.28.1
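# Quick check (optional, not in the original gist): confirm the pinned
# transformers version is the one that actually imports.
python -c 'import transformers; print(transformers.__version__)'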
# This block is disabled (if false); it records the one-time steps used to
# download the original LLaMA weights and convert them to the Hugging Face format.
if false; then
conda install -c conda-forge tokenizers=0.13.3
pip install accelerate==0.18.0
pip install sentencepiece==0.1.98
pip install protobuf==3.20
git clone --depth 1 --branch v4.29.2 git@github.com:huggingface/transformers.git
cd transformers/src/
export PYTHONPATH=`pwd`
python transformers/models/llama/convert_llama_weights_to_hf.py -h
conda install -c conda-forge aria2
aria2c --file-allocation=none 'magnet:?xt=urn:btih:b8287ebfa04f879b048d4d4404108cf3e8014352&dn=LLaMA&tr=udp%3a%2f%2ftracker.opentrackr.org%3a1337%2fannounce'
python transformers/models/llama/convert_llama_weights_to_hf.py --input_dir /store2/scratch/w32zhong/llama/ --model_size 30B --output_dir /store2/scratch/w32zhong/llama/30B-hgf/
fi
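# Optional load test after conversion (a sketch, not from the gist; uses the
# output path from the disabled block above):
#python -c "from transformers import LlamaTokenizerFast; LlamaTokenizerFast.from_pretrained('/store2/scratch/w32zhong/llama/30B-hgf/')"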
pip install fschat einops
# Per-device batch size and number of GPUs for the training run below.
B=1
N=4
export CUDA_VISIBLE_DEVICES=0,1,3,4
rm -rf output
torchrun --nproc_per_node=$N --master_port=20001 fastchat/train/train_mem.py \
--model_name_or_path ../7B-hgf/ \
--data_path playground/data/dummy.json \
--bf16 True \
--output_dir output \
--num_train_epochs 3 \
--per_device_train_batch_size $B \
--per_device_eval_batch_size $B \
--gradient_accumulation_steps 16 \
--evaluation_strategy "no" \
--save_strategy "steps" \
--save_steps 1200 \
--save_total_limit 10 \
--learning_rate 2e-5 \
--weight_decay 0. \
--warmup_ratio 0.03 \
--lr_scheduler_type "cosine" \
--logging_steps 1 \
--fsdp "full_shard auto_wrap" \
--fsdp_transformer_layer_cls_to_wrap 'LlamaDecoderLayer' \
--model_max_length 2048 \
--gradient_checkpointing True \
--lazy_preprocess True \
--tf32 True
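# Effective batch size for the run above: per-device batch (B=1) x GPUs (N=4)
# x gradient_accumulation_steps (16) = 64 sequences per optimizer step.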
pip install deepspeed
pip install git+https://github.com/huggingface/peft
#deepspeed train_lora.py --deepspeed <$PATH_TO_DEEPSPEED_CONFIG>
#exit
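# A minimal DeepSpeed config sketch for the commented-out train_lora.py call
# above (assumptions, not from the gist: ZeRO stage 2 with bf16, batch sizes
# delegated to the HF Trainer via "auto"; the filename ds_config.json is ours):
cat > ds_config.json <<'EOF'
{
  "train_micro_batch_size_per_gpu": "auto",
  "gradient_accumulation_steps": "auto",
  "bf16": { "enabled": true },
  "zero_optimization": { "stage": 2 }
}
EOF
#deepspeed train_lora.py --deepspeed ds_config.json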
# Second gist file: apply a LoRA adapter to the converted LLaMA checkpoint.
import torch
from transformers import LlamaForCausalLM
from transformers import LlamaTokenizerFast
from peft import LoraConfig, get_peft_model

def print_trainable_parameters(model):
    # Report how many parameters LoRA leaves trainable vs. the full model.
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params:,} || all params: {all_param:,} || trainable%: {100 * trainable_params / all_param:.4f}"
    )
tokenizer = LlamaTokenizerFast.from_pretrained('7B-hgf')
model = LlamaForCausalLM.from_pretrained('7B-hgf')

# Attach LoRA adapters to the attention query and value projections only.
TARGET_MODULES = [
    "q_proj",
    "v_proj",
]
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=8, lora_dropout=0.05,
    lora_alpha=16, bias='none',
    target_modules=TARGET_MODULES,
)
model = get_peft_model(model, lora_config)
print_trainable_parameters(model)
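# Expected ballpark for LLaMA-7B (32 layers, hidden size 4096): each target
# module gets an 8x4096 A matrix and a 4096x8 B matrix, so
# 2 modules x 32 layers x 2 x (8 x 4096) = 4,194,304 trainable params,
# roughly 0.06% of the ~6.7B total.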
# Restrict scaled-dot-product attention to the FlashAttention kernel and
# confirm which backends are enabled inside the context.
with torch.backends.cuda.sdp_kernel(
    enable_flash=True,
    enable_math=False,
    enable_mem_efficient=False
):
    print(torch.backends.cuda.flash_sdp_enabled())
    print(torch.backends.cuda.mem_efficient_sdp_enabled())
    print(torch.backends.cuda.math_sdp_enabled())
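# A minimal sketch (not in the original gist) of exercising the flash kernel
# directly. Assumptions: a CUDA device is available, inputs are fp16 with shape
# [batch, heads, seq, head_dim] as the flash backend requires, and the tensor
# sizes here are arbitrary.
import torch.nn.functional as F
q = k = v = torch.randn(1, 32, 128, 128, device='cuda', dtype=torch.float16)
with torch.backends.cuda.sdp_kernel(enable_flash=True, enable_math=False, enable_mem_efficient=False):
    out = F.scaled_dot_product_attention(q, k, v, is_causal=True)
print(out.shape)  # torch.Size([1, 32, 128, 128])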