@fullstackwebdev · Created February 17, 2023
RWKV Conda LoRA recipe
1. I cloned the repo:
git clone https://github.com/Blealtan/RWKV-LM-LoRA
2. I downloaded the 7B Instruct model:
wget https://huggingface.co/BlinkDL/rwkv-4-pile-7b/resolve/main/RWKV-4-Pile-7B-Instruct-test2-20230209.pth
(into my home directory, /home/shazam)
3. I installed Conda:
wget https://repo.anaconda.com/archive/Anaconda3-2022.10-Linux-x86_64.sh
sh ./Anaconda3-2022.10-Linux-x86_64.sh
4. I created a conda env "rwkv" with Python 3.10:
conda create -n rwkv python=3.10
conda activate rwkv
5. I went into the repo:
cd RWKV-LM-LoRA
6. I installed packages like this:
pip install torch
pip install numpy==1.23
pip install transformers
pip install pytorch-lightning==1.7.5
pip install deepspeed==0.7.0
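Before going further, it's worth a quick sanity check that this environment can see the GPU and supports bf16, which the training command in step 10 relies on. A minimal sketch; these are all standard PyTorch calls:
python3 - <<'EOF'
import torch
print(torch.__version__, torch.version.cuda)   # torch build and the CUDA version it was built against
print(torch.cuda.is_available())               # should be True if the 3090 is visible
print(torch.cuda.is_bf16_supported())          # should be True on Ampere; needed for --precision bf16
EOF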
7. (You might not have to do this.) My 3090 box was on CUDA 11.7, so I installed the matching CUDA toolkit into the env with conda like this:
conda install cudart -c nvidia/label/cuda-11.7.0
conda install cuda -c nvidia/label/cuda-11.7.0
conda install -c "nvidia/label/cuda-11.7.0" cuda-nvcc
8. (You might not have to do this.) For whatever reason, I had to point the linker at the CUDA libraries after locating them:
find ~ -name '*cudart*so'
export LDFLAGS=-L/home/shazam/anaconda3/envs/rwkv/lib/
9. (You might not have to do this.) It also wanted the libraries in a folder called lib64:
cd /home/shazam/anaconda3/envs/rwkv/
cp -favr lib lib64
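Steps 7-9 are all about letting the CUDA kernel that RWKV compiles at startup (via torch's C++ extension machinery) find the toolkit inside the conda env. A quick way to check that this resolution worked, assuming the env path from the steps above:
python3 - <<'EOF'
import glob, os
from torch.utils.cpp_extension import CUDA_HOME
print(CUDA_HOME)   # ideally /home/shazam/anaconda3/envs/rwkv, not None
print(glob.glob(os.path.join(CUDA_HOME or "", "lib64", "*cudart*")))   # the files step 9 put in lib64
EOF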
10. Then I was able to run:
python3 ./train.py \
--load_model /home/shazam/RWKV-4-Pile-7B-Instruct-test2-20230209.pth \
--proj_dir ./tuned \
--data_file /home/shazam/train.npy \
--data_type numpy \
--vocab_size 50277 \
--ctx_len 1024 \
--epoch_save 1 \
--epoch_count 100 \
--n_layer 32 \
--n_embd 4096 \
--epoch_steps 1000 \
--epoch_begin 0 \
--micro_bsz 1 \
--pre_ffn 0 \
--head_qk 0 \
--lr_init 1e-5 \
--lr_final 1e-5 \
--warmup_steps 0 \
--beta1 0.9 \
--beta2 0.999 \
--adam_eps 1e-8 \
--accelerator gpu \
--devices 1 \
--precision bf16 \
--strategy deepspeed_stage_2 \
--grad_cp 0 \
--lora \
--lora-r 8 \
--lora-alpha 32 \
--lora-dropout 0.01
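(One thing this step assumes: /home/shazam/train.npy already exists. For reference, here is a minimal sketch of how such a file might be produced. Assumptions of mine, not from the original recipe: the HuggingFace GPT-NeoX-20B tokenizer matches the 50277-token Pile vocabulary that --vocab_size expects, input.txt is a placeholder for your own corpus, and uint16 is a guess of a dtype that fits 50277 ids; check how train.py's numpy loader reads the file.)
python3 - <<'EOF'
import numpy as np
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")   # Pile tokenizer, 50277 tokens
with open("input.txt", encoding="utf-8") as f:                   # placeholder: your training text
    ids = tok.encode(f.read())
np.save("train.npy", np.asarray(ids, dtype=np.uint16))           # 50277 < 2**16, so uint16 fits
print(len(ids), "tokens written")
EOF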
-----
11. Then, in the chat script, I just modified it to use ./tuned/rwkv-&lt;yourcheckpoint&gt; (the number is the epoch index; --epoch_save 1 writes a checkpoint every epoch):
args.MODEL_NAME = './tuned/rwkv-2'
args.n_layer = 32
args.n_embd = 4096
args.ctx_len = 1024