@AIAnytime
Created March 23, 2024 12:58
Evaluation after fine-tuning an LLM
## With the EleutherAI LM Evaluation Harness
!pip install git+https://github.com/EleutherAI/lm-evaluation-harness.git
!pip install bitsandbytes
!pip install --upgrade transformers
!pip install auto-gptq optimum autoawq
!lm_eval --model hf --model_args pretrained=google/gemma-7b --tasks winogrande,hellaswag,arc_challenge --device cuda:0 --num_fewshot 1 --batch_size 8 --output_path ./eval_harness/gemma-7b
!lm_eval --model hf --model_args pretrained=google/gemma-7b --tasks winogrande,hellaswag,arc_challenge --device cuda:0 --num_fewshot 5 --batch_size 8 --output_path ./eval_harness/gemma-7b-5shot
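Each run above writes a results JSON under `--output_path`. A minimal sketch of pulling the accuracy numbers out of that file, assuming the harness's usual layout (a top-level `"results"` key mapping task name to metrics; the exact metric keys, e.g. `"acc,none"` vs `"acc"`, vary by harness version, so this matches any key starting with `acc`). The values in the mock payload are illustrative, not real scores:

```python
import json

def summarize(results_json: dict) -> dict:
    """Collect the accuracy-style metrics for each evaluated task."""
    summary = {}
    for task, metrics in results_json["results"].items():
        # Keep only accuracy metrics; schema assumed from typical harness output.
        summary[task] = {k: v for k, v in metrics.items() if k.startswith("acc")}
    return summary

# Mocked payload standing in for json.load(open(".../results.json"))
mock = {
    "results": {
        "winogrande": {"acc,none": 0.70},
        "hellaswag": {"acc,none": 0.60, "acc_norm,none": 0.79},
    }
}
print(summarize(mock))
```

For hellaswag the harness reports both raw and length-normalized accuracy; the normalized score (`acc_norm`) is the one usually quoted on leaderboards.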
## For LoRA adapters: the following cell evaluates the adapter on hellaswag, arc_challenge, and winogrande in a zero-shot setting.
!lm_eval --model hf \
--model_args pretrained=mistralai/Mistral-7B-v0.1,peft=./drive/MyDrive/v2_mistral7b_lora_results/checkpoint-1230 \
--tasks hellaswag,arc_challenge,winogrande --device cuda:0 --num_fewshot 0 --batch_size 4 --output_path ./eval_harness/Mistral7B-LoRA --log_samples
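When comparing several LoRA checkpoints, it helps to generate the same invocation per checkpoint rather than editing the command by hand. A small sketch that builds the CLI string shown above; the checkpoint and output directory names here are hypothetical placeholders:

```python
import shlex

def build_lora_eval_cmd(base_model: str, adapter_dir: str, out_dir: str) -> str:
    """Assemble the lm_eval command for one LoRA adapter checkpoint."""
    args = [
        "lm_eval", "--model", "hf",
        "--model_args", f"pretrained={base_model},peft={adapter_dir}",
        "--tasks", "hellaswag,arc_challenge,winogrande",
        "--device", "cuda:0",
        "--num_fewshot", "0",
        "--batch_size", "4",
        "--output_path", out_dir,
        "--log_samples",
    ]
    # shlex.join quotes any argument that needs it for safe shell use
    return shlex.join(args)

cmd = build_lora_eval_cmd(
    "mistralai/Mistral-7B-v0.1",
    "./checkpoints/checkpoint-100",            # hypothetical adapter path
    "./eval_harness/Mistral7B-LoRA-ckpt100",   # hypothetical output dir
)
print(cmd)
```

The resulting string can be passed to `!{cmd}` in a notebook cell, or run via `subprocess.run(shlex.split(cmd))` from a script.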