#!/bin/bash
# lm-eval.sh: run vincentoh/llama3-alpaca-dpo-instruct through four benchmark
# suites (AGIEval, GPT4All, TruthfulQA, Big-Bench) with lm-evaluation-harness.
# Evaluation settings
MODEL_ID="vincentoh/llama3-alpaca-dpo-instruct"  # Hugging Face model to evaluate
TRUST_REMOTE_CODE="yes"                          # allow custom modeling code from the Hub
DTYPE="bfloat16"                                 # load weights in bf16
BATCH_SIZE="auto"                                # let the harness pick the batch size
CUDA_DEVICES=0                                   # GPU index to run on
# System packages and Python dependencies
sudo apt update
sudo apt install -y screen vim git-lfs
pip install -q requests accelerate sentencepiece pytablewriter einops protobuf huggingface_hub==0.21.4
pip install -U transformers
pip install -U torch
# Authenticate with the Hugging Face Hub (needed for gated or private models)
huggingface-cli login
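# For unattended runs, the interactive prompt can be skipped by passing a token
# directly (a sketch, assuming HF_TOKEN is already exported in the environment):
# huggingface-cli login --token "$HF_TOKEN"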
# Clone the lm-evaluation-harness fork/branch that adds the AGIEval tasks
git clone -b add-agieval https://github.com/dmahan93/lm-evaluation-harness
cd lm-evaluation-harness
pip install -e .
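# Optional sanity check (a sketch): confirm the model ID resolves and its
# tokenizer downloads cleanly before committing the GPU to hours of evals.
python -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('$MODEL_ID')"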
benchmark="agieval"
python main.py --model hf-causal --model_args pretrained=$MODEL_ID,trust_remote_code=$TRUST_REMOTE_CODE,dtype=$DTYPE \
--tasks agieval_aqua_rat,agieval_logiqa_en,agieval_lsat_ar,agieval_lsat_lr,agieval_lsat_rc,agieval_sat_en,agieval_sat_en_without_passage,agieval_sat_math \
--device cuda:$CUDA_DEVICES \
--batch_size $BATCH_SIZE \
--output_path ./${benchmark}.json
benchmark="gpt4all"
echo "================== $(echo $benchmark | tr '[:lower:]' '[:upper:]') [2/4] =================="
python main.py \
--model hf-causal \
--model_args pretrained=$MODEL_ID,trust_remote_code=$TRUST_REMOTE_CODE,dtype=$DTYPE \
--tasks hellaswag,openbookqa,winogrande,arc_easy,arc_challenge,boolq,piqa \
--device cuda:$CUDA_DEVICES \
--batch_size $BATCH_SIZE \
--output_path ./${benchmark}.json
benchmark="truthfulqa"
echo "================== $(echo $benchmark | tr '[:lower:]' '[:upper:]') [3/4] =================="
python main.py \
--model hf-causal \
--model_args pretrained=$MODEL_ID,trust_remote_code=$TRUST_REMOTE_CODE,dtype=$DTYPE \
--tasks truthfulqa_mc \
--device cuda:$CUDA_DEVICES \
--batch_size $BATCH_SIZE \
--output_path ./${benchmark}.json
benchmark="bigbench"
echo "================== $(echo $benchmark | tr '[:lower:]' '[:upper:]') [4/4] =================="
python main.py \
--model hf-causal \
--model_args pretrained=$MODEL_ID,trust_remote_code=$TRUST_REMOTE_CODE,dtype=$DTYPE \
--tasks bigbench_causal_judgement,bigbench_date_understanding,bigbench_disambiguation_qa,bigbench_geometric_shapes,bigbench_logical_deduction_five_objects,bigbench_logical_deduction_seven_objects,bigbench_logical_deduction_three_objects,bigbench_movie_recommendation,bigbench_navigate,bigbench_reasoning_about_colored_objects,bigbench_ruin_names,bigbench_salient_translation_error_detection,bigbench_snarks,bigbench_sports_understanding,bigbench_temporal_sequences,bigbench_tracking_shuffled_objects_five_objects,bigbench_tracking_shuffled_objects_seven_objects,bigbench_tracking_shuffled_objects_three_objects \
--device cuda:$CUDA_DEVICES \
--batch_size $BATCH_SIZE \
--output_path ./${benchmark}.json
# From a local machine, retrieve the benchmark outputs from the remote eval host
scp ubuntu@129.153.76.104:/home/ubuntu/lm-evaluation-harness/agieval.json .
scp ubuntu@129.153.76.104:/home/ubuntu/lm-evaluation-harness/gpt4all.json .
scp ubuntu@129.153.76.104:/home/ubuntu/lm-evaluation-harness/pile_statistics.json .
scp ubuntu@129.153.76.104:/home/ubuntu/lm-evaluation-harness/truthfulqa.json .
scp ubuntu@129.153.76.104:/home/ubuntu/lm-evaluation-harness/bigbench.json .
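# Quick score summary (a sketch, assuming jq is installed and that this harness
# version writes per-task metrics under a top-level "results" key in each file):
for f in agieval gpt4all truthfulqa bigbench; do
  echo "== $f =="
  jq -r '.results | to_entries[] | "\(.key)\t\(.value | to_entries | map("\(.key)=\(.value)") | join(", "))"' "$f.json"
done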