Skip to content

Instantly share code, notes, and snippets.

@philschmid
Created March 1, 2024 18:26
Show Gist options
  • Save philschmid/0ed1531fe28a11cb7f5d6bce4cb96f61 to your computer and use it in GitHub Desktop.
Save philschmid/0ed1531fe28a11cb7f5d6bce4cb96f61 to your computer and use it in GitHub Desktop.
#!/bin/bash
# Benchmark a Hugging Face model with lm-evaluation-harness.
#
# Options:
#   --model-id ID                      model to evaluate (required)
#   --save-directory DIR               working/result directory (default: nous)
#   --benchmark "A B ..."              whitespace-separated benchmarks to run
#                                      (default: agieval gpt4all truthfulqa bigbench)
#   --trust-remote-code [True|False]   enable trust_remote_code; the explicit value
#                                      is optional and defaults to True
start=$(date +%s) # start timestamp for the elapsed-time report at the end of the script

# Defaults; each may be overridden from the command line.
RESULT_DIRECTORY="nous"
HF_MODEL_ID=""
TRUST_REMOTE_CODE="False"
CURRENT_DIR=$(pwd)
# Whitespace-separated list of benchmarking tasks.
BENCMARK="agieval gpt4all truthfulqa bigbench"
# Set to 1 once --model-id has been seen.
HF_MODEL_ID_PROVIDED=0

# Abort unless option $1 is followed by a value; $2 is the number of arguments left.
require_value() {
  if [[ "$2" -lt 2 ]]; then
    echo "Missing value for parameter: $1" >&2
    exit 1
  fi
}

# Parse command line arguments.
while [[ "$#" -gt 0 ]]; do
  case "$1" in
    --model-id)
      require_value "$1" "$#"
      HF_MODEL_ID="$2"
      HF_MODEL_ID_PROVIDED=1
      shift # consume the value
      ;;
    --save-directory)
      require_value "$1" "$#"
      RESULT_DIRECTORY="$2"
      shift
      ;;
    --benchmark)
      require_value "$1" "$#"
      BENCMARK="$2"
      shift
      ;;
    --trust-remote-code)
      TRUST_REMOTE_CODE="True"
      # This is a flag, so it must not swallow a following option. A following
      # non-option word is still consumed as an explicit value, which keeps
      # invocations such as "--trust-remote-code True" working.
      if [[ "$#" -ge 2 && "$2" != --* ]]; then
        case "$2" in
          [Ff]alse | FALSE | 0) TRUST_REMOTE_CODE="False" ;;
        esac
        shift
      fi
      ;;
    *)
      echo "Unknown parameter: $1" >&2
      exit 1
      ;;
  esac
  shift # move to the next argument
done

# The model id is mandatory and must not be empty.
if [[ "$HF_MODEL_ID_PROVIDED" -eq 0 || -z "$HF_MODEL_ID" ]]; then
  echo "--model-id parameter is required." >&2
  exit 1
fi
# Detect the number of NVIDIA GPUs and create a device string.
# Only lines of the form "GPU <index>: ..." are counted, so that any other text
# printed by nvidia-smi (or a missing nvidia-smi binary) is not mistaken for a GPU.
gpu_count=0
if command -v nvidia-smi >/dev/null 2>&1; then
  # grep -c prints 0 (and returns non-zero) when nothing matches; the printed
  # count is all that is needed here.
  gpu_count=$(nvidia-smi -L 2>/dev/null | grep -c '^GPU ')
fi
if [[ "$gpu_count" -eq 0 ]]; then
  echo "No NVIDIA GPUs detected. Exiting." >&2
  exit 1
fi
# Construct the CUDA device string: the indices 0..gpu_count-1 joined by commas.
cuda_devices=""
for ((i = 0; i < gpu_count; i++)); do
  # ${var:+,} yields a comma only once the string is non-empty.
  cuda_devices+="${cuda_devices:+,}$i"
done
#### Report the effective settings, one "NAME: value" line each ####
printf '%s\n' \
  "HF_MODEL_ID: $HF_MODEL_ID" \
  "RESULT_DIRECTORY: $RESULT_DIRECTORY" \
  "TRUST_REMOTE_CODE: $TRUST_REMOTE_CODE" \
  "CUDA_DEVICES: $cuda_devices" \
  "BENCHMARK: $BENCMARK"
# Clone the repository and install the requirements for benchmarking.
# Every step is checked: continuing after a failed cd or clone would run the
# following commands in the wrong directory.
mkdir -p -- "$RESULT_DIRECTORY" || {
  echo "Cannot create directory: $RESULT_DIRECTORY" >&2
  exit 1
}
cd -- "$RESULT_DIRECTORY" || {
  echo "Cannot enter directory: $RESULT_DIRECTORY" >&2
  exit 1
}
# Reuse an existing checkout so the script can be re-run with the same directory.
if [[ ! -d lm-evaluation-harness/.git ]]; then
  git clone -b add-agieval https://github.com/dmahan93/lm-evaluation-harness || {
    echo "Cloning lm-evaluation-harness failed." >&2
    exit 1
  }
fi
cd lm-evaluation-harness || {
  echo "Cannot enter directory: lm-evaluation-harness" >&2
  exit 1
}
pip install -e . || {
  echo "Installing lm-evaluation-harness failed." >&2
  exit 1
}
pip install -q requests accelerate sentencepiece pytablewriter einops protobuf || {
  echo "Installing additional requirements failed." >&2
  exit 1
}
# #################### Run Nous Benchmarking ####################

# Print the comma-separated harness task list for the benchmark named in $1.
# Returns 1 and prints nothing when the benchmark name is not known.
tasks_for_benchmark() {
  case "$1" in
    agieval)
      printf '%s\n' "agieval_aqua_rat,agieval_logiqa_en,agieval_lsat_ar,agieval_lsat_lr,agieval_lsat_rc,agieval_sat_en,agieval_sat_en_without_passage,agieval_sat_math"
      ;;
    gpt4all)
      printf '%s\n' "hellaswag,openbookqa,winogrande,arc_easy,arc_challenge,boolq,piqa"
      ;;
    truthfulqa)
      printf '%s\n' "truthfulqa_mc"
      ;;
    bigbench)
      printf '%s\n' "bigbench_causal_judgement,bigbench_date_understanding,bigbench_disambiguation_qa,bigbench_geometric_shapes,bigbench_logical_deduction_five_objects,bigbench_logical_deduction_seven_objects,bigbench_logical_deduction_three_objects,bigbench_movie_recommendation,bigbench_navigate,bigbench_reasoning_about_colored_objects,bigbench_ruin_names,bigbench_salient_translation_error_detection,bigbench_snarks,bigbench_sports_understanding,bigbench_temporal_sequences,bigbench_tracking_shuffled_objects_five_objects,bigbench_tracking_shuffled_objects_seven_objects,bigbench_tracking_shuffled_objects_three_objects"
      ;;
    *)
      return 1
      ;;
  esac
}

# The script has changed directory since start-up, so a relative RESULT_DIRECTORY
# no longer points at the save directory. Anchor it to the directory the script
# was started from (CURRENT_DIR) so results land in the save directory itself.
case "$RESULT_DIRECTORY" in
  /*) output_dir="$RESULT_DIRECTORY" ;;
  *) output_dir="$CURRENT_DIR/$RESULT_DIRECTORY" ;;
esac

failed_benchmarks=""
# BENCMARK is a whitespace-separated list, so word splitting is intentional here.
for bench in $BENCMARK; do
  echo "Running $bench benchmark"
  if ! tasks=$(tasks_for_benchmark "$bench"); then
    echo "Unknown benchmark: $bench" >&2
    exit 1
  fi
  echo "Tasks: $tasks"
  # Run the evaluation harness. A failing benchmark is recorded and the remaining
  # benchmarks still run; the failure is reported through the exit status below.
  if ! python main.py \
    --model hf-causal \
    --model_args "pretrained=$HF_MODEL_ID,trust_remote_code=$TRUST_REMOTE_CODE" \
    --tasks "$tasks" \
    --device "cuda:$cuda_devices" \
    --batch_size auto \
    --output_path "$output_dir/$bench.json"; then
    echo "Benchmark failed: $bench" >&2
    failed_benchmarks+=" $bench"
  fi
done
end=$(date +%s)
echo "Elapsed Time: $((end - start)) seconds"
# Exit non-zero when any benchmark run failed.
if [[ -n "$failed_benchmarks" ]]; then
  echo "Failed benchmarks:$failed_benchmarks" >&2
  exit 1
fi
# Options to run
# **Single Benchmark**
# ./run_nous.sh --model-id HuggingFaceH4/zephyr-7b-gemma-v0.1 --save-directory /path/to/your --benchmark truthfulqa
# **All Benchmarks**
# ./run_nous.sh --model-id HuggingFaceH4/zephyr-7b-gemma-v0.1 --save-directory /path/to/your
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment