Skip to content

Instantly share code, notes, and snippets.

@mwakaba2
Last active October 1, 2022 13:31
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save mwakaba2/7b88de0157a32f1edf23e058e061c945 to your computer and use it in GitHub Desktop.
Bash script to execute Bert Base Uncased Model benchmark tests in GCP (multiple threads)
#!/bin/bash
# Benchmark bert-base-uncased across PyTorch / TorchScript / TensorFlow and
# ONNX Runtime backends at several thread counts on GCP, then upload the
# de-duplicated CSV summaries to a GCS bucket.
#
# -u: error on unset variables; pipefail: a pipeline fails if any stage fails.
# -e is deliberately omitted so that one failing benchmark configuration does
# not abort the rest of the sweep.
set -uo pipefail

readonly MACHINE_INFO_SCRIPT="onnxruntime/onnxruntime/python/tools/transformers/machine_info.py"
readonly BENCHMARK_SCRIPT="onnxruntime/onnxruntime/python/tools/transformers/benchmark.py"
readonly ITERATIONS=100
readonly BATCH_SIZE=1
readonly SEQUENCE_LENGTH=128
readonly MODEL_NAME="bert-base-uncased"
readonly MODEL_PATH="bert-base-uncased/"
# Single timestamp for the whole run so all summary files from one sweep
# share a suffix and sort together.
TIMESTAMP=$(date "+%Y.%m.%d-%H.%M.%S")
readonly TIMESTAMP

# Record the hardware/software environment alongside the benchmark numbers.
python "${MACHINE_INFO_SCRIPT}"
for NUM_THREADS in 2 4 8; do
  # PyTorch/TensorFlow (and onnxruntime < 1.8) parallelize via OpenMP;
  # control intra-op parallelism through OMP_NUM_THREADS.
  export OMP_NUM_THREADS="${NUM_THREADS}"
  echo "Update OMP_NUM_THREADS TO ${OMP_NUM_THREADS}"
  # The following values are set in the base image by default:
  #   KMP_AFFINITY=granularity=fine,verbose,compact,1,0
  #   KMP_BLOCKTIME=0
  #   KMP_INIT_AT_FORK=FALSE
  #   KMP_SETTINGS=1

  # Flags shared by every engine invocation below (kept in one place so the
  # fp32 and int8 runs cannot drift apart).
  COMMON_ARGS=(
    -m "${MODEL_NAME}" --model-path "${MODEL_PATH}" -v -i 1 --overwrite
    -b "${BATCH_SIZE}" -s "${SEQUENCE_LENGTH}" -t "${ITERATIONS}"
    -f fusion.csv -r result.csv -d detail.csv
    -c ./cache_models --onnx_dir ./onnx_models
    --num_threads "${NUM_THREADS}" -o
  )

  # Each engine is benchmarked at fp32 (default precision) and int8.
  for ENGINE in torch torchscript tensorflow; do
    python "${BENCHMARK_SCRIPT}" -e "${ENGINE}" "${COMMON_ARGS[@]}"
    python "${BENCHMARK_SCRIPT}" -e "${ENGINE}" "${COMMON_ARGS[@]}" -p int8
  done

  # De-duplicate repeated CSV rows (headers are re-emitted per run) into
  # per-thread-count summary files.
  awk '!x[$0]++' ./result.csv > "summary_result_openmp_thread_count_${NUM_THREADS}_${TIMESTAMP}.csv"
  awk '!x[$0]++' ./fusion.csv > "summary_fusion_openmp_thread_count_${NUM_THREADS}_${TIMESTAMP}.csv"
  awk '!x[$0]++' ./detail.csv > "summary_detail_openmp_thread_count_${NUM_THREADS}_${TIMESTAMP}.csv"
done
# onnxruntime >= 1.8 uses its own thread pool instead of OpenMP, so there is
# no need to export OMP_NUM_THREADS for these runs.
# Unset the KMP_* variables (set by default in the base image) because they
# slow down ONNX Runtime:
# https://github.com/microsoft/onnxruntime/issues/8385#issuecomment-883154985
# Hoisted out of the loop: unsetting once is equivalent to unsetting every
# iteration.
unset KMP_AFFINITY KMP_BLOCKTIME KMP_INIT_AT_FORK KMP_SETTINGS
for NUM_THREADS in 2 4 8; do
  # Flags shared by every ONNX Runtime invocation below.
  ORT_ARGS=(
    -m "${MODEL_NAME}" --model-path "${MODEL_PATH}" -v -i 1 --overwrite
    -b "${BATCH_SIZE}" -s "${SEQUENCE_LENGTH}" -t "${ITERATIONS}"
    -f fusion.csv -r result.csv -d detail.csv
    -c ./cache_models --onnx_dir ./onnx_models
    --num_threads "${NUM_THREADS}"
  )

  # ONNX Runtime with a PyTorch-exported model (fp32 / int8).
  python "${BENCHMARK_SCRIPT}" "${ORT_ARGS[@]}" -o
  python "${BENCHMARK_SCRIPT}" "${ORT_ARGS[@]}" -o -p int8
  # ONNX Runtime with a TensorFlow-exported model (fp32 / int8).
  python "${BENCHMARK_SCRIPT}" "${ORT_ARGS[@]}" --model_source tf -o
  python "${BENCHMARK_SCRIPT}" "${ORT_ARGS[@]}" --model_source tf -o -p int8

  # Append de-duplicated rows to the per-thread-count summary files started
  # by the OpenMP loop above.
  awk '!x[$0]++' ./result.csv >> "summary_result_openmp_thread_count_${NUM_THREADS}_${TIMESTAMP}.csv"
  awk '!x[$0]++' ./fusion.csv >> "summary_fusion_openmp_thread_count_${NUM_THREADS}_${TIMESTAMP}.csv"
  awk '!x[$0]++' ./detail.csv >> "summary_detail_openmp_thread_count_${NUM_THREADS}_${TIMESTAMP}.csv"
done
# Upload all summary CSVs to the GCS results bucket.
# The wildcard is quoted on purpose: gsutil performs its own wildcard
# expansion for local paths, which behaves consistently even when the shell
# itself would find no match.
if command -v gsutil >/dev/null 2>&1; then
  gsutil cp "summary*" gs://bert-inference-results
else
  echo "gsutil not found; benchmark summaries were NOT uploaded" >&2
  exit 1
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment