mwakaba2/run_multiple_threads_benchmark.sh Secret

## run_multiple_threads_benchmark.sh
#!/bin/bash

MACHINE_INFO_SCRIPT="onnxruntime/onnxruntime/python/tools/transformers/machine_info.py"
BENCHMARK_SCRIPT="onnxruntime/onnxruntime/python/tools/transformers/benchmark.py"
ITERATIONS=100
BATCH_SIZE=1
SEQUENCE_LENGTH=128
MODEL_NAME="bert-base-uncased"
MODEL_PATH="bert-base-uncased/"
TIMESTAMP=$(date "+%Y.%m.%d-%H.%M.%S")

python "${MACHINE_INFO_SCRIPT}"

for NUM_THREADS in 2 4 8; do
  export OMP_NUM_THREADS="${NUM_THREADS}"
  echo "Update OMP_NUM_THREADS TO ${OMP_NUM_THREADS}"

  # the following values are set in the base image by default
  # KMP_AFFINITY=granularity=fine,verbose,compact,1,0
  # KMP_BLOCKTIME=0
  # KMP_INIT_AT_FORK=FALSE
  # KMP_SETTINGS=1

  # Pytorch Model (fp32/int8)
  python "${BENCHMARK_SCRIPT}" -e torch -m "${MODEL_NAME}" --model-path "${MODEL_PATH}" -v -i 1 --overwrite -b "${BATCH_SIZE}" -s "${SEQUENCE_LENGTH}" -t "${ITERATIONS}" -f fusion.csv -r result.csv -d detail.csv -c ./cache_models --onnx_dir ./onnx_models --num_threads "${NUM_THREADS}" -o
  python "${BENCHMARK_SCRIPT}" -e torch -m "${MODEL_NAME}" --model-path "${MODEL_PATH}" -v -i 1 --overwrite -b "${BATCH_SIZE}" -s "${SEQUENCE_LENGTH}" -t "${ITERATIONS}" -f fusion.csv -r result.csv -d detail.csv -c ./cache_models --onnx_dir ./onnx_models --num_threads "${NUM_THREADS}" -o -p int8

  # Torchscript Model (fp32/int8)
  python "${BENCHMARK_SCRIPT}" -e torchscript -m "${MODEL_NAME}" --model-path "${MODEL_PATH}" -v -i 1 --overwrite -b "${BATCH_SIZE}" -s "${SEQUENCE_LENGTH}" -t "${ITERATIONS}" -f fusion.csv -r result.csv -d detail.csv -c ./cache_models --onnx_dir ./onnx_models --num_threads "${NUM_THREADS}" -o
  python "${BENCHMARK_SCRIPT}" -e torchscript -m "${MODEL_NAME}" --model-path "${MODEL_PATH}" -v -i 1 --overwrite -b "${BATCH_SIZE}" -s "${SEQUENCE_LENGTH}" -t "${ITERATIONS}" -f fusion.csv -r result.csv -d detail.csv -c ./cache_models --onnx_dir ./onnx_models --num_threads "${NUM_THREADS}" -o -p int8

  # Tensorflow Model (fp32/int8)
  python "${BENCHMARK_SCRIPT}" -e tensorflow -m "${MODEL_NAME}" --model-path "${MODEL_PATH}" -v -i 1 --overwrite -b "${BATCH_SIZE}" -s "${SEQUENCE_LENGTH}" -t "${ITERATIONS}" -f fusion.csv -r result.csv -d detail.csv -c ./cache_models --onnx_dir ./onnx_models --num_threads "${NUM_THREADS}" -o
  python "${BENCHMARK_SCRIPT}" -e tensorflow -m "${MODEL_NAME}" --model-path "${MODEL_PATH}" -v -i 1 --overwrite -b "${BATCH_SIZE}" -s "${SEQUENCE_LENGTH}" -t "${ITERATIONS}" -f fusion.csv -r result.csv -d detail.csv -c ./cache_models --onnx_dir ./onnx_models --num_threads "${NUM_THREADS}" -o -p int8

  awk '!x[$0]++' ./result.csv > "summary_result_openmp_thread_count_${NUM_THREADS}_${TIMESTAMP}.csv"
  awk '!x[$0]++' ./fusion.csv > "summary_fusion_openmp_thread_count_${NUM_THREADS}_${TIMESTAMP}.csv"
  awk '!x[$0]++' ./detail.csv > "summary_detail_openmp_thread_count_${NUM_THREADS}_${TIMESTAMP}.csv"
done

for NUM_THREADS in 2 4 8; do
  # onnxruntime version >= 1.8 uses thread pool instead of OMP, so no need to export OMP_NUM_THREADS

  # unsetting the following env variables is needed for two reasons
  # 1. The variables are set by default in the base image
  # 2. It slows down the onnx runtime speed
  # https://github.com/microsoft/onnxruntime/issues/8385#issuecomment-883154985
  unset KMP_AFFINITY
  unset KMP_BLOCKTIME
  unset KMP_INIT_AT_FORK
  unset KMP_SETTINGS

  # Onnx Runtime Pytorch Model (fp32/int8)
  python "${BENCHMARK_SCRIPT}" -m "${MODEL_NAME}" --model-path "${MODEL_PATH}" -v -i 1 --overwrite -b "${BATCH_SIZE}" -s "${SEQUENCE_LENGTH}" -t "${ITERATIONS}" -f fusion.csv -r result.csv -d detail.csv -c ./cache_models --onnx_dir ./onnx_models --num_threads "${NUM_THREADS}" -o
  python "${BENCHMARK_SCRIPT}" -m "${MODEL_NAME}" --model-path "${MODEL_PATH}" -v -i 1 --overwrite -b "${BATCH_SIZE}" -s "${SEQUENCE_LENGTH}" -t "${ITERATIONS}" -f fusion.csv -r result.csv -d detail.csv -c ./cache_models --onnx_dir ./onnx_models --num_threads "${NUM_THREADS}" -o -p int8

  # Onnx Runtime TF Model (fp32/int8)
  python "${BENCHMARK_SCRIPT}" -m "${MODEL_NAME}" --model-path "${MODEL_PATH}" -v -i 1 --overwrite -b "${BATCH_SIZE}" -s "${SEQUENCE_LENGTH}" -t "${ITERATIONS}" -f fusion.csv -r result.csv -d detail.csv -c ./cache_models --onnx_dir ./onnx_models --num_threads "${NUM_THREADS}" --model_source tf -o
  python "${BENCHMARK_SCRIPT}" -m "${MODEL_NAME}" --model-path "${MODEL_PATH}" -v -i 1 --overwrite -b "${BATCH_SIZE}" -s "${SEQUENCE_LENGTH}" -t "${ITERATIONS}" -f fusion.csv -r result.csv -d detail.csv -c ./cache_models --onnx_dir ./onnx_models --num_threads "${NUM_THREADS}" --model_source tf -o -p int8

  awk '!x[$0]++' ./result.csv >> "summary_result_openmp_thread_count_${NUM_THREADS}_${TIMESTAMP}.csv"
  awk '!x[$0]++' ./fusion.csv >> "summary_fusion_openmp_thread_count_${NUM_THREADS}_${TIMESTAMP}.csv"
  awk '!x[$0]++' ./detail.csv >> "summary_detail_openmp_thread_count_${NUM_THREADS}_${TIMESTAMP}.csv"
done

# Upload results to GCS bucket
gsutil cp "summary*" gs://bert-inference-results
	#!/bin/bash

	MACHINE_INFO_SCRIPT="onnxruntime/onnxruntime/python/tools/transformers/machine_info.py"
	BENCHMARK_SCRIPT="onnxruntime/onnxruntime/python/tools/transformers/benchmark.py"
	ITERATIONS=100
	BATCH_SIZE=1
	SEQUENCE_LENGTH=128
	MODEL_NAME="bert-base-uncased"
	MODEL_PATH="bert-base-uncased/"
	TIMESTAMP=$(date "+%Y.%m.%d-%H.%M.%S")

	python "${MACHINE_INFO_SCRIPT}"

	for NUM_THREADS in 2 4 8; do
	export OMP_NUM_THREADS="${NUM_THREADS}"
	echo "Update OMP_NUM_THREADS TO ${OMP_NUM_THREADS}"

	# the following values are set in the base image by default
	# KMP_AFFINITY=granularity=fine,verbose,compact,1,0
	# KMP_BLOCKTIME=0
	# KMP_INIT_AT_FORK=FALSE
	# KMP_SETTINGS=1

	# Pytorch Model (fp32/int8)
	python "${BENCHMARK_SCRIPT}" -e torch -m "${MODEL_NAME}" --model-path "${MODEL_PATH}" -v -i 1 --overwrite -b "${BATCH_SIZE}" -s "${SEQUENCE_LENGTH}" -t "${ITERATIONS}" -f fusion.csv -r result.csv -d detail.csv -c ./cache_models --onnx_dir ./onnx_models --num_threads "${NUM_THREADS}" -o
	python "${BENCHMARK_SCRIPT}" -e torch -m "${MODEL_NAME}" --model-path "${MODEL_PATH}" -v -i 1 --overwrite -b "${BATCH_SIZE}" -s "${SEQUENCE_LENGTH}" -t "${ITERATIONS}" -f fusion.csv -r result.csv -d detail.csv -c ./cache_models --onnx_dir ./onnx_models --num_threads "${NUM_THREADS}" -o -p int8

	# Torchscript Model (fp32/int8)
	python "${BENCHMARK_SCRIPT}" -e torchscript -m "${MODEL_NAME}" --model-path "${MODEL_PATH}" -v -i 1 --overwrite -b "${BATCH_SIZE}" -s "${SEQUENCE_LENGTH}" -t "${ITERATIONS}" -f fusion.csv -r result.csv -d detail.csv -c ./cache_models --onnx_dir ./onnx_models --num_threads "${NUM_THREADS}" -o
	python "${BENCHMARK_SCRIPT}" -e torchscript -m "${MODEL_NAME}" --model-path "${MODEL_PATH}" -v -i 1 --overwrite -b "${BATCH_SIZE}" -s "${SEQUENCE_LENGTH}" -t "${ITERATIONS}" -f fusion.csv -r result.csv -d detail.csv -c ./cache_models --onnx_dir ./onnx_models --num_threads "${NUM_THREADS}" -o -p int8

	# Tensorflow Model (fp32/int8)
	python "${BENCHMARK_SCRIPT}" -e tensorflow -m "${MODEL_NAME}" --model-path "${MODEL_PATH}" -v -i 1 --overwrite -b "${BATCH_SIZE}" -s "${SEQUENCE_LENGTH}" -t "${ITERATIONS}" -f fusion.csv -r result.csv -d detail.csv -c ./cache_models --onnx_dir ./onnx_models --num_threads "${NUM_THREADS}" -o
	python "${BENCHMARK_SCRIPT}" -e tensorflow -m "${MODEL_NAME}" --model-path "${MODEL_PATH}" -v -i 1 --overwrite -b "${BATCH_SIZE}" -s "${SEQUENCE_LENGTH}" -t "${ITERATIONS}" -f fusion.csv -r result.csv -d detail.csv -c ./cache_models --onnx_dir ./onnx_models --num_threads "${NUM_THREADS}" -o -p int8

	awk '!x[$0]++' ./result.csv > "summary_result_openmp_thread_count_${NUM_THREADS}_${TIMESTAMP}.csv"
	awk '!x[$0]++' ./fusion.csv > "summary_fusion_openmp_thread_count_${NUM_THREADS}_${TIMESTAMP}.csv"
	awk '!x[$0]++' ./detail.csv > "summary_detail_openmp_thread_count_${NUM_THREADS}_${TIMESTAMP}.csv"
	done

	for NUM_THREADS in 2 4 8; do
	# onnxruntime version >= 1.8 uses thread pool instead of OMP, so no need to export OMP_NUM_THREADS

	# unsetting the following env variables is needed for two reasons
	# 1. The variables are set by default in the base image
	# 2. It slows down the onnx runtime speed
	# https://github.com/microsoft/onnxruntime/issues/8385#issuecomment-883154985
	unset KMP_AFFINITY
	unset KMP_BLOCKTIME
	unset KMP_INIT_AT_FORK
	unset KMP_SETTINGS

	# Onnx Runtime Pytorch Model (fp32/int8)
	python "${BENCHMARK_SCRIPT}" -m "${MODEL_NAME}" --model-path "${MODEL_PATH}" -v -i 1 --overwrite -b "${BATCH_SIZE}" -s "${SEQUENCE_LENGTH}" -t "${ITERATIONS}" -f fusion.csv -r result.csv -d detail.csv -c ./cache_models --onnx_dir ./onnx_models --num_threads "${NUM_THREADS}" -o
	python "${BENCHMARK_SCRIPT}" -m "${MODEL_NAME}" --model-path "${MODEL_PATH}" -v -i 1 --overwrite -b "${BATCH_SIZE}" -s "${SEQUENCE_LENGTH}" -t "${ITERATIONS}" -f fusion.csv -r result.csv -d detail.csv -c ./cache_models --onnx_dir ./onnx_models --num_threads "${NUM_THREADS}" -o -p int8

	# Onnx Runtime TF Model (fp32/int8)
	python "${BENCHMARK_SCRIPT}" -m "${MODEL_NAME}" --model-path "${MODEL_PATH}" -v -i 1 --overwrite -b "${BATCH_SIZE}" -s "${SEQUENCE_LENGTH}" -t "${ITERATIONS}" -f fusion.csv -r result.csv -d detail.csv -c ./cache_models --onnx_dir ./onnx_models --num_threads "${NUM_THREADS}" --model_source tf -o
	python "${BENCHMARK_SCRIPT}" -m "${MODEL_NAME}" --model-path "${MODEL_PATH}" -v -i 1 --overwrite -b "${BATCH_SIZE}" -s "${SEQUENCE_LENGTH}" -t "${ITERATIONS}" -f fusion.csv -r result.csv -d detail.csv -c ./cache_models --onnx_dir ./onnx_models --num_threads "${NUM_THREADS}" --model_source tf -o -p int8

	awk '!x[$0]++' ./result.csv >> "summary_result_openmp_thread_count_${NUM_THREADS}_${TIMESTAMP}.csv"
	awk '!x[$0]++' ./fusion.csv >> "summary_fusion_openmp_thread_count_${NUM_THREADS}_${TIMESTAMP}.csv"
	awk '!x[$0]++' ./detail.csv >> "summary_detail_openmp_thread_count_${NUM_THREADS}_${TIMESTAMP}.csv"
	done

	# Upload results to GCS bucket
	gsutil cp "summary*" gs://bert-inference-results