Dockerfile for benchmarking the BERT Base Uncased model on GCP (multi-threaded execution; OpenMP disabled inside onnxruntime).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# syntax=docker/dockerfile:1

# Base image: TF Enterprise (TensorFlow 2.5, CPU-only) from GCP's Deep Learning
# Containers. NOTE(review): pin a specific release tag or digest instead of
# :latest so benchmark results stay reproducible across rebuilds.
FROM gcr.io/deeplearning-platform-release/tf2-cpu.2-5:latest

WORKDIR /workspace

# OpenMP runtime configuration for the benchmark. ACTIVE keeps OMP worker
# threads spinning between tasks (lower latency at the cost of CPU).
ENV OMP_ENABLED=true \
    OMP_WAIT_POLICY=ACTIVE

# onnxruntime version >= 1.8 replaced OMP with a thread pool:
# https://github.com/microsoft/onnxruntime/issues/8385
# torch / onnxruntime / transformers are pinned for reproducible numbers;
# --no-cache-dir on every pip call keeps the download cache out of the layers.
# NOTE(review): the remaining packages are unpinned — pin them too if results
# must be reproducible long-term.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir torch==1.8.1+cpu -f https://download.pytorch.org/whl/torch_stable.html && \
    pip install --no-cache-dir \
        coloredlogs \
        onnx \
        onnxruntime==1.8.1 \
        py-cpuinfo \
        py3nvml \
        sympy \
        tf2onnx \
        transformers==4.6.0

# Service account credentials are required to store benchmark results in a GCS bucket.
# Requirements before building this image:
#   1. Create a GCS bucket named gs://bert-inference-results
#   2. Create a service account and grant it access to the GCS bucket.
# WARNING(review): COPYing a key file bakes the secret into an image layer —
# it remains visible via `docker history` / image export even if deleted later.
# Prefer a BuildKit secret mount (RUN --mount=type=secret,...) or mounting the
# key at container run time.
COPY ./<SERVICE_ACCOUNT_KEY_FILE>.json /workspace/<SERVICE_ACCOUNT_KEY_FILE>.json
RUN gcloud auth activate-service-account --key-file=/workspace/<SERVICE_ACCOUNT_KEY_FILE>.json

# Benchmark driver script and the model under test.
COPY ./run_multiple_threads_benchmark.sh /workspace/
COPY ./bert-base-uncased /workspace/bert-base-uncased

# ADD is used deliberately here for its local-tarball auto-extraction: the
# archive holds the customized onnxruntime benchmark scripts from
# https://github.com/mwakaba2/onnxruntime/tree/benchmark-bert-base-uncased
ADD onnxruntime_benchmark.tar.gz /workspace

# Exec-form CMD; the script's shebang supplies the interpreter.
CMD [ "./run_multiple_threads_benchmark.sh" ]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The run_multiple_threads_benchmark.sh script referenced above can be found here.