Triton Dockerfile
@jlewi · Created July 2, 2024
ARG TRITON_VERSION=2.46.0
ARG TRITON_CONTAINER_VERSION=24.05
ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.05-py3-min
############################################################################
## Production stage: Create container with just inference server executable
############################################################################
FROM ${BASE_IMAGE}
ARG TRITON_VERSION
ARG TRITON_CONTAINER_VERSION
ENV TRITON_SERVER_VERSION ${TRITON_VERSION}
ENV NVIDIA_TRITON_SERVER_VERSION ${TRITON_CONTAINER_VERSION}
LABEL com.nvidia.tritonserver.version="${TRITON_SERVER_VERSION}"
ENV PATH /opt/tritonserver/bin:${PATH}
# Remove once https://github.com/openucx/ucx/pull/9148 is available
# in the min container.
ENV UCX_MEM_EVENTS no
ENV TF_ADJUST_HUE_FUSED 1
ENV TF_ADJUST_SATURATION_FUSED 1
ENV TF_ENABLE_WINOGRAD_NONFUSED 1
ENV TF_AUTOTUNE_THRESHOLD 2
ENV TRITON_SERVER_GPU_ENABLED 1
# Create a user that can be used to run triton as
# non-root. Make sure that this user is given ID 1000. All server
# artifacts copied below are assigned to this user.
ENV TRITON_SERVER_USER=triton-server
RUN userdel tensorrt-server > /dev/null 2>&1 || true \
    && if ! id -u $TRITON_SERVER_USER > /dev/null 2>&1 ; then \
        useradd $TRITON_SERVER_USER; \
    fi \
    && [ `id -u $TRITON_SERVER_USER` -eq 1000 ] \
    && [ `id -g $TRITON_SERVER_USER` -eq 1000 ]
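# Hedged usage note (not part of the original gist): to run the server as
# this non-root user, the UID/GID can be passed at container start, e.g.
#   docker run --user 1000:1000 <image> tritonserver --model-repository=/models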
# Ensure apt-get won't prompt for selecting options
ENV DEBIAN_FRONTEND=noninteractive
# Common dependencies. FIXME: can any of these be made conditional?
# For example, libcurl is only needed for GCS.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        clang \
        curl \
        dirmngr \
        git \
        gperf \
        libb64-0d \
        libcurl4-openssl-dev \
        libgoogle-perftools-dev \
        libjemalloc-dev \
        libnuma-dev \
        libre2-9 \
        software-properties-common \
        wget \
    && rm -rf /var/lib/apt/lists/*
# Set TCMALLOC_RELEASE_RATE for users setting LD_PRELOAD with tcmalloc
ENV TCMALLOC_RELEASE_RATE 200
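# Hedged usage sketch (assumption, not from the original gist): to actually
# preload tcmalloc when launching the server, one might use something like
#   LD_PRELOAD=/usr/lib/$(uname -m)-linux-gnu/libtcmalloc.so.4:${LD_PRELOAD} \
#       tritonserver --model-repository=/models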
ENV DCGM_VERSION 3.2.6
# Install DCGM. Steps from https://developer.nvidia.com/dcgm#Downloads
RUN curl -o /tmp/cuda-keyring.deb \
        https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb \
    && apt install -y /tmp/cuda-keyring.deb \
    && rm /tmp/cuda-keyring.deb \
    && apt-get update \
    && apt-get install -y datacenter-gpu-manager=1:3.2.6
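# Optional sanity check (assumption, not in the original gist): inside a
# running container, `dcgmi discovery -l` should list the visible GPUs if
# DCGM installed correctly.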
# Extra defensive wiring for CUDA Compat lib
RUN ln -sf ${_CUDA_COMPAT_PATH}/lib.real ${_CUDA_COMPAT_PATH}/lib \
    && echo ${_CUDA_COMPAT_PATH}/lib > /etc/ld.so.conf.d/00-cuda-compat.conf \
    && ldconfig \
    && rm -f ${_CUDA_COMPAT_PATH}/lib
# python3, python3-pip and some pip installs required for the python backend
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        python3 \
        libarchive-dev \
        python3-pip \
        libpython3-dev \
    && pip3 install --upgrade pip \
    && pip3 install --upgrade \
        wheel \
        setuptools \
        numpy \
        virtualenv \
    && rm -rf /var/lib/apt/lists/*
WORKDIR /opt/tritonserver
RUN rm -fr /opt/tritonserver/*
ENV NVIDIA_PRODUCT_NAME="Triton Server"
COPY docker/entrypoint.d/ /opt/nvidia/entrypoint.d/
ENV NVIDIA_BUILD_ID <unknown>
LABEL com.nvidia.build.id=<unknown>
LABEL com.nvidia.build.ref=
WORKDIR /opt
COPY --chown=1000:1000 build/install tritonserver
WORKDIR /opt/tritonserver
COPY --chown=1000:1000 NVIDIA_Deep_Learning_Container_License.pdf .
RUN apt-get update && apt-get install -y libcudnn8-dev && ldconfig
# Remove TRT contents that are not needed at runtime
RUN ARCH="$(uname -i)" \
    && rm -fr ${TRT_ROOT}/bin ${TRT_ROOT}/targets/${ARCH}-linux-gnu/bin ${TRT_ROOT}/data \
    && rm -fr ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python \
    && rm -fr ${TRT_ROOT}/samples ${TRT_ROOT}/targets/${ARCH}-linux-gnu/samples
# Install required packages for TRT-LLM models
RUN python3 -m pip install --upgrade pip \
    && pip3 install transformers
# Drop the static libs
RUN ARCH="$(uname -i)" \
    && rm -f ${TRT_ROOT}/targets/${ARCH}-linux-gnu/lib/libnvinfer*.a \
        ${TRT_ROOT}/targets/${ARCH}-linux-gnu/lib/libnvonnxparser_*.a
# Install TensorRT-LLM
RUN python3 -m pip install /opt/tritonserver/backends/tensorrtllm/tensorrt_llm-*.whl -U --pre --extra-index-url https://pypi.nvidia.com \
    && rm -fv /opt/tritonserver/backends/tensorrtllm/tensorrt_llm-*.whl
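# Optional sanity check (assumption, not in the original gist): confirm the
# wheel imports cleanly, e.g.
#   RUN python3 -c "import tensorrt_llm; print(tensorrt_llm.__version__)"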
RUN find /usr -name libtensorrt_llm.so -exec dirname {} \; > /etc/ld.so.conf.d/tensorrt-llm.conf
RUN find /opt/tritonserver -name libtritonserver.so -exec dirname {} \; > /etc/ld.so.conf.d/triton-tensorrtllm-worker.conf
ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH
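# Minimal build/run sketch (the image tag and model repository path are
# illustrative assumptions, not from the original gist):
#   docker build -t tritonserver-trtllm:24.05 .
#   docker run --rm --gpus all --shm-size=1g \
#       -p 8000:8000 -p 8001:8001 -p 8002:8002 \
#       -v /path/to/model_repo:/models \
#       tritonserver-trtllm:24.05 tritonserver --model-repository=/models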