######
Triton Inference Server provides a cloud and edge inferencing solution optimized for both CPUs and GPUs.
Triton supports HTTP/REST and gRPC protocols that allow remote clients to request inferencing for any
model managed by the server.
######
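# Once a server is running (see the "Run Triton" section below), those endpoints can be
# queried directly. A minimal sketch, assuming the default HTTP port 8000:
curl localhost:8000/v2                        # server metadata
curl localhost:8000/v2/models/densenet_onnx   # metadata for a single model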
################### INSTALL DOCKER ########################
# SET UP THE REPOSITORY
sudo apt-get update
sudo apt-get install apt-transport-https ca-certificates curl gnupg lsb-release
# Add Docker’s official GPG key
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
echo \
"deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu \
$(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
# INSTALL DOCKER ENGINE
sudo apt-get update
sudo apt-get install docker-ce docker-ce-cli containerd.io
# To install a specific version of Docker Engine, list the available versions in the repo, then select and install:
apt-cache madison docker-ce
# ex: sudo apt-get install docker-ce=<VERSION_STRING> docker-ce-cli=<VERSION_STRING> containerd.io
sudo apt-get install docker-ce=5:20.10.6~3-0~ubuntu-bionic docker-ce-cli=5:20.10.6~3-0~ubuntu-bionic containerd.io
# Verify that Docker Engine is installed correctly by running the hello-world image.
sudo docker run hello-world
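# (Optional) Add your user to the docker group so docker runs without sudo;
# takes effect after logging out and back in:
sudo usermod -aG docker $USER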
################### INSTALL NVIDIA Container Toolkit ########################
###### Step 0: Pre-Requisites
# Before installing containerd as the container engine on the system, load a few prerequisite kernel modules:
sudo modprobe overlay \
&& sudo modprobe br_netfilter
# We can also ensure these are persistent:
cat <<EOF | sudo tee /etc/modules-load.d/containerd.conf
overlay
br_netfilter
EOF
# If you’re going to use containerd as a CRI runtime with Kubernetes, configure the sysctl parameters:
cat <<EOF | sudo tee /etc/sysctl.d/99-kubernetes-cri.conf
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-ip6tables = 1
EOF
# Then, apply the sysctl settings:
sudo sysctl --system
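# (Optional) Sanity-check that the modules are loaded and the sysctl values took effect:
lsmod | grep -E 'overlay|br_netfilter'
sysctl net.bridge.bridge-nf-call-iptables net.ipv4.ip_forward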
###### Step 1: Install containerd
# Install packages to allow apt to use a repository over HTTPS:
sudo apt-get install -y \
apt-transport-https \
ca-certificates \
curl \
gnupg-agent \
software-properties-common
# Add the repository GPG key and the repo:
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
sudo add-apt-repository \
"deb [arch=amd64] https://download.docker.com/linux/ubuntu \
$(lsb_release -cs) \
stable"
# Now, install the containerd package:
sudo apt-get update \
&& sudo apt-get install -y containerd.io
# Configure containerd with a default config.toml configuration file:
sudo mkdir -p /etc/containerd \
&& sudo containerd config default | sudo tee /etc/containerd/config.toml
# Create a patch that adds the nvidia runtime to the generated config. Note: for `patch`
# to apply cleanly, the context lines below must match the indentation in your config.toml.
cat <<EOF > containerd-config.patch
--- config.toml.orig	2020-12-18 18:21:41.884984894 +0000
+++ /etc/containerd/config.toml	2020-12-18 18:23:38.137796223 +0000
@@ -94,6 +94,15 @@
        privileged_without_host_devices = false
        base_runtime_spec = ""
        [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
+          SystemdCgroup = true
+       [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia]
+          privileged_without_host_devices = false
+          runtime_engine = ""
+          runtime_root = ""
+          runtime_type = "io.containerd.runc.v1"
+          [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options]
+            BinaryName = "/usr/bin/nvidia-container-runtime"
+            SystemdCgroup = true
        [plugins."io.containerd.grpc.v1.cri".cni]
          bin_dir = "/opt/cni/bin"
          conf_dir = "/etc/cni/net.d"
EOF
# Apply the patch and restart containerd to pick up the new runtime:
sudo patch /etc/containerd/config.toml containerd-config.patch
sudo systemctl restart containerd
# You can test the installation by using the Docker hello-world container with the ctr tool:
sudo ctr image pull docker.io/library/hello-world:latest \
&& sudo ctr run --rm -t docker.io/library/hello-world:latest hello-world
###### Step 2: Install NVIDIA Container Toolkit
# First, setup the package repository and GPG key:
distribution=$(. /etc/os-release;echo $ID$VERSION_ID) \
&& curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - \
&& curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
# Now, install the NVIDIA runtime:
sudo apt-get update \
&& sudo apt-get install -y nvidia-container-runtime
# Then, we can test a GPU container:
sudo ctr image pull docker.io/nvidia/cuda:11.0-base
sudo ctr run --rm --gpus 0 -t docker.io/nvidia/cuda:11.0-base cuda-11.0-base nvidia-smi
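# If the GPU test fails, first confirm the NVIDIA driver works on the host itself:
nvidia-smi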
################### INSTALL NVIDIA DOCKER ########################
#### Setting up Docker
# Docker-CE on Ubuntu can be set up using Docker’s official convenience script:
curl https://get.docker.com | sh \
&& sudo systemctl --now enable docker
#### Setting up NVIDIA Container Toolkit
# Setup the stable repository and the GPG key:
distribution=$(. /etc/os-release;echo $ID$VERSION_ID) \
&& curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - \
&& curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
# (Optional) Add the experimental repository for access to pre-release features:
curl -s -L https://nvidia.github.io/nvidia-container-runtime/experimental/$distribution/nvidia-container-runtime.list | sudo tee /etc/apt/sources.list.d/nvidia-container-runtime.list
# Install the nvidia-docker2 package (and dependencies) after updating the package listing:
sudo apt-get update
sudo apt-get install -y nvidia-docker2
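# For reference, nvidia-docker2 registers the runtime in /etc/docker/daemon.json;
# the contents should look roughly like this (may vary slightly by version):
# {
#     "runtimes": {
#         "nvidia": {
#             "path": "nvidia-container-runtime",
#             "runtimeArgs": []
#         }
#     }
# }
cat /etc/docker/daemon.json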
# Restart the Docker daemon so it picks up the new default runtime configuration:
sudo systemctl restart docker
# At this point, a working setup can be tested by running a base CUDA container:
sudo docker run --rm --gpus all nvidia/cuda:11.0-base nvidia-smi
################### Install Triton Docker Image ########################
sudo docker pull nvcr.io/nvidia/tritonserver:<xx.yy>-py3
# EX: sudo docker pull nvcr.io/nvidia/tritonserver:21.03-py3
# To run the example models, create a model repository:
git clone https://github.com/triton-inference-server/server.git
cd server/docs/examples
./fetch_models.sh
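# After fetch_models.sh finishes, the repository should look roughly like this
# (one directory per model, numeric version subdirectories, optional config.pbtxt):
# model_repository/
# ├── densenet_onnx/
# │   ├── 1/
# │   │   └── model.onnx
# │   ├── config.pbtxt
# │   └── densenet_labels.txt
# └── ... (other example models)
ls -R model_repository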
################## Run Triton ###########################################
######### Run on System with GPUs
# Replace /home/vasper/Hasan/server/docs/examples/model_repository with the absolute path to your model repository.
sudo docker run --gpus=1 --rm -p8000:8000 -p8001:8001 -p8002:8002 -v /home/vasper/Hasan/server/docs/examples/model_repository:/models nvcr.io/nvidia/tritonserver:21.03-py3 tritonserver --model-repository=/models
######### Run on CPU-Only System
sudo docker run --rm -p8000:8000 -p8001:8001 -p8002:8002 -v /home/vasper/Hasan/server/docs/examples/model_repository:/models nvcr.io/nvidia/tritonserver:21.03-py3 tritonserver --model-repository=/models
######### Verify Triton Is Running Correctly
curl -v localhost:8000/v2/health/ready
# The HTTP request returns status 200 if Triton is ready and non-200 if it is not ready.
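# A minimal scripted check that captures only the status code:
status=$(curl -s -o /dev/null -w "%{http_code}" localhost:8000/v2/health/ready)
[ "$status" = "200" ] && echo "Triton is ready" || echo "Triton is NOT ready (status: $status)"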
######### Getting The Client Examples
# Use docker pull to get the client libraries and examples image from NGC.
sudo docker pull nvcr.io/nvidia/tritonserver:21.03-py3-sdk
# Run the client image.
sudo docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:21.03-py3-sdk
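# Inside the SDK container, the prebuilt example clients live under /workspace/install/bin:
ls /workspace/install/bin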
######### Running The Image Classification Example
# From within the nvcr.io/nvidia/tritonserver:<xx.yy>-py3-sdk image, run the example image_client application to perform image classification using the example densenet_onnx model.
# To send a request for the densenet_onnx model, use an image from the /workspace/images directory. In this case we ask for the top 3 classifications:
/workspace/install/bin/image_client -m densenet_onnx -c 3 -s INCEPTION /workspace/images/mug.jpg
# The client echoes each request ("Request 0, batch size 1") followed by the top 3 classes.
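# The same request can also go over gRPC instead of HTTP. A sketch, assuming the example
# client's -i/-u options select protocol and server URL (run image_client -h to confirm):
/workspace/install/bin/image_client -i grpc -u localhost:8001 -m densenet_onnx -c 3 -s INCEPTION /workspace/images/mug.jpg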
******************************** CONGRATULATIONS ********************************************