######
Triton Inference Server provides a cloud and edge inferencing solution optimized for both CPUs and GPUs.
Triton supports HTTP/REST and gRPC protocols that allow remote clients to request inferencing for any
model managed by the server.
######
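# Once a server is running (see the "Run Triton" section below), those endpoints can be
# queried directly. A minimal sketch, assuming the default HTTP port 8000:
curl localhost:8000/v2                        # server metadata
curl localhost:8000/v2/models/densenet_onnx   # metadata for a single model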
################### INSTALL DOCKER ########################
# SET UP THE REPOSITORY
sudo apt-get update
sudo apt-get install apt-transport-https ca-certificates curl gnupg lsb-release
# Add Docker’s official GPG key
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
echo \
"deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu \
$(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
# INSTALL DOCKER ENGINE
sudo apt-get update
sudo apt-get install docker-ce docker-ce-cli containerd.io
# To install a specific version of Docker Engine, list the available versions in the repo, then select and install:
apt-cache madison docker-ce
# ex: sudo apt-get install docker-ce=<VERSION_STRING> docker-ce-cli=<VERSION_STRING> containerd.io
sudo apt-get install docker-ce=5:20.10.6~3-0~ubuntu-bionic docker-ce-cli=5:20.10.6~3-0~ubuntu-bionic containerd.io
# Verify that Docker Engine is installed correctly by running the hello-world image.
sudo docker run hello-world
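# (Optional) Add your user to the docker group so docker runs without sudo;
# takes effect after logging out and back in:
sudo usermod -aG docker $USER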
################### INSTALL NVIDIA Container Toolkit ########################
###### Step 0: Pre-Requisites
# Before installing containerd as the container engine on the system, load a few prerequisite kernel modules:
sudo modprobe overlay \
&& sudo modprobe br_netfilter
# We can also ensure these are persistent:
cat <<EOF | sudo tee /etc/modules-load.d/containerd.conf
overlay
br_netfilter
EOF
# If you’re going to use containerd as a CRI runtime with Kubernetes, configure the sysctl parameters:
cat <<EOF | sudo tee /etc/sysctl.d/99-kubernetes-cri.conf
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-ip6tables = 1
EOF
# Then, apply the sysctl settings:
sudo sysctl --system
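# (Optional) Sanity-check that the modules are loaded and the sysctl values took effect:
lsmod | grep -E 'overlay|br_netfilter'
sysctl net.bridge.bridge-nf-call-iptables net.ipv4.ip_forward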
###### Step 1: Install containerd
# Install packages to allow apt to use a repository over HTTPS:
sudo apt-get install -y \
apt-transport-https \
ca-certificates \
curl \
gnupg-agent \
software-properties-common
# Add the repository GPG key and the repo:
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
sudo add-apt-repository \
"deb [arch=amd64] https://download.docker.com/linux/ubuntu \
$(lsb_release -cs) \
stable"
# Now, install the containerd package:
sudo apt-get update \
&& sudo apt-get install -y containerd.io
# Configure containerd with a default config.toml configuration file:
sudo mkdir -p /etc/containerd \
&& sudo containerd config default | sudo tee /etc/containerd/config.toml
# Create a patch that adds the nvidia runtime to the generated config. Note: for `patch`
# to apply cleanly, the context lines below must match the indentation in your config.toml.
cat <<EOF > containerd-config.patch
--- config.toml.orig	2020-12-18 18:21:41.884984894 +0000
+++ /etc/containerd/config.toml	2020-12-18 18:23:38.137796223 +0000
@@ -94,6 +94,15 @@
        privileged_without_host_devices = false
        base_runtime_spec = ""
        [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
+          SystemdCgroup = true
+       [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia]
+          privileged_without_host_devices = false
+          runtime_engine = ""
+          runtime_root = ""
+          runtime_type = "io.containerd.runc.v1"
+          [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options]
+            BinaryName = "/usr/bin/nvidia-container-runtime"
+            SystemdCgroup = true
        [plugins."io.containerd.grpc.v1.cri".cni]
          bin_dir = "/opt/cni/bin"
          conf_dir = "/etc/cni/net.d"
EOF
# Apply the patch and restart containerd to pick up the new runtime:
sudo patch /etc/containerd/config.toml containerd-config.patch
sudo systemctl restart containerd
# You can test the installation by using the Docker hello-world container with the ctr tool:
sudo ctr image pull docker.io/library/hello-world:latest \
&& sudo ctr run --rm -t docker.io/library/hello-world:latest hello-world
###### Step 2: Install NVIDIA Container Toolkit
# First, setup the package repository and GPG key:
distribution=$(. /etc/os-release;echo $ID$VERSION_ID) \
&& curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - \
&& curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
# Now, install the NVIDIA runtime:
sudo apt-get update \
&& sudo apt-get install -y nvidia-container-runtime
# Then, we can test a GPU container:
sudo ctr image pull docker.io/nvidia/cuda:11.0-base
sudo ctr run --rm --gpus 0 -t docker.io/nvidia/cuda:11.0-base cuda-11.0-base nvidia-smi
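# If the GPU test fails, first confirm the NVIDIA driver works on the host itself:
nvidia-smi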
################### INSTALL NVIDIA DOCKER ########################
#### Setting up Docker
# Docker-CE on Ubuntu can be set up using Docker’s official convenience script:
curl https://get.docker.com | sh \
&& sudo systemctl --now enable docker
#### Setting up NVIDIA Container Toolkit
# Setup the stable repository and the GPG key:
distribution=$(. /etc/os-release;echo $ID$VERSION_ID) \
&& curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - \
&& curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
# (Optional) Add the experimental repository for access to pre-release features:
curl -s -L https://nvidia.github.io/nvidia-container-runtime/experimental/$distribution/nvidia-container-runtime.list | sudo tee /etc/apt/sources.list.d/nvidia-container-runtime.list
# Install the nvidia-docker2 package (and dependencies) after updating the package listing:
sudo apt-get update
sudo apt-get install -y nvidia-docker2
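# For reference, nvidia-docker2 registers the runtime in /etc/docker/daemon.json;
# the contents should look roughly like this (may vary slightly by version):
# {
#     "runtimes": {
#         "nvidia": {
#             "path": "nvidia-container-runtime",
#             "runtimeArgs": []
#         }
#     }
# }
cat /etc/docker/daemon.json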
# Restart the Docker daemon so it picks up the new default runtime configuration:
sudo systemctl restart docker
# At this point, a working setup can be tested by running a base CUDA container:
sudo docker run --rm --gpus all nvidia/cuda:11.0-base nvidia-smi
################### Install Triton Docker Image ########################
sudo docker pull nvcr.io/nvidia/tritonserver:<xx.yy>-py3
# EX: sudo docker pull nvcr.io/nvidia/tritonserver:21.03-py3
# To run the example models, create a model repository:
git clone https://github.com/triton-inference-server/server.git
cd server/docs/examples
./fetch_models.sh
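# After fetch_models.sh finishes, the repository should look roughly like this
# (one directory per model, numeric version subdirectories, optional config.pbtxt):
# model_repository/
# ├── densenet_onnx/
# │   ├── 1/
# │   │   └── model.onnx
# │   ├── config.pbtxt
# │   └── densenet_labels.txt
# └── ... (other example models)
ls -R model_repository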
################## Run Triton ###########################################
######### Run on System with GPUs
# Replace /home/vasper/Hasan/server/docs/examples/model_repository with the absolute path to your model repository.
sudo docker run --gpus=1 --rm -p8000:8000 -p8001:8001 -p8002:8002 -v /home/vasper/Hasan/server/docs/examples/model_repository:/models nvcr.io/nvidia/tritonserver:21.03-py3 tritonserver --model-repository=/models
######### Run on CPU-Only System
sudo docker run --rm -p8000:8000 -p8001:8001 -p8002:8002 -v /home/vasper/Hasan/server/docs/examples/model_repository:/models nvcr.io/nvidia/tritonserver:21.03-py3 tritonserver --model-repository=/models
######### Verify Triton Is Running Correctly
curl -v localhost:8000/v2/health/ready
# The HTTP request returns status 200 if Triton is ready and non-200 if it is not ready.
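# A minimal scripted check that captures only the status code:
status=$(curl -s -o /dev/null -w "%{http_code}" localhost:8000/v2/health/ready)
[ "$status" = "200" ] && echo "Triton is ready" || echo "Triton is NOT ready (status: $status)"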
######### Getting The Client Examples
# Use docker pull to get the client libraries and examples image from NGC.
sudo docker pull nvcr.io/nvidia/tritonserver:21.03-py3-sdk
# Run the client image.
sudo docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:21.03-py3-sdk
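# Inside the SDK container, the prebuilt example clients live under /workspace/install/bin:
ls /workspace/install/bin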
######### Running The Image Classification Example
# From within the nvcr.io/nvidia/tritonserver:<xx.yy>-py3-sdk image, run the example image_client application to perform image classification using the example densenet_onnx model.
# To send a request for the densenet_onnx model, use an image from the /workspace/images directory. In this case we ask for the top 3 classifications:
/workspace/install/bin/image_client -m densenet_onnx -c 3 -s INCEPTION /workspace/images/mug.jpg
# The client echoes each request ("Request 0, batch size 1") followed by the top 3 classes.
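# The same request can also go over gRPC instead of HTTP. A sketch, assuming the example
# client's -i/-u options select protocol and server URL (run image_client -h to confirm):
/workspace/install/bin/image_client -i grpc -u localhost:8001 -m densenet_onnx -c 3 -s INCEPTION /workspace/images/mug.jpg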
******************************** CONGRATULATIONS ********************************************