Skip to content

Instantly share code, notes, and snippets.

View janakiramm's full-sized avatar

Janakiram MSV janakiramm

View GitHub Profile
# Create Triton service
cat <<EOF > triton-service.yaml
apiVersion: v1
kind: Service
metadata:
name: triton
namespace: model-server
spec:
type: NodePort
selector:
# Create the namespace
# The tenant below is placed in this namespace, so it has to exist first.
kubectl create ns model-registry
# Create the tenant in the namespace
# The tenant is also named "model-registry"; it is requested with 1 server,
# 4 volumes and a capacity of 5Gi, using the "local-path" storage class.
# NOTE(review): assumes a "local-path" StorageClass is already installed in
# the cluster and that the kubectl-minio plugin is on PATH — confirm.
kubectl minio tenant create model-registry \
--servers 1 \
--volumes 4 \
--capacity 5Gi \
--namespace model-registry \
--storage-class local-path
# Install MinIO operator
# Download the v4.2.7 linux/amd64 build of the kubectl-minio plugin and save
# it under the name kubectl-minio.
wget https://github.com/minio/operator/releases/download/v4.2.7/kubectl-minio_4.2.7_linux_amd64 -O kubectl-minio
# Make the binary executable and move it into /usr/local/bin.
# NOTE(review): writing to /usr/local/bin normally needs root — confirm this
# is run as root, or prefix the mv with sudo.
chmod +x kubectl-minio
mv kubectl-minio /usr/local/bin/
# Check that the plugin is reachable as `kubectl minio`, then run its init
# step (the operator installation named in the header above).
kubectl minio version
kubectl minio init
cat <<EOF | sudo tee /etc/rancher/rke2/config.yaml
kube-controller-manager-arg:
- "cluster-signing-legacy-unknown-cert-file=/var/lib/rancher/rke2/server/tls/server-ca.crt"
- "cluster-signing-legacy-unknown-key-file=/var/lib/rancher/rke2/server/tls/server-ca.key"
write-kubeconfig-mode: "0644"
write-kubeconfig: "/root/.kube/config"
cni: "calico"
tls-san:
- triton
- 10.148.0.60
# add NVIDIA repo and refresh Helm
# Registers the gpu-operator chart repository under the local name "nvidia".
# Because of &&, the repo index refresh only runs when the add succeeded.
helm repo add nvidia https://nvidia.github.io/gpu-operator && helm repo update
# install GPU operator Helm chart
helm install --wait --generate-name \
nvidia/gpu-operator $HELM_OPTIONS \
--set operator.defaultRuntime=containerd \
--set toolkit.env[0].name=CONTAINERD_CONFIG \
--set toolkit.env[0].value=$CONTAINERD_CONFIG \
--set toolkit.env[1].name=CONTAINERD_SOCKET \
# Start a one-off pod named gpu-test from the CUDA 10.1 base image and run
# nvidia-smi inside it.
# --rm -t -i attaches an interactive terminal and removes the pod once the
# command exits; --restart=Never keeps it a single run that is not restarted.
# NOTE(review): presumably a smoke test that GPUs are visible to containers
# after the GPU operator is installed — confirm.
kubectl run gpu-test \
--rm -t -i \
--restart=Never \
--image=nvcr.io/nvidia/cuda:10.1-base-ubuntu18.04 nvidia-smi
#######################################
# Print the RKE2 data directory configured in an RKE2 config file.
#
# Only a line whose first field is exactly "data-dir:" is honoured, so
# commented-out entries and lines that merely contain the substring are
# ignored. The first matching line wins, and one pair of surrounding YAML
# quotes is stripped from the value.
#
# Arguments: $1 - config file path (default: /etc/rancher/rke2/config.yaml)
# Outputs:   the data directory on stdout; /var/lib/rancher/rke2 when the
#            file is missing/unreadable, the key is absent, or its value is
#            empty
#######################################
rke2_data_dir() {
  local config_file=${1:-/etc/rancher/rke2/config.yaml}
  local data_dir=
  if [ -r "$config_file" ]; then
    data_dir=$(awk '$1 == "data-dir:" { print $2; exit }' "$config_file")
    # The value may be written as "…" or '…' in YAML; drop the quotes.
    data_dir=${data_dir#[\"\']}
    data_dir=${data_dir%[\"\']}
  fi
  printf '%s\n' "${data_dir:-/var/lib/rancher/rke2}"
}

DATA_DIR=$(rke2_data_dir)
# Path of the containerd config template located under the RKE2 data directory.
CONTAINERD_CONFIG=$DATA_DIR/agent/etc/containerd/config.toml.tmpl
cat <<EOF | sudo tee $CONTAINERD_CONFIG
version = 2
[plugins]
[plugins."io.containerd.grpc.v1.cri"]
# install Helm3
# Download the get-helm-3 installer script from the main branch of the
# helm/helm repository into get_helm.sh.
# curl flags: -f fail on HTTP errors, -sS quiet but still report errors,
# -L follow redirects.
curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3
# Restrict the script to the owner (read/write/execute), then run it.
chmod 700 get_helm.sh
./get_helm.sh
# add RKE2 binaries to path
# Takes effect for the current shell session.
export PATH=$PATH:/var/lib/rancher/rke2/bin
# Persist the same setting for future shells. Single quotes keep $PATH literal
# in ~/.bashrc so it is expanded at each login rather than freezing the
# current session's full PATH value into the file.
echo 'export PATH=$PATH:/var/lib/rancher/rke2/bin' >> ~/.bashrc
# copy RKE2 kubeconfig file to the default location
# -p makes this safe to re-run when ~/.kube already exists.
mkdir -p ~/.kube
cp /etc/rancher/rke2/rke2.yaml ~/.kube/config
# Restrict the kubeconfig to owner read/write only.
chmod 600 ~/.kube/config
# verify the configuration
# Download the RKE2 installer script from get.rke2.io into install.sh.
# curl flags: -s silent, -f fail on HTTP errors, -L follow redirects.
curl -sfL https://get.rke2.io --output install.sh
# Make the installer executable and run it.
chmod +x install.sh
./install.sh
# Enable and activate RKE2 server
# enable registers the unit to start at boot; start launches it now.
systemctl enable rke2-server.service
systemctl start rke2-server.service