Skip to content

Instantly share code, notes, and snippets.

@ams0
Last active May 30, 2024 12:49
Show Gist options
  • Save ams0/507a02d489d32fc6b892d5c89bdf3dee to your computer and use it in GitHub Desktop.
Save ams0/507a02d489d32fc6b892d5c89bdf3dee to your computer and use it in GitHub Desktop.
# Open source AI/ML workloads on Cloud Native Infrastructure: match made in heaven - Devtalks.ro - 2024
# https://docs.google.com/presentation/d/16P7rNydNXLn6AA3rDL8ZB7yevkZ3lWLwTPizlV5OB3A/edit?usp=sharing
#
# Shared settings used by the later steps of this walkthrough.
export RG=aksclusters
export AZURE_LOCATION=westeurope
export CLUSTER_NAME=kaito
export K8S_VERSION=1.29.4
export ACR_NAME=aicommdaynl

# Create the container registry inside the resource group.
# NOTE(review): nothing in this file creates the resource group "${RG}";
# presumably it already exists - confirm before running.
az acr create --resource-group "${RG}" --name "${ACR_NAME}" --sku Basic

# Copy over the mistral image to our ACR
MODELNAME=mistral-7b-instruct
TAG="0.0.2"
az acr import -g "${RG}" --name "${ACR_NAME}" \
  --source "mcr.microsoft.com/aks/kaito/kaito-${MODELNAME}:${TAG}" \
  --image "${MODELNAME}:${TAG}"

# Copy over the phi-2 image as well; MODELNAME/TAG keep these values afterwards.
MODELNAME=phi-2
TAG="0.0.3"
az acr import -g "${RG}" --name "${ACR_NAME}" \
  --source "mcr.microsoft.com/aks/kaito/kaito-${MODELNAME}:${TAG}" \
  --image "${MODELNAME}:${TAG}"

# List the repositories now present in the registry.
az acr repository list -g "${RG}" --name "${ACR_NAME}"
# Sample output (kept as comments so it is not executed as commands):
#   Result
#   -------------------
#   mistral-7b-instruct
#   phi-2
# Create the AKS cluster: OIDC issuer, workload identity and the AI toolchain
# operator are enabled, together with KEDA, VPA, Azure CNI in overlay mode with
# the Cilium dataplane, and the ACR created earlier attached to the cluster.
az aks create --location "${AZURE_LOCATION}" \
  --resource-group "${RG}" \
  --tier Standard \
  --name "${CLUSTER_NAME}" \
  --node-count 2 \
  --node-vm-size Standard_B4ms \
  --enable-oidc-issuer \
  --enable-workload-identity \
  --enable-ai-toolchain-operator \
  --node-provisioning-mode auto \
  --enable-keda \
  --enable-vpa \
  --network-dataplane cilium \
  --network-plugin azure \
  --network-plugin-mode overlay \
  --kubernetes-version "${K8S_VERSION}" \
  --attach-acr "${ACR_NAME}"

# Merge the cluster credentials into the local kubeconfig, replacing any
# existing entry for this cluster.
az aks get-credentials --resource-group "${RG}" --name "${CLUSTER_NAME}" --overwrite-existing
# Each lookup below is assigned first and exported afterwards, so a failing
# `az` call is reported instead of being hidden by `export`.

# Get the Cluster Resource Group
RG_ID=$(az group show -n "${RG}" -o tsv --query id) \
  || echo "warning: could not look up the ID of resource group '${RG}'" >&2
export RG_ID

# Get the managed cluster Resource Group
MC_RESOURCE_GROUP=$(az aks show --resource-group "${RG}" --name "${CLUSTER_NAME}" --query nodeResourceGroup -o tsv) \
  || echo "warning: could not look up the node resource group of '${CLUSTER_NAME}'" >&2
export MC_RESOURCE_GROUP

# Set a variable for the KAITO identity name
export KAITO_IDENTITY_NAME="ai-toolchain-operator-${CLUSTER_NAME}"

# Get the principal ID for the KAITO managed identity
PRINCIPAL_ID=$(az identity show --name "${KAITO_IDENTITY_NAME}" --resource-group "${MC_RESOURCE_GROUP}" --query 'principalId' -o tsv) \
  || echo "warning: could not look up the principal ID of '${KAITO_IDENTITY_NAME}'" >&2
export PRINCIPAL_ID

# Grant contributor on the cluster resource group
az role assignment create --role "Contributor" --assignee "${PRINCIPAL_ID}" --scope "${RG_ID}"

# Get the OIDC Issuer URL
AKS_OIDC_ISSUER=$(az aks show --resource-group "${RG}" --name "${CLUSTER_NAME}" --query "oidcIssuerProfile.issuerUrl" -o tsv) \
  || echo "warning: could not look up the OIDC issuer URL of '${CLUSTER_NAME}'" >&2
export AKS_OIDC_ISSUER

# Create the federation between the KAITO service account and the KAITO Azure Managed Identity
az identity federated-credential create \
  --name "kaito-federated-identity" \
  --identity-name "${KAITO_IDENTITY_NAME}" \
  -g "${MC_RESOURCE_GROUP}" \
  --issuer "${AKS_OIDC_ISSUER}" \
  --subject "system:serviceaccount:kube-system:kaito-gpu-provisioner" \
  --audience api://AzureADTokenExchange

# If you check the kaito-gpu-provisioner pod, you'll see it's in CrashLoopBackOff
# due to the identity not yet having been configured with proper rights.
kubectl get pods -l app=ai-toolchain-operator -n kube-system

# Restart the GPU provisioner to reload authorization
kubectl rollout restart deployment/kaito-gpu-provisioner -n kube-system

# Check the pod again to confirm it's now running
kubectl get pods -l app=ai-toolchain-operator -n kube-system
# Values substituted into the Workspace manifest below.
MACHINE_SIZE=Standard_NC64as_T4_v3
MODELNAME=phi-2
ACR_NAME=aicommdaynl
# Tag of the phi-2 image that was imported into the ACR; set here explicitly so
# the manifest does not depend on whatever TAG was last assigned earlier.
TAG="0.0.3"

# Create the KAITO Workspace. The heredoc delimiter is unquoted on purpose so
# the shell expands the variables above before kubectl reads the manifest.
kubectl apply -f - <<EOF
apiVersion: kaito.sh/v1alpha1
kind: Workspace
metadata:
  annotations:
    kaito.sh/enablelb: "True"
  name: workspace-${MODELNAME}
resource:
  instanceType: "${MACHINE_SIZE}"
  labelSelector:
    matchLabels:
      apps: ${MODELNAME}
inference:
  preset:
    name: "${MODELNAME}"
    presetOptions:
      image: ${ACR_NAME}.azurecr.io/${MODELNAME}:${TAG}
EOF
# Keep refreshing the list of workspaces, nodes, services, pods and machines;
# this runs until it is interrupted.
watch kubectl get workspace,nodes,svc,pods,machines

# Register the Open WebUI chart repository and refresh the local chart index.
helm repo add open-webui https://helm.openwebui.com/
helm repo update

# Chart overrides for the open-webui release, one --set pair per line.
webui_values=(
  --set ollama.enabled=false
  --set 'ollamaUrls=http://workspace-phi-2:80/chat'
  --set service.type=LoadBalancer
  --set 'service.annotations.service\.beta\.kubernetes\.io/azure-dns-label-name=openwebui'
  --set 'extraEnvVars[0].name=WEBUI_AUTH'
  --set 'extraEnvVars[0].value=none'
)
# Install the release, or upgrade it when it already exists.
helm upgrade --install "${webui_values[@]}" open-webui open-webui/open-webui

# Open the web UI in a browser.
open http://openwebui.westeurope.cloudapp.azure.com/

# Optional in-cluster check, left disabled:
#kubectl run tmp-shell --rm -i --tty --overrides='{ "apiVersion": "v1", "spec": { "nodeSelector": { "kubernetes.io/os": "linux" } } }' --image nicolaka/netshoot -- /bin/bash
#curl -X POST "http://workspace-phi-2:80/chat" -H "accept: application/json" -H "Content-Type: application/json" -d '{"return_full_text": true, "generate_kwargs": {"max_length":1200},"prompt":"Who was the greatest Rome emperor?"}'

# Send a prompt to the /chat endpoint from outside the cluster.
# NOTE(review): the "devtalks" host name is not configured anywhere in this
# file - confirm which service it points at.
chat_request='{"return_full_text": true, "generate_kwargs": {"max_length":1200},"prompt":"Who was the greatest Rome emperor?"}'
curl -X POST "http://devtalks.westeurope.cloudapp.azure.com/chat" \
  -H "accept: application/json" \
  -H "Content-Type: application/json" \
  -d "${chat_request}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment