Skip to content

Instantly share code, notes, and snippets.

@jackfrancis
Last active January 18, 2023 18:42
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save jackfrancis/911139fe51bdd6c7b8121e9aa41e536e to your computer and use it in GitHub Desktop.
Save jackfrancis/911139fe51bdd6c7b8121e9aa41e536e to your computer and use it in GitHub Desktop.
Build large AKS cluster
#!/bin/bash
# Required settings: every variable named below must be present in the
# environment with a non-empty value. The first one found missing is reported
# on stdout and the script stops with exit status 1.
for required_env_var in RESOURCE_GROUP REGION SUBSCRIPTION_ID NAME; do
  # ${!required_env_var} is indirect expansion: the value of the variable
  # whose name is currently held in required_env_var.
  if [ -z "${!required_env_var}" ]; then
    echo "must provide a ${required_env_var} env var"
    exit 1
  fi
done
unset required_env_var
# Optional settings. Each one keeps a value already present in the environment
# and otherwise falls back to the default written after ":-".

# Kubernetes version handed to `az aks create`.
export KUBERNETES_VERSION="${KUBERNETES_VERSION:-1.21.7}"
# VM size of the system node pool. The fallback is keyed on SYSTEM_POOL_SKU
# itself, so a caller-provided value is honoured and the system pool can be
# sized independently of USER_POOL_SKU.
export SYSTEM_POOL_SKU="${SYSTEM_POOL_SKU:-Standard_D16s_v3}"
# VM size of every user node pool.
export USER_POOL_SKU="${USER_POOL_SKU:-Standard_D2s_v3}"
# Number of user node pools added after the cluster exists.
export NUM_USER_NODEPOOLS="${NUM_USER_NODEPOOLS:-5}"
# Node counts requested for the system pool and for each user pool.
export NODES_PER_SYSTEM_POOL="${NODES_PER_SYSTEM_POOL:-10}"
export NODES_PER_USER_POOL="${NODES_PER_USER_POOL:-998}"
# Resource group queried by vmssHealthCheck for scale sets
# (presumably the node resource group AKS manages for this cluster - confirm).
export AKS_IAAS_RESOURCE_GROUP="MC_${RESOURCE_GROUP}_${NAME}_${REGION}"
# No default: exported as-is; a non-empty value is sent to AKS as the
# ControlPlaneUnderlay custom header.
export UNDERLAY="${UNDERLAY}"
# Name of the VNET created when ENABLE_SWIFT is "true".
export CLUSTER_VNET="${CLUSTER_VNET:-${NAME}vnet}"
# Value passed as --max-pods for every node pool.
export MAX_PODS="${MAX_PODS:-12}"
# Feature switches; only the exact string "true" enables a feature.
export ENABLE_SWIFT="${ENABLE_SWIFT:-false}"
export ENABLE_CALICO="${ENABLE_CALICO:-false}"
export ENABLE_NAT_GATEWAY="${ENABLE_NAT_GATEWAY:-true}"
# Managed outbound IP count used with the NAT gateway option.
export OUTBOUND_NAT_GATEWAY_IPS="${OUTBOUND_NAT_GATEWAY_IPS:-8}"
# TAGS_ARG is expanded unquoted into the az command lines, so it is only
# assigned when there are tags to pass.
if [ -n "$TAGS" ]; then
  TAGS_ARG="--tags ${TAGS}"
fi
# Deletes the named instances from one VMSS, issuing one
# `az vmss delete-instances` call per instance.
# Globals:
#   AKS_IAAS_RESOURCE_GROUP    (read)    resource group that holds the VMSS
#   HAS_FAILED_STATE_INSTANCE  (written) set to "true" as soon as one instance is handled
#   NUM_DELETED_INSTANCES      (written) incremented for every delete call that succeeds
# Arguments:
#   $1   - VMSS name
#   $2   - one extra flag for `az vmss delete-instances` (e.g. --no-wait), "" for none
#   $3.. - instance names; the instance id is the text after the last "_"
_vmssDeleteInstances() {
  local vmss="$1"
  local extra_flag="$2"
  local instance
  shift 2
  for instance in "$@"; do
    HAS_FAILED_STATE_INSTANCE="true"
    echo "$(date) Deleting VMSS $vmss instance $instance"
    if az vmss delete-instances -n "$vmss" -g "$AKS_IAAS_RESOURCE_GROUP" --instance-ids "${instance##*_}" ${extra_flag:+"$extra_flag"}; then
      sleep 1
      ((NUM_DELETED_INSTANCES++))
    else
      # The call failed: pause before touching the next instance.
      sleep 30
    fi
  done
}

# Continually look for non-Succeeded VMSS instances
#
# Runs forever. Each pass lists the scale sets in AKS_IAAS_RESOURCE_GROUP and,
# for every scale set whose provisioningState is terminal ("Succeeded" or
# "Failed"), deletes:
#   1. instances whose own provisioningState is "Failed";
#   2. instances whose "vmssCSE" resource is "Failed";
#   3. instances with a "Microsoft.AKS"-published resource that is not
#      "Succeeded", "Creating" or "Deleting".
# If anything was selected it waits for the scale set to become terminal
# again, and if any delete call succeeded it re-applies the capacity read at
# the start of the pass. Passes are 150 seconds apart.
# Globals: AKS_IAAS_RESOURCE_GROUP (read)
# Outputs: timestamped progress lines on stdout
vmssHealthCheck() {
  local VMSS VMSS_JSON VMSS_PROVISIONING_STATE VMSS_CAPACITY TARGET_VMSS_INSTANCES
  local NUM_DELETED_INSTANCES HAS_FAILED_STATE_INSTANCE
  while true; do
    echo "$(date) Starting VMSS Health Remediation loop"
    # Word splitting of the jq output is intentional: one VMSS name per line.
    for VMSS in $(az vmss list -g "$AKS_IAAS_RESOURCE_GROUP" | jq -r '.[] | .name'); do
      NUM_DELETED_INSTANCES=0
      HAS_FAILED_STATE_INSTANCE="false"
      # One `az vmss show` call supplies both the state and the capacity.
      VMSS_JSON=$(az vmss show -g "$AKS_IAAS_RESOURCE_GROUP" -n "$VMSS")
      VMSS_PROVISIONING_STATE=$(printf '%s\n' "$VMSS_JSON" | jq -r '.provisioningState')
      echo "$(date) VMSS $VMSS has a ProvisioningState of $VMSS_PROVISIONING_STATE"
      VMSS_CAPACITY=$(printf '%s\n' "$VMSS_JSON" | jq -r '.sku.capacity')
      echo "$(date) VMSS $VMSS has a current capacity of $VMSS_CAPACITY"
      if [[ "$VMSS_PROVISIONING_STATE" == "Succeeded" || "$VMSS_PROVISIONING_STATE" == "Failed" ]]; then
        # The instance list is fetched again before each step so that every
        # step sees the result of the deletions made by the previous one.
        # The lists are expanded unquoted on purpose (one name per word).

        # 1. Instances that failed provisioning; deletion is not awaited.
        TARGET_VMSS_INSTANCES=$(az vmss list-instances -g "$AKS_IAAS_RESOURCE_GROUP" -n "$VMSS" | jq -r '.[] | select(.provisioningState == "Failed") | .name')
        # shellcheck disable=SC2086
        _vmssDeleteInstances "$VMSS" "--no-wait" $TARGET_VMSS_INSTANCES

        # 2. Instances whose vmssCSE resource failed. The 9th "/"-separated
        #    field of the resource id is taken as the instance name.
        TARGET_VMSS_INSTANCES=$(az vmss list-instances -g "$AKS_IAAS_RESOURCE_GROUP" -n "$VMSS" | jq -r '.[].resources[] | select(.name == "vmssCSE" and .provisioningState == "Failed") | .id' | awk -F'/' '{print $9}')
        # shellcheck disable=SC2086
        _vmssDeleteInstances "$VMSS" "" $TARGET_VMSS_INSTANCES

        # 3. Instances with a Microsoft.AKS resource in an unexpected state.
        TARGET_VMSS_INSTANCES=$(az vmss list-instances -g "$AKS_IAAS_RESOURCE_GROUP" -n "$VMSS" | jq -r '.[].resources[] | select(.publisher == "Microsoft.AKS" and .provisioningState != "Succeeded" and .provisioningState != "Creating" and .provisioningState != "Deleting") | .id' | awk -F'/' '{print $9}')
        # shellcheck disable=SC2086
        _vmssDeleteInstances "$VMSS" "" $TARGET_VMSS_INSTANCES

        if [[ "$HAS_FAILED_STATE_INSTANCE" == "true" ]]; then
          echo "$(date) Waiting for $VMSS to reach a terminal ProvisioningState after failed instances were deleted..."
          sleep 30
          # jq prints the VMSS name only once the state is terminal, so a
          # non-empty result ends the wait.
          until [[ $(az vmss show -g "$AKS_IAAS_RESOURCE_GROUP" -n "$VMSS" | jq -r '. | select(.provisioningState == "Succeeded" or .provisioningState == "Failed") | .name') ]]; do
            echo "$(date) Waiting for $VMSS to reach a terminal ProvisioningState after failed instances were deleted..."
            sleep 30
          done
          echo "$(date) VMSS $VMSS is in a terminal state after failed instances were deleted!"
        fi
      fi
      if (( NUM_DELETED_INSTANCES > 0 )); then
        echo "$(date) Instances were deleted from VMSS $VMSS, ensuring that capacity is set to $VMSS_CAPACITY"
        az vmss scale --new-capacity "$VMSS_CAPACITY" -n "$VMSS" -g "$AKS_IAAS_RESOURCE_GROUP" --no-wait
      fi
    done
    sleep 150
  done
}
# Create the resource group the cluster is deployed into.
az group create -n "$RESOURCE_GROUP" -l "$REGION"
if [[ "$ENABLE_SWIFT" == "true" ]]; then
  # With ENABLE_SWIFT the script brings its own VNET: one subnet named "vms"
  # and one named "pods". These are the subnet names this script later passes
  # as --vnet-subnet-id and --pod-subnet-id.
  az network vnet create -g "$RESOURCE_GROUP" --name "$CLUSTER_VNET" --address-prefixes 10.0.0.0/8 -o none
  az network vnet subnet create -g "$RESOURCE_GROUP" --vnet-name "$CLUSTER_VNET" --name vms --address-prefixes 10.240.0.0/16 -o none
  az network vnet subnet create -g "$RESOURCE_GROUP" --vnet-name "$CLUSTER_VNET" --name pods --address-prefixes 10.241.0.0/16 -o none
fi
# Assemble the extra `az aks create` arguments from the feature switches.
# A non-empty AKS_OPTIONS supplied by the caller is used untouched.
if [[ -z "$AKS_OPTIONS" ]]; then
  if [[ "$ENABLE_SWIFT" == "true" ]]; then
    # Both subnets live in CLUSTER_VNET; only the last path segment differs.
    aks_subnet_base="/subscriptions/${SUBSCRIPTION_ID}/resourceGroups/${RESOURCE_GROUP}/providers/Microsoft.Network/virtualNetworks/${CLUSTER_VNET}/subnets"
    AKS_OPTIONS+=" --vnet-subnet-id ${aks_subnet_base}/vms --pod-subnet-id ${aks_subnet_base}/pods"
  fi
  # --aks-custom-headers is documented as one comma-separated list of
  # key=value pairs, so every requested header is collected first and the
  # flag is emitted a single time instead of once per header.
  aks_custom_headers=""
  if [[ -n "$UNDERLAY" ]]; then
    aks_custom_headers="ControlPlaneUnderlay=${UNDERLAY}"
  fi
  if [[ "$ENABLE_CALICO" == "true" ]]; then
    # ${var:+,} inserts the separator only when a header is already present.
    aks_custom_headers+="${aks_custom_headers:+,}EnableNetworkPluginNone=true"
  fi
  if [[ -n "$aks_custom_headers" ]]; then
    AKS_OPTIONS+=" --aks-custom-headers ${aks_custom_headers}"
  fi
  if [[ "$ENABLE_NAT_GATEWAY" == "true" ]]; then
    AKS_OPTIONS+=" --outbound-type managedNATGateway --nat-gateway-managed-outbound-ip-count ${OUTBOUND_NAT_GATEWAY_IPS}"
  fi
  unset aks_subnet_base aks_custom_headers
fi
# Blocks until `az aks show` reports provisioningState "Succeeded" for the
# cluster, polling once a minute. There is no timeout: any other state simply
# keeps the loop waiting.
waitForClusterSucceeded() {
  until [[ $(az aks show -g "$RESOURCE_GROUP" -n "$NAME" | jq -r '.provisioningState') == "Succeeded" ]]; do
    sleep 60
  done
}

# Create the cluster with its system pool; nothing else can proceed if this
# fails. TAGS_ARG and AKS_OPTIONS each hold several space-separated arguments
# and are therefore expanded unquoted on purpose.
# shellcheck disable=SC2086
az aks create -g "$RESOURCE_GROUP" -n "$NAME" $TAGS_ARG --kubernetes-version "$KUBERNETES_VERSION" -l "$REGION" -c "$NODES_PER_SYSTEM_POOL" -s "$SYSTEM_POOL_SKU" --max-pods "$MAX_PODS" --network-plugin azure --node-osdisk-type ephemeral --uptime-sla $AKS_OPTIONS || exit 1
waitForClusterSucceeded

if [[ "$ENABLE_CLUSTER_AUTOSCALER" != "true" ]]; then
  az aks scale -g "$RESOURCE_GROUP" -n "$NAME" -c "$NODES_PER_SYSTEM_POOL" --nodepool-name nodepool1 --no-wait
  if [[ "$VMSS_HEALTH_CHECK" == "true" ]]; then
    # Run VMSS Health Check in the background
    vmssHealthCheck &
    VMSS_HEALTH_CHECK_PID=$!
    # A background job started from a script ignores SIGINT, so Ctrl-C during
    # the long node-pool phase below would end this script but leave the
    # health check running. Stop it explicitly and keep the exit status an
    # interrupted script would have had.
    trap 'kill "$VMSS_HEALTH_CHECK_PID" >/dev/null 2>&1; exit 130' INT
  fi
fi
waitForClusterSucceeded

# The node-pool options are identical for every pool, so they are built once.
# A non-empty NODEPOOL_OPTIONS supplied by the caller is used untouched.
if [[ -z "$NODEPOOL_OPTIONS" ]]; then
  if [[ "$ENABLE_CLUSTER_AUTOSCALER" == "true" ]]; then
    # NOTE(review): the pools are still requested with -c NODES_PER_USER_POOL;
    # confirm the CLI accepts a node count above --max-count 900.
    NODEPOOL_OPTIONS+=" --enable-cluster-autoscaler --min-count 2 --max-count 900"
  fi
  if [[ "$ENABLE_SWIFT" == "true" ]]; then
    NODEPOOL_OPTIONS+=" --vnet-subnet-id /subscriptions/${SUBSCRIPTION_ID}/resourceGroups/${RESOURCE_GROUP}/providers/Microsoft.Network/virtualNetworks/${CLUSTER_VNET}/subnets/vms --pod-subnet-id /subscriptions/${SUBSCRIPTION_ID}/resourceGroups/${RESOURCE_GROUP}/providers/Microsoft.Network/virtualNetworks/${CLUSTER_VNET}/subnets/pods"
  fi
fi

# The system pool is nodepool1, so the user pools are numbered
# nodepool2 .. nodepool<NUM_USER_NODEPOOLS + 1>.
last_user_pool_index=$((NUM_USER_NODEPOOLS + 1))
for ((i = 2; i <= last_user_pool_index; i++)); do
  # Wait 5 mins to spread out Azure API calls
  sleep 300
  # TAGS_ARG and NODEPOOL_OPTIONS are expanded unquoted on purpose (see above).
  # shellcheck disable=SC2086
  az aks nodepool add -g "$RESOURCE_GROUP" --cluster-name "$NAME" -n "nodepool$i" -c "$NODES_PER_USER_POOL" $TAGS_ARG --max-pods "$MAX_PODS" -s "$USER_POOL_SKU" --node-osdisk-type ephemeral $NODEPOOL_OPTIONS --no-wait
done
# Signal handler: stops the background VMSS health check and ends the script
# with status 0.
# Globals: VMSS_HEALTH_CHECK_PID (read) - PID of the background job; may be
#          unset when the health check was never started.
cleanup() {
  # Best effort: whatever kill prints or returns (for example when there is
  # no PID to signal) is discarded.
  {
    kill "$VMSS_HEALTH_CHECK_PID"
  } >/dev/null 2>&1
  exit 0
}
# Run cleanup on Ctrl-C (SIGINT) and also on SIGTERM, so that terminating the
# script with a plain `kill` stops the background health check as well instead
# of leaving it running. A trap that arrives during the sleep below runs once
# that sleep has finished.
trap cleanup INT TERM
# Nothing is left to do in the foreground: stay alive until a signal arrives
# so the background health check (if one was started) keeps running.
while true; do
  sleep 30
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment