Skip to content

Instantly share code, notes, and snippets.

@pmendelski
Last active December 8, 2025 17:28
Show Gist options
  • Select an option

  • Save pmendelski/a0bc56e7d1d8365c3d050df8296f29a6 to your computer and use it in GitHub Desktop.

Select an option

Save pmendelski/a0bc56e7d1d8365c3d050df8296f29a6 to your computer and use it in GitHub Desktop.
#!/usr/bin/env bash
#
# GKE node pool creation benchmark.
#
# Ensures a GKE cluster with node auto-provisioning exists, applies a JobSet
# with NODE_POOLS replicated jobs, measures how long it takes until that many
# pods are Running, then deletes the JobSet and measures how long the
# non-default node pools take to go away.
#
# Environment (all optional):
#   CLUSTER_NAME  cluster to create/reuse       (default: node-pool-benchmark)
#   VERSION       GKE version for a new cluster (default: 1.34.1-gke.1829001)
#   LOCATION      cluster location              (default: us-central1)
#   NODE_POOLS    number of replicated jobs     (default: 200)
#   JOBSET_NAME   name of the JobSet object     (default: test-jobset)
#   MAX_CPU       auto-provisioning CPU limit for a new cluster    (default: 500)
#   MAX_MEMORY    auto-provisioning memory limit for a new cluster (default: 2000)
#
# Sample execution:
#   # Warm-up
#   NODE_POOLS=200 ./node-pool-creation-benchmark.sh
#   # Run benchmark
#   NODE_POOLS=200 ./node-pool-creation-benchmark.sh | tee benchmark.txt

# errexit, nounset, noglob, and fail a pipeline if any stage fails.
set -euf -o pipefail

# Assign defaults only when the variable is unset or empty.
: "${CLUSTER_NAME:="node-pool-benchmark"}"
: "${VERSION:="1.34.1-gke.1829001"}"
: "${LOCATION:="us-central1"}"
: "${NODE_POOLS:=200}"
: "${JOBSET_NAME:="test-jobset"}"

# NODE_POOLS is interpolated into the manifest and compared numerically in the
# wait loop; a non-integer would make that comparison error out inside `if`
# and the loop would never terminate. Reject it up front.
case "$NODE_POOLS" in
  '' | *[!0-9]*)
    echo "NODE_POOLS must be a non-negative integer, got: '$NODE_POOLS'" >&2
    exit 1
    ;;
esac
# Ensure the benchmark cluster exists and point kubectl at it.
# When the cluster is missing it is created with node auto-provisioning,
# upgrades are blocked for 10 days, and the JobSet controller is installed.
# When it already exists only the kubectl credentials are refreshed.
if ! gcloud container clusters describe "$CLUSTER_NAME" \
  --location "$LOCATION" --format 'value(id)' &>/dev/null; then
  echo "Creating cluster: $CLUSTER_NAME $VERSION $LOCATION"
  # --location (not --region) so that LOCATION is interpreted the same way as
  # in every other gcloud call of this script, whether it is a region or a zone.
  gcloud container clusters create "$CLUSTER_NAME" \
    --location "$LOCATION" --cluster-version "$VERSION" \
    --enable-autoprovisioning \
    --autoscaling-profile optimize-utilization \
    --max-cpu "${MAX_CPU:-500}" --max-memory "${MAX_MEMORY:-2000}"

  echo "Setting maintenance exclusions to block upgrades"
  exclusion_start="$(date +'%Y-%m-%d')"
  # GNU date understands -d '+10 days'; BSD/macOS date needs -v+10d instead.
  exclusion_end="$(date +'%Y-%m-%d' -d '+10 days' 2>/dev/null || date -v+10d +'%Y-%m-%d')"
  gcloud container clusters update "$CLUSTER_NAME" \
    --location "$LOCATION" \
    --add-maintenance-exclusion-start "${exclusion_start}T00:00:00-00:00" \
    --add-maintenance-exclusion-end "${exclusion_end}T00:00:00-00:00" \
    --add-maintenance-exclusion-name no_upgrades \
    --add-maintenance-exclusion-scope no_upgrades

  echo "Switching kubectl"
  gcloud container clusters get-credentials "$CLUSTER_NAME" --location="$LOCATION"

  echo "Installing jobset controller"
  kubectl apply --server-side -f https://github.com/kubernetes-sigs/jobset/releases/download/v0.10.1/manifests.yaml
  # Block until the controller deployment reports Available (max 5 minutes).
  kubectl wait --for=condition=available \
    deployment/jobset-controller-manager \
    -n jobset-system \
    --timeout=300s
else
  echo "Cluster already exists: $CLUSTER_NAME"
  echo "Switching kubectl"
  gcloud container clusters get-credentials "$CLUSTER_NAME" --location="$LOCATION"
fi
# Submit the benchmark JobSet: NODE_POOLS replicated jobs, each running a
# single busybox pod that sleeps forever on an e2-standard-2 node.
# The exclusive-topology annotation keyed on cloud.google.com/gke-nodepool is
# presumably what forces one job per node pool (JobSet exclusive placement);
# confirm against the JobSet documentation.
# ${VAR?} aborts the script if the variable is unset instead of sending an
# incomplete manifest. The heredoc delimiter is unquoted so both variables
# are expanded by the shell before kubectl reads the YAML.
kubectl apply -f - <<EOF
apiVersion: jobset.x-k8s.io/v1alpha2
kind: JobSet
metadata:
  name: "${JOBSET_NAME?}"
  annotations:
    alpha.jobset.sigs.k8s.io/exclusive-topology: cloud.google.com/gke-nodepool
spec:
  failurePolicy:
    maxRestarts: 3
  replicatedJobs:
    - name: workers
      replicas: ${NODE_POOLS?}
      template:
        spec:
          parallelism: 1
          completions: 1
          backoffLimit: 10
          template:
            spec:
              nodeSelector:
                node.kubernetes.io/instance-type: e2-standard-2
              containers:
                - name: sleep
                  image: busybox
                  resources:
                    requests:
                      memory: 64Mi
                      cpu: 250m
                  command:
                    - sleep
                  args:
                    - infinity
EOF
# Poll every 10 seconds until at least NODE_POOLS pods of the JobSet are
# listed as Running, printing progress whenever the count changes.
# Sets JOBS_TIME (seconds from the start of polling) for the final summary.
echo "Waiting for jobs to be running across $NODE_POOLS node pools..."
START=$SECONDS
PREV_VALUE="-1"
while true; do
  # Select pods by the configured JobSet name (not a hard-coded one), so that
  # overriding JOBSET_NAME still matches the pods created by this run.
  # grep -c exits non-zero when nothing matches; `|| true` keeps errexit and
  # pipefail from aborting the benchmark while the count is still 0.
  RUNNING_JOBS="$(kubectl get pods -l "jobset.sigs.k8s.io/jobset-name=${JOBSET_NAME}" | grep -c "Running" || true)"
  ELAPSED=$((SECONDS - START))
  # Only log when the number of running pods changed since the last poll.
  if [ "$PREV_VALUE" -ne "$RUNNING_JOBS" ]; then
    PREV_VALUE="$RUNNING_JOBS"
    echo "Running jobs: $RUNNING_JOBS - $((ELAPSED / 60))m $((ELAPSED % 60))s"
  fi
  if [ "$RUNNING_JOBS" -ge "$NODE_POOLS" ]; then
    JOBS_TIME=$((SECONDS - START))
    echo "RESULT: Jobs running in $((JOBS_TIME / 60))m $((JOBS_TIME % 60))s"
    break
  fi
  sleep 10
done
# Remove the JobSet, then time how long it takes for the non-default node
# pools to stop being reported as RUNNING. Sets DELETION_TIME (seconds).
kubectl delete jobset "$JOBSET_NAME"

START=$SECONDS
PREV_VALUE="-1"
while :; do
  # Number of node pools in RUNNING state, not counting "default-pool".
  RUNNING_NODE_POOLS="$(
    gcloud container node-pools list \
      --location="$LOCATION" \
      --cluster="$CLUSTER_NAME" \
      --format="value(name)" \
      --filter="status=RUNNING AND name!=default-pool" \
      | wc -l
  )"
  ELAPSED=$((SECONDS - START))

  # Report progress only when the count differs from the previous poll.
  if ((PREV_VALUE != RUNNING_NODE_POOLS)); then
    PREV_VALUE="$RUNNING_NODE_POOLS"
    echo "Running node pools: $RUNNING_NODE_POOLS - $((ELAPSED / 60))m $((ELAPSED % 60))s"
  fi

  # Done once at most one non-default pool is left.
  # NOTE(review): presumably one pool is expected to remain for non-benchmark
  # workloads; confirm why the threshold is 1 rather than 0.
  if ((RUNNING_NODE_POOLS <= 1)); then
    DELETION_TIME=$((SECONDS - START))
    break
  fi

  sleep 10
done
# Final summary: both measured durations, formatted as minutes and seconds.
printf '\nRESULTS (node pools: %s)\n' "$NODE_POOLS"
printf 'Jobs running: %sm %ss\n' "$((JOBS_TIME / 60))" "$((JOBS_TIME % 60))"
printf 'Node pools deleted: %sm %ss\n' "$((DELETION_TIME / 60))" "$((DELETION_TIME % 60))"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment