Skip to content

Instantly share code, notes, and snippets.

@robertoriv
Last active May 23, 2024 19:55
Show Gist options
  • Save robertoriv/7537e77844d12154437dee0116f79603 to your computer and use it in GitHub Desktop.
Save robertoriv/7537e77844d12154437dee0116f79603 to your computer and use it in GitHub Desktop.
Testing Karpenter's eviction queueing changes
# Deployment of 10 "non-critical" pods (no priorityClassName is set) used to
# observe the order in which pods are evicted while a node is drained.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: non-critical-pod
spec:
  replicas: 10
  selector:
    matchLabels:
      app: non-critical-pod
  template:
    metadata:
      labels:
        app: non-critical-pod
    spec:
      containers:
        - name: alpine
          image: alpine
          # Prints a timestamped "[non-critical]" line every 10 seconds, forever.
          command:
            [
              "/bin/sh",
              "-c",
              'while true; do echo "[$(date +%Y-%m-%d_%H:%M:%S)] [non-critical] Waiting 10s ...."; sleep 10; done',
            ]
          lifecycle:
            # The preStop hook sleeps for 30s, so each pod stays in the
            # terminating state for a while after it is evicted.
            preStop:
              exec:
                command: ["/bin/sleep", "30"]
          resources:
            # Requests equal limits: one full CPU and 1Gi of memory per pod.
            limits:
              cpu: "1"
              memory: 1Gi
            requests:
              cpu: "1"
              memory: 1Gi
      # 5s longer than the 30s preStop sleep.
      terminationGracePeriodSeconds: 35
      # Only schedule onto nodes labeled karpenter.sh/nodepool=default.
      nodeSelector:
        karpenter.sh/nodepool: default
---
# PodDisruptionBudget for the pods labeled app=non-critical-pod: at most 3 of
# them may be unavailable at once due to voluntary disruptions such as eviction.
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: non-critical-pod-pdb
spec:
  maxUnavailable: 3
  selector:
    matchLabels:
      app: non-critical-pod
---
# Deployment of 10 "critical" pods. Identical to the non-critical Deployment
# except for the names/labels, the log tag, and the system-node-critical
# priority class set on the pod spec.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: critical-pod
spec:
  replicas: 10
  selector:
    matchLabels:
      app: critical-pod
  template:
    metadata:
      labels:
        app: critical-pod
    spec:
      containers:
        - name: alpine
          image: alpine
          # Prints a timestamped "[critical]" line every 10 seconds, forever.
          command:
            [
              "/bin/sh",
              "-c",
              'while true; do echo "[$(date +%Y-%m-%d_%H:%M:%S)] [critical] Waiting 10s ...."; sleep 10; done',
            ]
          lifecycle:
            # The preStop hook sleeps for 30s, so each pod stays in the
            # terminating state for a while after it is evicted.
            preStop:
              exec:
                command: ["/bin/sleep", "30"]
          resources:
            # Requests equal limits: one full CPU and 1Gi of memory per pod.
            limits:
              cpu: "1"
              memory: 1Gi
            requests:
              cpu: "1"
              memory: 1Gi
      # 5s longer than the 30s preStop sleep.
      terminationGracePeriodSeconds: 35
      # Built-in Kubernetes priority class; this is what marks the pods as critical.
      priorityClassName: system-node-critical
      # Only schedule onto nodes labeled karpenter.sh/nodepool=default.
      nodeSelector:
        karpenter.sh/nodepool: default
---
# PodDisruptionBudget for the pods labeled app=critical-pod: at most 3 of them
# may be unavailable at once due to voluntary disruptions such as eviction.
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: critical-pod-pdb
spec:
  maxUnavailable: 3
  selector:
    matchLabels:
      app: critical-pod
// Drain queues evictions for the pods on the node and reports whether the node is drained.
// It returns nil once no pod on the node is waiting to be evicted, the error built by
// NewNodeDrainError while pods are still waiting, and a wrapped error if the pods on the
// node cannot be listed.
// https://kubernetes.io/docs/concepts/architecture/nodes/#graceful-node-shutdown
func (t *Terminator) Drain(ctx context.Context, node *v1.Node) error {
	pods, err := nodeutil.GetPods(ctx, t.kubeClient, node)
	if err != nil {
		return fmt.Errorf("listing pods on node, %w", err)
	}
	// evictablePods are the pods that pass podutil.IsEvictable, i.e. the candidates for the eviction queue
	evictablePods := lo.Filter(pods, func(p *v1.Pod, _ int) bool { return podutil.IsEvictable(p) })
	// check if there are any pods that are still terminating and haven't exceeded their termination grace period
	// if there are any, only queue pods from the same eviction group or lower
	terminatingPods := lo.Filter(pods, func(p *v1.Pod, _ int) bool {
		return podutil.IsTerminating(p) && !podutil.IsStuckTerminating(p, t.clock)
	})
	if len(terminatingPods) > 0 {
		logger := log.FromContext(ctx)
		logger.Info(fmt.Sprintf("Started with %d evictable pods.", len(evictablePods)))
		logger.Info(fmt.Sprintf("Found %d terminating pods: ", len(terminatingPods)))
		for _, pod := range terminatingPods {
			logger.Info(fmt.Sprintf(" - Pod: %s, Group: %d", pod.Name, podutil.GetPodEvictionGroup(pod)))
		}
		// terminatingPods is non-empty here, so MaxBy always yields a real pod
		highestOrderPod := lo.MaxBy(terminatingPods, func(p *v1.Pod, curMax *v1.Pod) bool {
			return podutil.GetPodEvictionGroup(p) > podutil.GetPodEvictionGroup(curMax)
		})
		highestGroup := podutil.GetPodEvictionGroup(highestOrderPod)
		logger.Info(fmt.Sprintf("Highest order pod: %s, Group: %d", highestOrderPod.Name, highestGroup))
		// hold back pods from higher groups until the terminating pods are gone
		evictablePods = lo.Filter(evictablePods, func(p *v1.Pod, _ int) bool {
			return podutil.GetPodEvictionGroup(p) <= highestGroup
		})
		logger.Info(fmt.Sprintf("Narrowed the list of evictablePods to be queued to %d pods.", len(evictablePods)))
		for _, pod := range evictablePods {
			logger.Info(fmt.Sprintf(" - Pod: %s, Group: %d", pod.Name, podutil.GetPodEvictionGroup(pod)))
		}
	}
	t.Evict(evictablePods)
	// podsWaitingEvictionCount is the number of pods that either haven't had eviction called against them yet
	// or are still actively terminating and haven't exceeded their termination grace period yet
	podsWaitingEvictionCount := lo.CountBy(pods, func(p *v1.Pod) bool { return podutil.IsWaitingEviction(p, t.clock) })
	if podsWaitingEvictionCount > 0 {
		// report the number of pods actually waiting, not the total number of pods on the node
		return NewNodeDrainError(fmt.Errorf("%d pods are waiting to be evicted", podsWaitingEvictionCount))
	}
	return nil
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment