Skip to content

Instantly share code, notes, and snippets.

@henrylilei
Created November 5, 2019 01:47
Show Gist options
  • Save henrylilei/733e4b2b5256c348a2c5ca46a48175da to your computer and use it in GitHub Desktop.
Save henrylilei/733e4b2b5256c348a2c5ca46a48175da to your computer and use it in GitHub Desktop.
Using Kured to update kubelet config in AKS.
# Priority class for node-maintenance pods. The kured DaemonSet in this file
# selects it through `priorityClassName: server-maintenance-priority`.
# It is opt-in only: `globalDefault: false` keeps it off pods that do not
# name it explicitly.
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
  name: server-maintenance-priority
description: 'This priority class should be used for server maintenance pods only. It will be scheduled first to the node.'
globalDefault: false
value: 1000000
---
# Cluster-scoped permissions for kured and the kubectl binary it bundles.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kured
rules:
  # Allow kured to read spec.unschedulable
  # Allow kubectl to drain/uncordon
  #
  # NB: These permissions are tightly coupled to the bundled version of
  # kubectl; the ones below match
  # https://github.com/kubernetes/kubernetes/blob/v1.12.1/pkg/kubectl/cmd/drain.go
  #
  # Nodes: read the node object and patch it.
  - apiGroups:
      - ""
    resources:
      - nodes
    verbs:
      - get
      - patch
  # Pods: enumerate, inspect and delete pods.
  - apiGroups:
      - ""
    resources:
      - pods
    verbs:
      - list
      - delete
      - get
  # DaemonSets: read-only lookup.
  # NOTE(review): presumably used by drain to recognise DaemonSet-managed
  # pods; the `extensions` group follows the kubectl version linked above.
  - apiGroups:
      - extensions
    resources:
      - daemonsets
    verbs:
      - get
  # Evictions: create eviction objects for pods.
  - apiGroups:
      - ""
    resources:
      - pods/eviction
    verbs:
      - create
---
# Grants the cluster-scoped `kured` ClusterRole to the `kured` ServiceAccount
# in the kube-system namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: kured
subjects:
  - kind: ServiceAccount
    namespace: kube-system
    name: kured
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kured
---
# Namespaced permissions for kured inside kube-system.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: kured
  namespace: kube-system
rules:
  # Allow kured to lock/unlock itself: `update` is limited by resourceNames
  # to the single DaemonSet called `kured`.
  - apiGroups:
      - extensions
    resources:
      - daemonsets
    resourceNames:
      - kured
    verbs:
      - update
---
# Grants the namespaced `kured` Role to the `kured` ServiceAccount; both live
# in kube-system.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: kured
  namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: kured
subjects:
  - kind: ServiceAccount
    name: kured
    namespace: kube-system
---
# Identity the kured pods run as (`serviceAccountName: kured` in the
# DaemonSet); the RBAC bindings in this file name it as their subject.
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: kube-system
  name: kured
---
# kured DaemonSet with an extra init container.
#
# On every Linux node the `kubelet-sentinel` init container checks the
# kubelet's CPU manager state file. If the node is not yet on the `static`
# policy it rewrites /etc/default/kubelet on the host, removes the stale state
# file and drops a custom reboot sentinel under /var/run. The kured container
# then watches that sentinel and performs the coordinated drain + reboot.
# Once the node comes back with the static policy, the init container clears
# the sentinel instead.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: kured # Must match `--ds-name`
  namespace: kube-system # Must match `--ds-namespace`
spec:
  selector:
    matchLabels:
      name: kured
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        name: kured
    spec:
      serviceAccountName: kured
      priorityClassName: server-maintenance-priority
      # A key-less `Exists` toleration matches every taint, so the pod is
      # scheduled on all nodes that pass the affinity rule below.
      tolerations:
        - operator: "Exists"
      # Linux nodes only.
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: kubernetes.io/os
                    operator: In
                    values:
                      - linux
      hostPID: true # Facilitate entering the host mount namespace via init
      restartPolicy: Always
      initContainers:
        - name: kubelet-sentinel
          # NOTE(review): `latest` combined with `IfNotPresent` means each node
          # keeps whichever image it pulled first; consider pinning a tag.
          image: debian:latest
          imagePullPolicy: IfNotPresent
          command: ['/bin/bash']
          args:
            - "-c"
            - |
              # Abort on the first failing command so that a half-applied edit
              # never reaches the "remove state file / request reboot" steps.
              set -euo pipefail

              # Host paths as seen through the hostPath mounts under /aks-node.
              kubelet_defaults=/aks-node/etc/default/kubelet
              cpu_state=/aks-node/var/lib/kubelet/cpu_manager_state
              sentinel=/aks-node/var/run/reboot-required-henrylilei
              scratch=~/kubelet.1

              # A missing state file is treated the same as "not static".
              if grep -q 'static' "$cpu_state" 2>/dev/null; then
                # Nothing to do; clear any reboot request left from a prior run.
                rm -f "$sentinel"
                echo 'kubelet in good condition'
                exit 0
              fi

              # Edit a private copy, then copy the result back over the host file.
              cp "$kubelet_defaults" "$scratch"
              # Only add --kube-reserved when the file does not already set it.
              if grep -q 'kube-reserved' "$scratch"; then
                sed -i 's/KUBELET_OPTS=$/KUBELET_OPTS=--cpu-cfs-quota=false --cpu-manager-policy=static/g' "$scratch"
              else
                sed -i 's/KUBELET_OPTS=$/KUBELET_OPTS=--cpu-cfs-quota=false --cpu-manager-policy=static --kube-reserved=cpu=109m,memory=9543Mi/g' "$scratch"
              fi
              sed -i 's/--image-gc-high-threshold=85/--image-gc-high-threshold=75/g' "$scratch"
              sed -i 's/--image-gc-low-threshold=80/--image-gc-low-threshold=50/g' "$scratch"
              cp "$scratch" "$kubelet_defaults"

              # The state file records the previous CPU manager policy and has
              # to go before the kubelet restarts with the new one.
              rm -f "$cpu_state"
              # Ask kured (see --reboot-sentinel below) to reboot this node.
              echo 'true' > "$sentinel"
              echo 'done modifying kubelet config'
          volumeMounts:
            - name: etc-default
              mountPath: /aks-node/etc/default
            - name: var-run
              mountPath: /aks-node/var/run
            - name: var-lib-kubelet
              mountPath: /aks-node/var/lib/kubelet
      containers:
        - name: kured
          image: docker.io/weaveworks/kured:1.2.0
          imagePullPolicy: IfNotPresent
          securityContext:
            privileged: true # Give permission to nsenter /proc/1/ns/mnt
          env:
            # Pass in the name of the node on which this pod is scheduled
            # for use with drain/uncordon operations and lock acquisition
            - name: KURED_NODE_ID
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
          command:
            - /usr/bin/kured
            # - --alert-filter-regexp=^RebootRequired$
            - --blocking-pod-selector=runtime=long,cost=expensive
            - --blocking-pod-selector=name=temperamental
            # - --blocking-pod-selector=...
            - --ds-name=kured
            - --ds-namespace=kube-system
            # - --lock-annotation=weave.works/kured-node-lock
            - --period=1m
            # - --prometheus-url=http://prometheus.monitoring.svc.cluster.local
            # Make sure you don't use /var/run/reboot-required here, since that
            # file is already used by kernel updates. This is the host-side
            # path of the file the init container writes under /aks-node.
            - --reboot-sentinel=/var/run/reboot-required-henrylilei
            # - --slack-hook-url=https://hooks.slack.com/...
            # - --slack-username=prod
      # Host directories the init container reads and modifies.
      volumes:
        - name: etc-default
          hostPath:
            path: /etc/default
        - name: var-run
          hostPath:
            path: /var/run
        - name: var-lib-kubelet
          hostPath:
            path: /var/lib/kubelet
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment