Testing the Kubernetes Descheduler to rebalance pods, à la https://stackoverflow.com/q/52106831/5894196
# Note: I used 2 t2.micro EC2 instances (1 GiB of memory each) as worker nodes
# The 1GB in the OP maps to 440Mi (~50% of a node's allocatable memory) in this example
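# (Optional) Sanity check the ~50% math by listing each node's allocatable memory
# (the custom-columns names below are just labels I picked, not kubectl defaults)
kubectl get nodes -o custom-columns=NAME:.metadata.name,ALLOCATABLE_MEMORY:.status.allocatable.memory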
kubectl run pod1 --image=nginx --requests=memory=440Mi
kubectl run pod3 --image=nginx --requests=memory=440Mi
kubectl run pod4 --image=nginx --requests=memory=100Mi
kubectl run pod2 --image=nginx --requests=memory=100Mi
# The scheduler alternates each deployment's pod across the two nodes, as in the OP
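# (Optional) Confirm the placement; the NODE column should show each worker holding
# one 440Mi pod and one 100Mi pod
kubectl get pods -o wide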
# A new pod that can't be scheduled because neither node has enough free memory
kubectl run newpod --image=nginx --requests=memory=440Mi --replicas=1
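# (Optional) Inspect why newpod is stuck Pending; the scheduler event usually reads
# something like "0/2 nodes are available: 2 Insufficient memory."
# (assumes the default run=newpod label that kubectl run applies to the pods)
kubectl describe pods -l run=newpod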
# A "LowNodeUtilization" descheduler policy that frees up enough memory for newpod to be scheduled
cat << EOF > /tmp/policy.yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
  "LowNodeUtilization":
    enabled: true
    params:
      nodeResourceUtilizationThresholds:
        thresholds:
          "memory": 50
        targetThresholds:
          "memory": 51
          "pods": 0
EOF
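# Roughly, per the descheduler's LowNodeUtilization docs: nodes below "thresholds" count
# as underutilized, and nodes above "targetThresholds" are candidates for pod eviction
# (Optional) Check current per-node request utilization before running the descheduler
kubectl describe nodes | grep -A 5 "Allocated resources"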
# The following creates the RBAC resources needed to run the descheduler as a Job using the above policy
cat << EOF | kubectl create -f -
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1beta1
metadata:
  name: descheduler-cluster-role
rules:
- apiGroups: [""]
  resources: ["nodes"]
  verbs: ["get", "watch", "list"]
- apiGroups: [""]
  resources: ["pods"]
  verbs: ["get", "watch", "list", "delete"]
- apiGroups: [""]
  resources: ["pods/eviction"]
  verbs: ["create"]
EOF
kubectl create sa descheduler-sa -n kube-system
kubectl create clusterrolebinding descheduler-cluster-role-binding \
  --clusterrole=descheduler-cluster-role \
  --serviceaccount=kube-system:descheduler-sa
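# (Optional) Sanity-check the RBAC wiring; this should print "yes" for the descheduler's service account
kubectl auth can-i delete pods --as=system:serviceaccount:kube-system:descheduler-sa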
kubectl create configmap descheduler-policy-configmap \
  -n kube-system --from-file=/tmp/policy.yaml
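# (Optional) Confirm the policy landed in the ConfigMap under the key "policy.yaml"
# (--from-file uses the file name as the key)
kubectl describe configmap descheduler-policy-configmap -n kube-system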
cat << EOF > /tmp/descheduler-job.yaml
apiVersion: batch/v1
kind: Job
metadata:
  name: descheduler-job
  namespace: kube-system
spec:
  parallelism: 1
  completions: 1
  template:
    metadata:
      name: descheduler-pod
      annotations:
        scheduler.alpha.kubernetes.io/critical-pod: ""
    spec:
      containers:
      - name: descheduler
        image: jelmersnoeck/descheduler
        volumeMounts:
        - mountPath: /policy-dir
          name: policy-volume
        command:
        - /bin/descheduler
        - --policy-config-file
        - /policy-dir/policy.yaml
        - --v=7
      restartPolicy: "Never"
      serviceAccountName: descheduler-sa
      volumes:
      - name: policy-volume
        configMap:
          name: descheduler-policy-configmap
EOF
kubectl create -f /tmp/descheduler-job.yaml
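# (Optional) Once the Job's pod completes, fetch the descheduler logs; the Job controller
# labels its pods with job-name=descheduler-job, so a label selector finds them
kubectl get pods -n kube-system -l job-name=descheduler-job
kubectl logs -n kube-system -l job-name=descheduler-job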
# output of kubectl get pods before running the descheduler
# NAME                     READY   STATUS    RESTARTS   AGE
# newpod-b684d4dbc-tdwl8   0/1     Pending   0          13s
# pod1-db668b89f-wg62k     1/1     Running   0          7m
# pod2-76ff575445-c6t7w    1/1     Running   0          7m
# pod3-6fc56bfcbc-c9m86    1/1     Running   0          7m
# pod4-7698ffcc8f-lpnt6    1/1     Running   0          7m
# output of kubectl logs on the descheduler pod (irrelevant lines omitted)
# I0902 07:01:16.836621 1 lownodeutilization.go:227] Evicted pod: "pod2-76ff575445-c6t7w" (<nil>)
# I0902 07:01:16.837516 1 lownodeutilization.go:241] updated node usage: api.ResourceThresholds{"cpu":25, "memory":49.39722579145635, "pods":3.6363636363636367}
# I0902 07:01:16.877865 1 lownodeutilization.go:227] Evicted pod: "pod3-6fc56bfcbc-c9m86" (<nil>)
# I0902 07:01:16.877878 1 lownodeutilization.go:241] updated node usage: api.ResourceThresholds{"pods":2.7272727272727275, "cpu":25, "memory":0}
# I0902 07:01:16.877901 1 lownodeutilization.go:202] 2 pods evicted from node "ip-10-0-1-101" with usage map[pods:2.7272727272727275 cpu:25 memory:0]
# I0902 07:01:16.877918 1 lownodeutilization.go:191] evicting pods from node "ip-10-0-1-102" with usage: api.ResourceThresholds{"cpu":25, "memory":60.62386801678734, "pods":3.6363636363636362}
# I0902 07:01:16.877933 1 lownodeutilization.go:202] 0 pods evicted from node "ip-10-0-1-102" with usage map[cpu:25 memory:60.62386801678734 pods:3.6363636363636362]
# I0902 07:01:16.877943 1 lownodeutilization.go:94] Total number of pods evicted: 2
# I0902 07:01:16.877949 1 node_affinity.go:31] Evicted 0 pods
# output of kubectl get pods after descheduler run
# NAME                     READY   STATUS    RESTARTS   AGE
# newpod-b684d4dbc-tdwl8   1/1     Running   0          2m
# pod1-db668b89f-wg62k     1/1     Running   0          9m
# pod2-76ff575445-nddvh    1/1     Running   0          1m
# pod3-6fc56bfcbc-n7km4    1/1     Running   0          1m
# pod4-7698ffcc8f-lpnt6    1/1     Running   0          9m
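# (Optional) Clean up the test resources when finished; kubectl run created Deployments
# here, so the pod1..pod4 and newpod workloads are deleted as deployments
kubectl delete job descheduler-job -n kube-system
kubectl delete configmap descheduler-policy-configmap -n kube-system
kubectl delete clusterrolebinding descheduler-cluster-role-binding
kubectl delete clusterrole descheduler-cluster-role
kubectl delete sa descheduler-sa -n kube-system
kubectl delete deployment pod1 pod2 pod3 pod4 newpod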