Skip to content

Instantly share code, notes, and snippets.

@silashansen
Last active April 21, 2024 18:08
Show Gist options
  • Save silashansen/dff33e237946229623a9c26bae83dfa1 to your computer and use it in GitHub Desktop.
Save silashansen/dff33e237946229623a9c26bae83dfa1 to your computer and use it in GitHub Desktop.
# THIS IS A VERY HACKY AD-HOC SOLUTION TO BALANCE NODES BASED ON MEMORY USAGE ACROSS THE CLUSTER
import subprocess
import json
import time
def execute_kubectl_command(command):
    """Run ``kubectl`` with the given arguments and return its standard output.

    Args:
        command: The kubectl arguments as one whitespace-separated string,
            e.g. ``"get nodes -o json"``.

    Returns:
        The text kubectl wrote to stdout.

    Raises:
        RuntimeError: If kubectl exits with a non-zero status. The message
            carries the exit code and whatever kubectl wrote to stderr.
    """
    full_command = ["kubectl"] + command.split()
    result = subprocess.run(full_command, capture_output=True, text=True)
    # Judge success by the exit status, not by the presence of stderr output:
    # kubectl emits warnings (deprecations, throttling notices, ...) on stderr
    # even when the command succeeded, and those must not abort a run that may
    # currently have nodes cordoned.
    if result.returncode != 0:
        raise RuntimeError(
            f"Error executing kubectl command: {result.stderr} "
            f"(exit code {result.returncode})"
        )
    return result.stdout
def get_memory_usage():
    """Collect memory utilisation for every non-control-plane node.

    Node names come from ``kubectl get nodes -o json``; utilisation comes from
    the fifth column (MEMORY%) of ``kubectl top nodes --no-headers``.

    Returns:
        A list of ``(node_name, memory_percent)`` tuples where
        ``memory_percent`` is an int. Nodes whose metrics are not available
        (non-numeric percentage such as ``<unknown>``) are left out.
    """
    # Label keys used by different distributions to mark control-plane nodes.
    control_plane_labels = (
        "node-role.kubernetes.io/controlplane",   # RKE
        "node-role.kubernetes.io/control-plane",  # upstream / kubeadm
        "node-role.kubernetes.io/master",         # legacy upstream
    )

    nodes_info = json.loads(execute_kubectl_command("get nodes -o json"))
    worker_nodes = set()
    for node in nodes_info.get("items", []):
        metadata = node.get("metadata", {})
        labels = metadata.get("labels") or {}
        if not any(label in labels for label in control_plane_labels):
            worker_nodes.add(metadata["name"])

    output = execute_kubectl_command("top nodes --no-headers")
    usage_data = []
    for line in output.splitlines():
        parts = line.split()
        if len(parts) < 5:
            # Blank or truncated line: nothing to parse.
            continue
        node_name, memory_pct = parts[0], parts[4]
        if node_name not in worker_nodes:
            continue
        try:
            usage = int(memory_pct.rstrip("%"))
        except ValueError:
            # metrics-server has no sample for this node yet ("<unknown>").
            continue
        usage_data.append((node_name, usage))
    return usage_data
def cordon_nodes(nodes):
    """Run ``kubectl cordon`` on each node in *nodes*, announcing each one first."""
    for node_name in nodes:
        announcement = f"Cordoning {node_name}"
        print(announcement)
        kubectl_args = f"cordon {node_name}"
        execute_kubectl_command(kubectl_args)
def uncordon_nodes(nodes):
    """Run ``kubectl uncordon`` on each node in *nodes*, announcing each one first."""
    for node_name in nodes:
        announcement = f"Uncordoning {node_name}"
        print(announcement)
        kubectl_args = f"uncordon {node_name}"
        execute_kubectl_command(kubectl_args)
def _memory_quantity_to_bytes(quantity):
    """Convert a Kubernetes memory quantity such as ``"512Mi"`` to bytes.

    Returns None when the text cannot be parsed.
    """
    multipliers = {
        "Ki": 1024, "Mi": 1024 ** 2, "Gi": 1024 ** 3,
        "Ti": 1024 ** 4, "Pi": 1024 ** 5, "Ei": 1024 ** 6,
        "k": 1000, "M": 1000 ** 2, "G": 1000 ** 3,
        "T": 1000 ** 4, "P": 1000 ** 5, "E": 1000 ** 6,
    }
    number, factor = quantity, 1
    # Try the two-letter binary suffixes before the one-letter decimal ones so
    # that "Mi" is not mistaken for a value ending in an unknown "i".
    for suffix_length in (2, 1):
        suffix = quantity[-suffix_length:]
        if suffix in multipliers:
            number, factor = quantity[:-suffix_length], multipliers[suffix]
            break
    try:
        return int(float(number) * factor)
    except (ValueError, OverflowError):
        return None


def delete_highest_memory_pod(target_node):
    """Delete the pod that uses the most memory on *target_node*.

    Pods on the node are found with ``kubectl get pods --all-namespaces`` and
    their usage with ``kubectl top pod --all-namespaces``. Usage values are
    converted to bytes before comparison so that, for example, ``2Gi`` ranks
    above ``500Mi``. Pods are matched on (namespace, name), so equally named
    pods in different namespaces are kept apart.

    Pods owned by a DaemonSet or by the Node itself (static/mirror pods) are
    never selected: they are recreated on the same node even while other
    nodes are the only schedulable ones, so deleting them cannot shift memory.

    Args:
        target_node: Name of the node to evict a pod from.

    Returns:
        None. Prints which pod was deleted, or that none qualified.
    """
    node_bound_owner_kinds = {"DaemonSet", "Node"}

    pods_info = json.loads(execute_kubectl_command("get pods --all-namespaces -o json"))
    candidates = set()
    for item in pods_info.get("items", []):
        if item["spec"].get("nodeName") != target_node:
            continue
        metadata = item["metadata"]
        owners = metadata.get("ownerReferences") or []
        if any(owner.get("kind") in node_bound_owner_kinds for owner in owners):
            continue
        candidates.add((metadata["namespace"], metadata["name"]))

    pod_usage_output = execute_kubectl_command("top pod --all-namespaces --no-headers")
    highest_memory_usage = 0
    pod_to_delete = None
    for line in pod_usage_output.splitlines():
        parts = line.split()
        if len(parts) < 4:
            # Blank or truncated line: nothing to parse.
            continue
        # Columns: NAMESPACE NAME CPU(cores) MEMORY(bytes)
        namespace, pod_name, memory_text = parts[0], parts[1], parts[3]
        if (namespace, pod_name) not in candidates:
            continue
        memory_usage = _memory_quantity_to_bytes(memory_text)
        if memory_usage is not None and memory_usage > highest_memory_usage:
            highest_memory_usage = memory_usage
            pod_to_delete = (pod_name, namespace)

    if pod_to_delete:
        pod_name, namespace = pod_to_delete
        print(f"Deleting pod {pod_name} in namespace {namespace} from node {target_node} due to high memory usage.")
        execute_kubectl_command(f"delete pod {pod_name} --namespace {namespace}")
    else:
        print(f"No high-memory pod found on node {target_node} to delete.")
def move_pods_from_high_to_low(pods_to_move):
    """Shift pods from the busiest node towards the least busy one.

    Every node except the one with the lowest memory usage is cordoned, then
    up to *pods_to_move* of the heaviest pods on the highest-usage node are
    deleted one at a time, and finally the cordoned nodes are uncordoned.

    Args:
        pods_to_move: How many pods to delete from the highest-usage node.

    Returns:
        None. Does nothing (beyond printing a notice) when fewer than two
        nodes report memory usage, since there is then nowhere to move pods.
    """
    usage_data = get_node_stats()
    if not usage_data or len(usage_data) < 2:
        print("Need at least two nodes with memory metrics to move pods.")
        return

    lowest_memory_node, highest_memory_node = usage_data[0][0], usage_data[-1][0]
    print(f"Lowest memory node: {lowest_memory_node}")
    print(f"Highest memory node: {highest_memory_node}")

    nodes_to_cordon = [node for node, _ in usage_data if node != lowest_memory_node]
    try:
        cordon_nodes(nodes_to_cordon)
        for _ in range(pods_to_move):
            delete_highest_memory_pod(highest_memory_node)
    finally:
        # Always restore schedulability, even if cordoning or a delete failed
        # part-way; otherwise the cluster is left with most nodes cordoned.
        uncordon_nodes(nodes_to_cordon)
def get_node_stats():
    """Return the ``(node, memory_percent)`` list ordered from lowest to highest usage.

    When ``get_memory_usage()`` yields nothing, prints a notice and returns None.
    """
    stats = get_memory_usage()
    if stats:
        # Order in place by the percentage element of each tuple.
        stats.sort(key=lambda entry: entry[1])
        return stats
    print("No nodes found.")
    return None
def balance_nodes(pods_to_move=1, threshold_pct=10, max_cycles=25):
    """Repeatedly move pods until node memory usage is roughly even.

    Each cycle compares the lowest and highest memory percentages reported by
    ``get_node_stats()``. While the gap exceeds *threshold_pct*, pods are
    moved off the busiest node and the loop waits one minute for the cluster
    to settle before measuring again.

    Args:
        pods_to_move: Number of pods to delete from the busiest node per cycle.
        threshold_pct: Gap, in percentage points, between the highest and
            lowest node memory usage above which balancing is attempted.
        max_cycles: Upper bound on the number of balancing cycles.

    Returns:
        None.
    """
    cycles_completed = 0
    while cycles_completed < max_cycles:
        node_stats = get_node_stats()
        if not node_stats:
            # No usable metrics; check before indexing into the result.
            break

        min_node, max_node = node_stats[0], node_stats[-1]
        print(f"Node with lowest memory usage: {min_node[0]} ({min_node[1]}%)")
        print(f"Node with highest memory usage: {max_node[0]} ({max_node[1]}%)")

        if max_node[1] - min_node[1] <= threshold_pct:
            print("No need to balance nodes. Exiting...")
            break

        print(f"Moving {pods_to_move} pods from {max_node[0]} to {min_node[0]}")
        move_pods_from_high_to_low(pods_to_move)
        cycles_completed += 1

        # Only wait when another measurement will actually follow.
        if cycles_completed < max_cycles:
            print("Waiting for 1 minute before next iteration...")
            time.sleep(60)
# Script entry point: run the balancer with its default settings when this
# file is executed directly rather than imported.
if __name__ == "__main__":
    balance_nodes()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment