# silashansen/balance_nodes.py

## balance_nodes.py
# THIS IS A VERY HACKY AD-HOC SOLUTION TO BALANCE NODES BASED ON MEMORY USAGE ACROSS THE CLUSTER

import subprocess
import json
import time

def execute_kubectl_command(command):
    """Run a kubectl command and return what it wrote to stdout.

    Args:
        command: The kubectl arguments as a single string, e.g.
            "get nodes -o json". The string is split on whitespace, so an
            individual argument must not itself contain spaces.

    Returns:
        The captured standard output as text.

    Raises:
        Exception: If kubectl exits with a non-zero status. The message
            carries kubectl's stderr, or its stdout when stderr is empty.
    """
    full_command = ["kubectl"] + command.split()
    result = subprocess.run(full_command, capture_output=True, text=True)
    # Decide success from the exit status rather than from stderr: text on
    # stderr does not by itself mean the command failed (warnings go there
    # too), and a failing command is not guaranteed to write to stderr.
    if result.returncode != 0:
        details = result.stderr or result.stdout
        raise Exception(f"Error executing kubectl command: {details}")
    return result.stdout

def get_memory_usage():
    """Collect the memory utilisation of every non-control-plane node.

    Node names come from `kubectl get nodes -o json`; any node carrying one
    of the control-plane role labels is left out. The percentages come from
    the fifth column of `kubectl top nodes --no-headers`.

    Returns:
        A list of (node_name, memory_percent) tuples, in the order kubectl
        printed them, with memory_percent as an int. Nodes whose memory
        column is not a number (for example "<unknown>" when no metrics are
        available) and lines with fewer than five columns are skipped.
    """
    # The first label is the one this script originally checked; the other
    # two are the upstream Kubernetes control-plane role labels (current and
    # legacy), so control-plane nodes are excluded on those clusters too.
    control_plane_labels = (
        "node-role.kubernetes.io/controlplane",
        "node-role.kubernetes.io/control-plane",
        "node-role.kubernetes.io/master",
    )

    nodes_info = json.loads(execute_kubectl_command("get nodes -o json"))
    worker_nodes = set()  # set: O(1) membership test in the loop below
    for node in nodes_info['items']:
        labels = node.get('metadata', {}).get('labels', {})
        if not any(label in labels for label in control_plane_labels):
            worker_nodes.add(node['metadata']['name'])

    output = execute_kubectl_command("top nodes --no-headers")
    usage_data = []
    for line in output.splitlines():
        parts = line.split()
        if len(parts) < 5 or parts[0] not in worker_nodes:
            continue
        try:
            memory_pct = int(parts[4].rstrip('%'))
        except ValueError:
            # No usable metric for this node; it cannot be ranked.
            continue
        usage_data.append((parts[0], memory_pct))
    return usage_data

def cordon_nodes(nodes):
    """Cordon each node in *nodes* by running `kubectl cordon <node>`.

    Args:
        nodes: Iterable of node names. A progress line is printed before
            each node is cordoned.
    """
    for node_name in nodes:
        announcement = f"Cordoning {node_name}"
        print (announcement)
        kubectl_args = f"cordon {node_name}"
        execute_kubectl_command(kubectl_args)

def uncordon_nodes(nodes):
    """Uncordon each node in *nodes* by running `kubectl uncordon <node>`.

    Args:
        nodes: Iterable of node names. A progress line is printed before
            each node is uncordoned.
    """
    for node_name in nodes:
        announcement = f"Uncordoning {node_name}"
        print (announcement)
        kubectl_args = f"uncordon {node_name}"
        execute_kubectl_command(kubectl_args)

def delete_highest_memory_pod(target_node):
    """Delete the pod that uses the most memory on *target_node*.

    Pods on the node are listed with `kubectl get pods --all-namespaces`,
    their usage is read from `kubectl top pod --all-namespaces`, and the
    largest consumer is removed with `kubectl delete pod`.

    Pods owned by a DaemonSet are never chosen: deleting one does not move
    it, the DaemonSet controller recreates it on the same node.

    Args:
        target_node: Name of the node to relieve.
    """
    pods_info = json.loads(execute_kubectl_command("get pods --all-namespaces -o json"))
    # Key candidates by (namespace, name): pod names are only unique within
    # a namespace, so the name alone can collide.
    candidates = set()
    for item in pods_info['items']:
        if item['spec'].get('nodeName') != target_node:
            continue
        owners = item['metadata'].get('ownerReferences') or []
        if any(owner.get('kind') == 'DaemonSet' for owner in owners):
            continue
        candidates.add((item['metadata']['namespace'], item['metadata']['name']))

    pod_usage_output = execute_kubectl_command("top pod --all-namespaces --no-headers")
    highest_memory_usage = 0
    pod_to_delete = None
    for line in pod_usage_output.splitlines():
        parts = line.split()
        if len(parts) < 4:
            continue  # not a "namespace name cpu memory" row
        namespace, pod_name, memory_text = parts[0], parts[1], parts[3]
        if (namespace, pod_name) not in candidates:
            continue
        # Compare in bytes so that values with different units rank
        # correctly (e.g. 2Gi must beat 900Mi).
        memory_usage = _memory_quantity_to_bytes(memory_text)
        if memory_usage is not None and memory_usage > highest_memory_usage:
            highest_memory_usage = memory_usage
            pod_to_delete = (pod_name, namespace)

    if pod_to_delete:
        pod_name, namespace = pod_to_delete
        print(f"Deleting pod {pod_name} in namespace {namespace} from node {target_node} due to high memory usage.")
        execute_kubectl_command(f"delete pod {pod_name} --namespace {namespace}")
    else:
        print(f"No high-memory pod found on node {target_node} to delete.")


def _memory_quantity_to_bytes(quantity):
    """Convert a memory quantity such as "512Mi" to bytes; None if unparseable."""
    units = {
        "Ki": 1024, "Mi": 1024 ** 2, "Gi": 1024 ** 3, "Ti": 1024 ** 4,
        "k": 1000, "M": 1000 ** 2, "G": 1000 ** 3, "T": 1000 ** 4,
    }
    number, multiplier = quantity, 1
    for suffix, factor in units.items():
        if quantity.endswith(suffix):
            number, multiplier = quantity[:-len(suffix)], factor
            break
    try:
        return int(float(number) * multiplier)
    except (ValueError, OverflowError):
        return None

def move_pods_from_high_to_low(pods_to_move):
    """Push pods off the busiest node towards the least busy one.

    Every ranked node except the one with the lowest memory usage is
    cordoned, the top memory consumers on the highest-usage node are
    deleted one at a time, and the cordoned nodes are then uncordoned.

    Args:
        pods_to_move: How many pods to delete from the highest-usage node.
    """
    usage_data = get_node_stats()
    if not usage_data:
        # get_node_stats() has already reported that there are no nodes.
        return
    lowest_memory_node, highest_memory_node = usage_data[0][0], usage_data[-1][0]

    print (f"Lowest memory node: {lowest_memory_node}")
    print (f"Highest memory node: {highest_memory_node}")

    if lowest_memory_node == highest_memory_node:
        # Only one node is ranked, so a deleted pod has nowhere else to go.
        print("Only one node available; nothing to move.")
        return

    nodes_to_cordon = [node for node, _ in usage_data if node != lowest_memory_node]

    # NOTE: every node in nodes_to_cordon is uncordoned afterwards, including
    # any that were already cordoned before this function ran.
    try:
        cordon_nodes(nodes_to_cordon)
        for _ in range(pods_to_move):
            delete_highest_memory_pod(highest_memory_node)
    finally:
        # Always undo the cordon, even if a cordon or delete call failed,
        # so the cluster is not left with unschedulable nodes.
        uncordon_nodes(nodes_to_cordon)


def get_node_stats():
    """Return node memory usage ordered from least to most used.

    Returns:
        The (node, memory_pct) tuples from get_memory_usage(), sorted in
        place by ascending percentage. When there are no entries, prints
        "No nodes found." and returns None.
    """
    stats = get_memory_usage()
    if stats:
        stats.sort(key=lambda entry: entry[1])
        return stats
    print("No nodes found.")
    return None

def balance_nodes(pods_to_move=1, threshold_pct=10, max_cycles=25, wait_seconds=60):
    """Repeatedly move pods until node memory usage is roughly even.

    Each cycle compares the least and most memory-loaded nodes. If their
    usage differs by more than *threshold_pct* percentage points, pods are
    moved off the busiest node and the function waits before re-checking.

    Args:
        pods_to_move: Pods to delete from the busiest node per cycle.
        threshold_pct: Largest tolerated gap, in percentage points, between
            the highest and lowest node memory usage.
        max_cycles: Upper bound on the number of cycles.
        wait_seconds: Pause after each move, before usage is re-checked.
    """
    for _ in range(max_cycles):
        node_stats = get_node_stats()
        # Check for missing stats before indexing into them.
        if not node_stats:
            break

        min_node, max_node = node_stats[0], node_stats[-1]
        print(f"Node with lowest memory usage: {min_node[0]} ({min_node[1]}%)")
        print(f"Node with highest memory usage: {max_node[0]} ({max_node[1]}%)")

        # Stop once the spread is within the tolerated threshold.
        if max_node[1] - min_node[1] <= threshold_pct:
            print("No need to balance nodes. Exiting...")
            break

        print(f"Moving {pods_to_move} pods from {max_node[0]} to {min_node[0]}")
        move_pods_from_high_to_low(pods_to_move)
        print(f"Waiting for {wait_seconds} seconds before next iteration...")
        time.sleep(wait_seconds)

# When run as a script, start balancing with balance_nodes()'s default arguments.
if __name__ == "__main__":
    balance_nodes()
	# THIS IS A VERY HACKY AD-HOC SOLUTION TO BALANCE NODES BASED ON MEMORY USAGE ACROSS THE CLUSTER

	import subprocess
	import json
	import time

	def execute_kubectl_command(command):
	    """Run a kubectl command and return what it wrote to stdout.

	    Args:
	        command: The kubectl arguments as a single string, e.g.
	            "get nodes -o json". The string is split on whitespace, so an
	            individual argument must not itself contain spaces.

	    Returns:
	        The captured standard output as text.

	    Raises:
	        Exception: If kubectl exits with a non-zero status. The message
	            carries kubectl's stderr, or its stdout when stderr is empty.
	    """
	    full_command = ["kubectl"] + command.split()
	    result = subprocess.run(full_command, capture_output=True, text=True)
	    # Decide success from the exit status rather than from stderr: text on
	    # stderr does not by itself mean the command failed (warnings go there
	    # too), and a failing command is not guaranteed to write to stderr.
	    if result.returncode != 0:
	        details = result.stderr or result.stdout
	        raise Exception(f"Error executing kubectl command: {details}")
	    return result.stdout

	def get_memory_usage():
	    """Collect the memory utilisation of every non-control-plane node.

	    Node names come from `kubectl get nodes -o json`; any node carrying one
	    of the control-plane role labels is left out. The percentages come from
	    the fifth column of `kubectl top nodes --no-headers`.

	    Returns:
	        A list of (node_name, memory_percent) tuples, in the order kubectl
	        printed them, with memory_percent as an int. Nodes whose memory
	        column is not a number (for example "<unknown>" when no metrics are
	        available) and lines with fewer than five columns are skipped.
	    """
	    # The first label is the one this script originally checked; the other
	    # two are the upstream Kubernetes control-plane role labels (current and
	    # legacy), so control-plane nodes are excluded on those clusters too.
	    control_plane_labels = (
	        "node-role.kubernetes.io/controlplane",
	        "node-role.kubernetes.io/control-plane",
	        "node-role.kubernetes.io/master",
	    )

	    nodes_info = json.loads(execute_kubectl_command("get nodes -o json"))
	    worker_nodes = set()  # set: O(1) membership test in the loop below
	    for node in nodes_info['items']:
	        labels = node.get('metadata', {}).get('labels', {})
	        if not any(label in labels for label in control_plane_labels):
	            worker_nodes.add(node['metadata']['name'])

	    output = execute_kubectl_command("top nodes --no-headers")
	    usage_data = []
	    for line in output.splitlines():
	        parts = line.split()
	        if len(parts) < 5 or parts[0] not in worker_nodes:
	            continue
	        try:
	            memory_pct = int(parts[4].rstrip('%'))
	        except ValueError:
	            # No usable metric for this node; it cannot be ranked.
	            continue
	        usage_data.append((parts[0], memory_pct))
	    return usage_data

	def cordon_nodes(nodes):
	    """Cordon each node in *nodes* by running `kubectl cordon <node>`.

	    Args:
	        nodes: Iterable of node names. A progress line is printed before
	            each node is cordoned.
	    """
	    for node in nodes:
	        print (f"Cordoning {node}")
	        execute_kubectl_command(f"cordon {node}")

	def uncordon_nodes(nodes):
	    """Uncordon each node in *nodes* by running `kubectl uncordon <node>`.

	    Args:
	        nodes: Iterable of node names. A progress line is printed before
	            each node is uncordoned.
	    """
	    for node in nodes:
	        print (f"Uncordoning {node}")
	        execute_kubectl_command(f"uncordon {node}")

	def delete_highest_memory_pod(target_node):
	    """Delete the pod that uses the most memory on *target_node*.

	    Pods on the node are listed with `kubectl get pods --all-namespaces`,
	    their usage is read from `kubectl top pod --all-namespaces`, and the
	    largest consumer is removed with `kubectl delete pod`.

	    Pods owned by a DaemonSet are never chosen: deleting one does not move
	    it, the DaemonSet controller recreates it on the same node.

	    Args:
	        target_node: Name of the node to relieve.
	    """
	    pods_info = json.loads(execute_kubectl_command("get pods --all-namespaces -o json"))
	    # Key candidates by (namespace, name): pod names are only unique within
	    # a namespace, so the name alone can collide.
	    candidates = set()
	    for item in pods_info['items']:
	        if item['spec'].get('nodeName') != target_node:
	            continue
	        owners = item['metadata'].get('ownerReferences') or []
	        if any(owner.get('kind') == 'DaemonSet' for owner in owners):
	            continue
	        candidates.add((item['metadata']['namespace'], item['metadata']['name']))

	    pod_usage_output = execute_kubectl_command("top pod --all-namespaces --no-headers")
	    highest_memory_usage = 0
	    pod_to_delete = None
	    for line in pod_usage_output.splitlines():
	        parts = line.split()
	        if len(parts) < 4:
	            continue  # not a "namespace name cpu memory" row
	        namespace, pod_name, memory_text = parts[0], parts[1], parts[3]
	        if (namespace, pod_name) not in candidates:
	            continue
	        # Compare in bytes so that values with different units rank
	        # correctly (e.g. 2Gi must beat 900Mi).
	        memory_usage = _memory_quantity_to_bytes(memory_text)
	        if memory_usage is not None and memory_usage > highest_memory_usage:
	            highest_memory_usage = memory_usage
	            pod_to_delete = (pod_name, namespace)

	    if pod_to_delete:
	        pod_name, namespace = pod_to_delete
	        print(f"Deleting pod {pod_name} in namespace {namespace} from node {target_node} due to high memory usage.")
	        execute_kubectl_command(f"delete pod {pod_name} --namespace {namespace}")
	    else:
	        print(f"No high-memory pod found on node {target_node} to delete.")


	def _memory_quantity_to_bytes(quantity):
	    """Convert a memory quantity such as "512Mi" to bytes; None if unparseable."""
	    units = {
	        "Ki": 1024, "Mi": 1024 ** 2, "Gi": 1024 ** 3, "Ti": 1024 ** 4,
	        "k": 1000, "M": 1000 ** 2, "G": 1000 ** 3, "T": 1000 ** 4,
	    }
	    number, multiplier = quantity, 1
	    for suffix, factor in units.items():
	        if quantity.endswith(suffix):
	            number, multiplier = quantity[:-len(suffix)], factor
	            break
	    try:
	        return int(float(number) * multiplier)
	    except (ValueError, OverflowError):
	        return None

	def move_pods_from_high_to_low(pods_to_move):
	    """Push pods off the busiest node towards the least busy one.

	    Every ranked node except the one with the lowest memory usage is
	    cordoned, the top memory consumers on the highest-usage node are
	    deleted one at a time, and the cordoned nodes are then uncordoned.

	    Args:
	        pods_to_move: How many pods to delete from the highest-usage node.
	    """
	    usage_data = get_node_stats()
	    if not usage_data:
	        # get_node_stats() has already reported that there are no nodes.
	        return
	    lowest_memory_node, highest_memory_node = usage_data[0][0], usage_data[-1][0]

	    print (f"Lowest memory node: {lowest_memory_node}")
	    print (f"Highest memory node: {highest_memory_node}")

	    if lowest_memory_node == highest_memory_node:
	        # Only one node is ranked, so a deleted pod has nowhere else to go.
	        print("Only one node available; nothing to move.")
	        return

	    nodes_to_cordon = [node for node, _ in usage_data if node != lowest_memory_node]

	    # NOTE: every node in nodes_to_cordon is uncordoned afterwards, including
	    # any that were already cordoned before this function ran.
	    try:
	        cordon_nodes(nodes_to_cordon)
	        for _ in range(pods_to_move):
	            delete_highest_memory_pod(highest_memory_node)
	    finally:
	        # Always undo the cordon, even if a cordon or delete call failed,
	        # so the cluster is not left with unschedulable nodes.
	        uncordon_nodes(nodes_to_cordon)


	def get_node_stats():
	    """Return node memory usage ordered from least to most used.

	    Returns:
	        The (node, memory_pct) tuples from get_memory_usage(), sorted in
	        place by ascending percentage. When there are no entries, prints
	        "No nodes found." and returns None.
	    """
	    usage_data = get_memory_usage()
	    if not usage_data:
	        print("No nodes found.")
	        return None
	    usage_data.sort(key=lambda x: x[1])
	    return usage_data

	def balance_nodes(pods_to_move=1, threshold_pct=10, max_cycles=25, wait_seconds=60):
	    """Repeatedly move pods until node memory usage is roughly even.

	    Each cycle compares the least and most memory-loaded nodes. If their
	    usage differs by more than *threshold_pct* percentage points, pods are
	    moved off the busiest node and the function waits before re-checking.

	    Args:
	        pods_to_move: Pods to delete from the busiest node per cycle.
	        threshold_pct: Largest tolerated gap, in percentage points, between
	            the highest and lowest node memory usage.
	        max_cycles: Upper bound on the number of cycles.
	        wait_seconds: Pause after each move, before usage is re-checked.
	    """
	    for _ in range(max_cycles):
	        node_stats = get_node_stats()
	        # Check for missing stats before indexing into them.
	        if not node_stats:
	            break

	        min_node, max_node = node_stats[0], node_stats[-1]
	        print(f"Node with lowest memory usage: {min_node[0]} ({min_node[1]}%)")
	        print(f"Node with highest memory usage: {max_node[0]} ({max_node[1]}%)")

	        # Stop once the spread is within the tolerated threshold.
	        if max_node[1] - min_node[1] <= threshold_pct:
	            print("No need to balance nodes. Exiting...")
	            break

	        print(f"Moving {pods_to_move} pods from {max_node[0]} to {min_node[0]}")
	        move_pods_from_high_to_low(pods_to_move)
	        print(f"Waiting for {wait_seconds} seconds before next iteration...")
	        time.sleep(wait_seconds)

	# When run as a script, start balancing with balance_nodes()'s default arguments.
	if __name__ == "__main__":
	    balance_nodes()