Last active
April 21, 2024 18:08
-
-
Save silashansen/dff33e237946229623a9c26bae83dfa1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# THIS IS A VERY HACKY AD-HOC SOLUTION TO BALANCE NODES BASED ON MEMORY USAGE ACROSS THE CLUSTER
import subprocess | |
import json | |
import time | |
def execute_kubectl_command(command):
    """Run a kubectl command and return its standard output.

    Args:
        command: The kubectl arguments as one whitespace-separated string,
            without the leading ``kubectl`` (e.g. ``"get nodes -o json"``).

    Returns:
        The text kubectl wrote to stdout.

    Raises:
        RuntimeError: If kubectl exits with a non-zero status. The message
            contains whatever kubectl wrote to stderr.
    """
    full_command = ["kubectl"] + command.split()
    result = subprocess.run(full_command, capture_output=True, text=True)
    # Judge success by the exit status, not by stderr being non-empty:
    # warnings are written to stderr even when the command succeeded, and a
    # failing command is not guaranteed to write anything there.
    if result.returncode != 0:
        raise RuntimeError(f"Error executing kubectl command: {result.stderr}")
    return result.stdout
def get_memory_usage():
    """Return the memory utilisation of every non-control-plane node.

    Node labels are read from ``kubectl get nodes -o json`` and the usage
    figures from ``kubectl top nodes --no-headers``.

    Returns:
        A list of ``(node_name, memory_percent)`` tuples, where
        ``memory_percent`` is an int. Control-plane nodes and nodes whose
        memory percentage cannot be parsed are left out.
    """
    # Role labels treated as "control plane": the spelling this script has
    # always checked (used by some distributions, e.g. Rancher RKE), the
    # upstream Kubernetes label, and its legacy "master" predecessor.
    control_plane_labels = (
        "node-role.kubernetes.io/controlplane",
        "node-role.kubernetes.io/control-plane",
        "node-role.kubernetes.io/master",
    )

    # Fetch detailed node information and keep the names of worker nodes.
    nodes_info = json.loads(execute_kubectl_command("get nodes -o json"))
    worker_nodes = set()
    for node in nodes_info['items']:
        labels = node.get('metadata', {}).get('labels', {})
        if not any(label in labels for label in control_plane_labels):
            worker_nodes.add(node['metadata']['name'])

    # Get memory usage for the worker nodes.
    output = execute_kubectl_command("top nodes --no-headers")
    usage_data = []
    for line in output.splitlines():
        parts = line.split()
        # Expected columns: NAME CPU(cores) CPU% MEMORY(bytes) MEMORY%
        if len(parts) < 5 or parts[0] not in worker_nodes:
            continue
        try:
            memory_pct = int(parts[4].rstrip("%"))
        except ValueError:
            # A node without metrics is reported as "<unknown>"; it cannot
            # be ranked, so skip it instead of aborting the whole run.
            continue
        usage_data.append((parts[0], memory_pct))
    return usage_data
def cordon_nodes(nodes):
    """Cordon every node in ``nodes`` via ``kubectl cordon``.

    Args:
        nodes: Iterable of node names to cordon, processed in order.
    """
    for name in nodes:
        print("Cordoning {}".format(name))
        execute_kubectl_command("cordon {}".format(name))
def uncordon_nodes(nodes):
    """Uncordon every node in ``nodes`` via ``kubectl uncordon``.

    Args:
        nodes: Iterable of node names to uncordon, processed in order.
    """
    for name in nodes:
        print("Uncordoning {}".format(name))
        execute_kubectl_command("uncordon {}".format(name))
def _memory_to_bytes(quantity):
    """Convert a memory quantity such as ``512Mi`` to bytes; None if unparseable."""
    units = {
        "Ki": 2 ** 10, "Mi": 2 ** 20, "Gi": 2 ** 30, "Ti": 2 ** 40,
        "k": 10 ** 3, "M": 10 ** 6, "G": 10 ** 9, "T": 10 ** 12,
    }
    number, factor = quantity, 1
    for suffix, multiplier in units.items():
        if quantity.endswith(suffix):
            number, factor = quantity[:-len(suffix)], multiplier
            break
    try:
        return int(number) * factor
    except ValueError:
        return None


def delete_highest_memory_pod(target_node):
    """Delete the pod that uses the most memory on ``target_node``.

    Pod placement comes from ``kubectl get pods --all-namespaces -o json``
    and memory figures from ``kubectl top pod --all-namespaces``. Pods owned
    by a DaemonSet are never selected. If no pod on the node has a usable,
    non-zero memory reading, nothing is deleted and a notice is printed.

    Args:
        target_node: Name of the node to remove a pod from.
    """
    # Collect the pods scheduled on the target node, keyed by
    # (namespace, name) so equally named pods in different namespaces
    # cannot shadow each other.
    pods_info = json.loads(execute_kubectl_command("get pods --all-namespaces -o json"))
    candidates = set()
    for item in pods_info['items']:
        if item['spec'].get('nodeName') != target_node:
            continue
        owners = item['metadata'].get('ownerReferences') or []
        # A DaemonSet pod is recreated on the same node even while other
        # nodes are cordoned, so deleting it would not move any memory.
        if any(owner.get('kind') == 'DaemonSet' for owner in owners):
            continue
        candidates.add((item['metadata']['namespace'], item['metadata']['name']))

    # Find the candidate with the highest memory usage.
    pod_usage_output = execute_kubectl_command("top pod --all-namespaces --no-headers")
    highest_memory_usage = 0
    pod_to_delete = None
    for line in pod_usage_output.splitlines():
        parts = line.split()
        # Expected columns: NAMESPACE NAME CPU(cores) MEMORY(bytes)
        if len(parts) < 4:
            continue
        namespace, pod_name = parts[0], parts[1]
        if (namespace, pod_name) not in candidates:
            continue
        # Compare in bytes so that e.g. 2Gi correctly outranks 500Mi.
        memory_usage = _memory_to_bytes(parts[3])
        if memory_usage is not None and memory_usage > highest_memory_usage:
            highest_memory_usage = memory_usage
            pod_to_delete = (pod_name, namespace)

    # Delete the identified pod.
    if pod_to_delete:
        pod_name, namespace = pod_to_delete
        print(f"Deleting pod {pod_name} in namespace {namespace} from node {target_node} due to high memory usage.")
        execute_kubectl_command(f"delete pod {pod_name} --namespace {namespace}")
    else:
        print(f"No high-memory pod found on node {target_node} to delete.")
def move_pods_from_high_to_low(pods_to_move):
    """Delete pods on the busiest node while only the least-used node is schedulable.

    Every node except the one with the lowest memory usage is cordoned, then
    the highest-memory pod on the busiest node is deleted ``pods_to_move``
    times, and finally the cordoned nodes are uncordoned again. Presumably
    the deleted pods are recreated by their controllers and land on the only
    schedulable node; this function does not verify that.

    NOTE: every node in the cordon list is uncordoned afterwards, including
    any that were already cordoned before this function ran.

    Args:
        pods_to_move: Number of delete operations to perform.
    """
    usage_data = get_node_stats()
    if not usage_data:
        # No node statistics available, so there is nothing to balance.
        return
    lowest_memory_node, highest_memory_node = usage_data[0][0], usage_data[-1][0]
    print(f"Lowest memory node: {lowest_memory_node}")
    print(f"Highest memory node: {highest_memory_node}")

    nodes_to_cordon = [node for node, _ in usage_data if node != lowest_memory_node]
    try:
        cordon_nodes(nodes_to_cordon)
        for _ in range(pods_to_move):
            delete_highest_memory_pod(highest_memory_node)
    finally:
        # Always restore scheduling, even if cordoning or a deletion failed;
        # otherwise the cluster would be left with most nodes unschedulable.
        uncordon_nodes(nodes_to_cordon)
def get_node_stats():
    """Return per-node memory usage ordered from least to most used.

    Returns:
        The ``(node, memory_pct)`` tuples from ``get_memory_usage()`` sorted
        ascending by percentage, or ``None`` (after printing a notice) when
        no usage data was returned.
    """
    stats = get_memory_usage()
    if stats:
        stats.sort(key=lambda entry: entry[1])
        return stats
    print("No nodes found.")
    return None
def balance_nodes(pods_to_move=1, threshold_pct=10, max_cycles=25, wait_seconds=60):
    """Repeatedly shift load from the busiest node to the least-used node.

    Each cycle compares the highest and lowest node memory percentages. If
    they differ by more than ``threshold_pct`` points, pods are moved and
    the function sleeps before checking again. It stops when the nodes are
    within the threshold, when no node statistics are available, or after
    ``max_cycles`` cycles.

    Args:
        pods_to_move: Number of pods to delete from the busiest node per cycle.
        threshold_pct: Largest tolerated gap, in percentage points, between
            the highest and lowest node memory usage.
        max_cycles: Upper bound on the number of balancing cycles.
        wait_seconds: Pause after each move before usage is sampled again.
    """
    for _ in range(max_cycles):
        node_stats = get_node_stats()
        # Check for missing data before indexing into the result.
        if not node_stats:
            break

        min_node, max_node = node_stats[0], node_stats[-1]
        print(f"Node with lowest memory usage: {min_node[0]} ({min_node[1]}%)")
        print(f"Node with highest memory usage: {max_node[0]} ({max_node[1]}%)")

        if max_node[1] - min_node[1] <= threshold_pct:
            print("No need to balance nodes. Exiting...")
            break

        print(f"Moving {pods_to_move} pods from {max_node[0]} to {min_node[0]}")
        move_pods_from_high_to_low(pods_to_move)
        print(f"Waiting for {wait_seconds} seconds before next iteration...")
        time.sleep(wait_seconds)
# Script entry point: run the balancer, moving one pod per cycle.
if __name__ == "__main__":
    balance_nodes(pods_to_move=1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment