Skip to content

Instantly share code, notes, and snippets.

@miticojo
Last active July 25, 2024 07:32
Show Gist options
  • Save miticojo/5c68460cc8f9c8176b98b5d1ff3ad06b to your computer and use it in GitHub Desktop.
Save miticojo/5c68460cc8f9c8176b98b5d1ff3ad06b to your computer and use it in GitHub Desktop.
Analyze GKE realtime utilization
#!/bin/bash
# Prints a resource report for the Kubernetes cluster that kubectl currently
# targets: node allocatable vs. capacity, live node usage, per-namespace
# requests/limits/usage with sizing hints, and allocatable/capacity ratios.
# Relies on kubectl, jq and bc being available on PATH.
#
# Abort as soon as a command fails outside of a conditional context.
set -e
# Convert a Kubernetes CPU quantity to whole millicores.
# Arguments: $1 - CPU quantity: "250m" (millicores), "2" (cores),
#                 "0.5" (fractional cores), "1500u"/"1500000n"
#                 (micro-/nanocores) or an empty string.
# Outputs:   the value in millicores on stdout, without a unit suffix.
#            Fractions of a millicore are truncated. Empty input yields 0.
#            Anything unrecognised is echoed back unchanged.
convert_to_m() {
  local value=${1:-}
  local sub_milli_re='^([0-9]+)([un])$'
  local decimal_re='^([0-9]*)[.]([0-9]+)$'
  local frac

  if [[ -z $value ]]; then
    echo 0
  elif [[ $value == *m ]]; then
    # Already in millicores: just drop the suffix.
    echo "${value%m}"
  elif [[ $value =~ $sub_milli_re ]]; then
    # Micro-/nanocores: scale down to millicores.
    if [[ ${BASH_REMATCH[2]} == u ]]; then
      echo "$(( 10#${BASH_REMATCH[1]} / 1000 ))"
    else
      echo "$(( 10#${BASH_REMATCH[1]} / 1000000 ))"
    fi
  elif [[ $value =~ ^[0-9]+$ ]]; then
    # Whole cores. The 10# prefix keeps leading zeros from being read as octal.
    echo "$(( 10#$value * 1000 ))"
  elif [[ $value =~ $decimal_re ]]; then
    # Fractional cores: pad/truncate the fraction to exactly three digits so
    # it can be added as millicores without floating point.
    frac="${BASH_REMATCH[2]}000"
    echo "$(( 10#${BASH_REMATCH[1]:-0} * 1000 + 10#${frac:0:3} ))"
  else
    echo "$value"
  fi
}
# Convert a Kubernetes memory quantity to kibibytes (Ki).
# Arguments: $1 - quantity such as "16384Ki", "512Mi", "1.5Gi", "128M",
#                 a plain byte count ("134217728") or an empty string.
# Outputs:   integer Ki on stdout (truncated). Empty input yields 0.
#            Anything that is not "<number>[suffix]" with a known suffix is
#            echoed back unchanged.
# Notes:     Binary suffixes (Ki, Mi, Gi, Ti, Pi, Ei) are powers of 1024;
#            decimal suffixes (k, M, G, T, P, E) are powers of 1000; no
#            suffix means bytes and "m" means milli-bytes. The non-standard
#            bare "K" is still accepted as Ki so existing inputs keep working.
convert_memory_to_ki() {
  local value=${1:-}
  local quantity_re='^([0-9]+)([.]([0-9]+))?([A-Za-z]*)$'
  local whole frac suffix scale
  # Result is (number * mult) / div, which keeps everything in integers.
  local mult div

  if [[ -z $value ]]; then
    echo 0
    return 0
  fi
  if [[ ! $value =~ $quantity_re ]]; then
    echo "$value"
    return 0
  fi

  whole=${BASH_REMATCH[1]}
  # Three fractional digits are plenty and keep the products inside 64 bits.
  frac=${BASH_REMATCH[3]:0:3}
  suffix=${BASH_REMATCH[4]}

  case $suffix in
    "")   mult=1;                   div=1024 ;;
    m)    mult=1;                   div=1024000 ;;
    k)    mult=1000;                div=1024 ;;
    M)    mult=1000000;             div=1024 ;;
    G)    mult=1000000000;          div=1024 ;;
    T)    mult=1000000000000;       div=1024 ;;
    P)    mult=1000000000000000;    div=1024 ;;
    E)    mult=1000000000000000000; div=1024 ;;
    K|Ki) mult=1;                   div=1 ;;
    Mi)   mult=1024;                div=1 ;;
    Gi)   mult=1048576;             div=1 ;;
    Ti)   mult=1073741824;          div=1 ;;
    Pi)   mult=1099511627776;       div=1 ;;
    Ei)   mult=1125899906842624;    div=1 ;;
    *)
      echo "$value"
      return 0
      ;;
  esac

  scale=$(( 10 ** ${#frac} ))
  # The 10# prefix keeps leading zeros from being parsed as octal.
  echo "$(( (10#$whole * mult + 10#${frac:-0} * mult / scale) / div ))"
}
# Verify that every external tool used below is available on PATH before any
# work is done. A missing tool is reported on stderr (so it does not end up in
# captured report output) and the script exits with status 1.
for cmd in kubectl jq bc; do
  if ! command -v "$cmd" > /dev/null 2>&1; then
    echo "Error: $cmd is not installed. Please install it and try again." >&2
    exit 1
  fi
done
# Guess whether the current cluster is GKE Autopilot or GKE Standard from its
# node names.
# Outputs:  "GKE Autopilot", "GKE Standard", or "Unknown" when the node list
#           cannot be fetched.
# Returns:  0 on success, 1 when kubectl fails.
# Notes:    This is a heuristic. A node counts as Autopilot when its name
#           starts with "gk3-" or contains "autopilot".
get_cluster_type() {
  local node_names name
  local -a names=()

  # Declared separately from the assignment so a kubectl failure is not
  # masked by the exit status of `local`.
  if ! node_names=$(kubectl get nodes -o jsonpath='{.items[*].metadata.name}'); then
    echo "Unknown"
    return 1
  fi

  # The jsonpath output is a single space-separated line of node names.
  read -r -a names <<< "$node_names"
  for name in "${names[@]}"; do
    if [[ $name == gk3-* || $name == *autopilot* ]]; then
      echo "GKE Autopilot"
      return 0
    fi
  done
  echo "GKE Standard"
}
# Print allocatable vs. capacity CPU and memory for every node, followed by
# the cluster-wide allocatable totals.
# Outputs: one "Node: ..." line per node (CPU in millicores, memory in Ki)
#          and a final "Total ..." line, all on stdout.
# Uses:    kubectl, jq, convert_to_m, convert_memory_to_ki.
get_node_allocations() {
  echo "Node Allocations:"
  local node cpu_alloc mem_alloc cpu_cap mem_cap
  local total_cpu_alloc=0
  local total_mem_alloc=0

  # jq emits one line per node: "<name> <alloc cpu> <alloc mem> <cap cpu>m <cap mem>".
  # `read` splits the fields directly, so no per-field awk process is needed.
  while read -r node cpu_alloc mem_alloc cpu_cap mem_cap _; do
    cpu_alloc=$(convert_to_m "$cpu_alloc")
    mem_alloc=$(convert_memory_to_ki "$mem_alloc")
    cpu_cap=$(convert_to_m "$cpu_cap")
    mem_cap=$(convert_memory_to_ki "$mem_cap")
    total_cpu_alloc=$(( total_cpu_alloc + cpu_alloc ))
    total_mem_alloc=$(( total_mem_alloc + mem_alloc ))
    printf "Node: %-50s CPU Allocatable: %8sm/%8sm Memory Allocatable: %10sKi/%10sKi\n" \
      "$node" "$cpu_alloc" "$cpu_cap" "$mem_alloc" "$mem_cap"
  done < <(kubectl get nodes -o json \
    | jq -r '.items[] | .metadata.name as $name | .status.allocatable as $alloc | .status.capacity as $cap | "\($name) \($alloc.cpu) \($alloc.memory) \(($cap.cpu | tonumber * 1000 | tostring) + "m") \($cap.memory)"')

  echo "Total CPU Allocatable: ${total_cpu_alloc}m, Total Memory Allocatable: ${total_mem_alloc}Ki"
}
# Show per-node usage as reported by `kubectl top nodes`.
# Outputs: a heading, then for each row its 1st, 3rd and 5th column (node
#          name and the two columns labelled "CPU Used" / "Memory Used").
#          If `kubectl top nodes` yields no output, a Metrics API error
#          message is printed instead.
get_node_utilization() {
  echo "Node Utilization:"

  local top_rows
  # Errors from kubectl are deliberately silenced; empty output is the signal.
  top_rows=$(kubectl top nodes --no-headers 2>/dev/null) || true

  if [[ -z $top_rows ]]; then
    echo "Error: Metrics API not available. Please ensure metrics-server is installed and running."
    return
  fi

  local name cpu_col mem_col
  # Columns 2 and 4 (and anything past the 5th) are not displayed.
  while read -r name _ cpu_col _ mem_col _; do
    printf "Node: %-50s CPU Used: %8s Memory Used: %10s\n" "$name" "$cpu_col" "$mem_col"
  done <<< "$top_rows"
}
# Print right-sizing hints for one resource type of a namespace.
# Arguments: $1 - label used in the messages ("CPU" or "Memory")
#            $2 - unit suffix appended to suggested values ("m" or "Ki")
#            $3 - summed requests, $4 - summed limits, $5 - measured usage
#                 (integers, all in the same unit)
# Outputs:   up to two hint lines on stdout; nothing when usage is 0.
_print_sizing_hints() {
  local label=$1 unit=$2 request=$3 limit=$4 used=$5

  # Without measured usage there is nothing to compare against.
  if (( used == 0 )); then
    return 0
  fi

  # Thresholds are compared exactly with integers instead of a truncated
  # ratio: request/used > 1.5 <=> 2*request > 3*used, and
  # request/used < 0.5 <=> 2*request < used.
  if (( request * 2 > used * 3 )); then
    echo " ${label} request is significantly higher than usage. Consider reducing to $(( used * 12 / 10 ))${unit}"
  elif (( request * 2 < used )); then
    echo " ${label} request is significantly lower than usage. Consider increasing to $(( used * 8 / 10 ))${unit}"
  fi

  # limit/used > 2 <=> limit > 2*used. A limit of 0 means none was set.
  if (( limit > used * 2 )); then
    echo " ${label} limit is significantly higher than usage. Consider reducing to $(( used * 15 / 10 ))${unit}"
  elif (( limit == 0 )); then
    echo " ${label} limit is not set. Consider setting a limit of $(( used * 15 / 10 ))${unit}"
  fi
}

# For every namespace, sum container CPU/memory requests and limits, sum the
# usage reported by `kubectl top pods`, print the totals and add sizing hints.
# Outputs: a heading, then per namespace a "Namespace:" line, two totals
#          lines (CPU in millicores, memory in Ki) and optional hints.
# Uses:    kubectl, jq, convert_to_m, convert_memory_to_ki.
get_namespace_utilization() {
  echo "Namespace Resource Utilization:"
  local ns pod_top_output
  local cpu_req mem_req cpu_lim mem_lim cpu mem
  local cpu_request mem_request cpu_limit mem_limit cpu_used mem_used

  # The namespace list is read on fd 3 so that no command in the loop body
  # can consume it through stdin.
  while read -r ns <&3; do
    echo "Namespace: $ns"
    cpu_request=0
    mem_request=0
    cpu_limit=0
    mem_limit=0
    cpu_used=0
    mem_used=0

    # One line per container: "<cpu req> <mem req> <cpu limit> <mem limit>",
    # with "0" standing in for anything that is not set.
    while read -r cpu_req mem_req cpu_lim mem_lim; do
      cpu_request=$(( cpu_request + $(convert_to_m "$cpu_req") ))
      mem_request=$(( mem_request + $(convert_memory_to_ki "$mem_req") ))
      cpu_limit=$(( cpu_limit + $(convert_to_m "$cpu_lim") ))
      mem_limit=$(( mem_limit + $(convert_memory_to_ki "$mem_lim") ))
    done < <(kubectl get pods -n "$ns" -o json \
      | jq -r '.items[] | .spec.containers[] | (.resources.requests.cpu // "0") + " " + (.resources.requests.memory // "0") + " " + (.resources.limits.cpu // "0") + " " + (.resources.limits.memory // "0")')

    # Usage is best effort: if `kubectl top` fails the totals stay at 0.
    pod_top_output=$(kubectl top pods -n "$ns" --no-headers 2>/dev/null) || true
    if [[ -n $pod_top_output ]]; then
      while read -r _ cpu mem _; do
        cpu_used=$(( cpu_used + $(convert_to_m "$cpu") ))
        mem_used=$(( mem_used + $(convert_memory_to_ki "$mem") ))
      done <<< "$pod_top_output"
    fi

    printf " CPU Request: %8sm, CPU Limit: %8sm, CPU Used: %8sm\n" "$cpu_request" "$cpu_limit" "$cpu_used"
    printf " Memory Request: %10sKi, Memory Limit: %10sKi, Memory Used: %10sKi\n" "$mem_request" "$mem_limit" "$mem_used"

    # Analysis and suggestions
    _print_sizing_hints "CPU" "m" "$cpu_request" "$cpu_limit" "$cpu_used"
    _print_sizing_hints "Memory" "Ki" "$mem_request" "$mem_limit" "$mem_used"
  done 3< <(kubectl get namespaces -o json | jq -r '.items[].metadata.name')
}
# Express part/whole as hundredths of a percent (9650 means 96.50%).
# Arguments: $1 - part, $2 - whole (integers in the same unit)
# Outputs:   the integer on stdout
# Returns:   1, printing nothing, when whole is not positive.
_ratio_hundredths() {
  local part=${1:-0} whole=${2:-0}
  (( whole > 0 )) || return 1
  # Multiply before dividing so no precision is lost to integer division.
  echo "$(( part * 10000 / whole ))"
}

# Render hundredths of a percent as "NN.NN%"; an empty value becomes "n/a".
_format_hundredths() {
  if [[ -z ${1:-} ]]; then
    printf 'n/a'
  else
    printf '%d.%02d%%' "$(( $1 / 100 ))" "$(( $1 % 100 ))"
  fi
}

# Report the allocatable/capacity ratio for CPU and memory, per node and for
# the whole cluster, and print a hint whenever a ratio is below 70%.
# Outputs: report on stdout; a ratio whose capacity is 0 is shown as "n/a"
#          and produces no hint.
# Uses:    kubectl, jq, convert_to_m, convert_memory_to_ki.
analyze_resource_efficiency() {
  echo "Resource Efficiency Analysis:"
  # 70% expressed in hundredths of a percent.
  local -r low_mark=7000
  local node cpu_alloc mem_alloc cpu_cap mem_cap cpu_eff mem_eff
  local total_cpu_alloc=0
  local total_cpu_cap=0
  local total_mem_alloc=0
  local total_mem_cap=0

  # jq emits one line per node: "<name> <alloc cpu> <alloc mem> <cap cpu>m <cap mem>".
  while read -r node cpu_alloc mem_alloc cpu_cap mem_cap _; do
    cpu_alloc=$(convert_to_m "$cpu_alloc")
    mem_alloc=$(convert_memory_to_ki "$mem_alloc")
    cpu_cap=$(convert_to_m "$cpu_cap")
    mem_cap=$(convert_memory_to_ki "$mem_cap")
    total_cpu_alloc=$(( total_cpu_alloc + cpu_alloc ))
    total_cpu_cap=$(( total_cpu_cap + cpu_cap ))
    total_mem_alloc=$(( total_mem_alloc + mem_alloc ))
    total_mem_cap=$(( total_mem_cap + mem_cap ))

    # An empty value marks "capacity unknown" instead of dividing by zero.
    cpu_eff=$(_ratio_hundredths "$cpu_alloc" "$cpu_cap") || cpu_eff=""
    mem_eff=$(_ratio_hundredths "$mem_alloc" "$mem_cap") || mem_eff=""

    echo "Node: $node"
    echo " CPU Allocatable/Capacity: ${cpu_alloc}m/${cpu_cap}m ($(_format_hundredths "$cpu_eff"))"
    echo " Memory Allocatable/Capacity: ${mem_alloc}Ki/${mem_cap}Ki ($(_format_hundredths "$mem_eff"))"
    if [[ -n $cpu_eff ]] && (( cpu_eff < low_mark )); then
      echo " Consider adjusting CPU requests/limits or node size for better CPU utilization"
    fi
    if [[ -n $mem_eff ]] && (( mem_eff < low_mark )); then
      echo " Consider adjusting memory requests/limits or node size for better memory utilization"
    fi
    echo
  done < <(kubectl get nodes -o json \
    | jq -r '.items[] | .metadata.name as $name | .status.allocatable as $alloc | .status.capacity as $cap | "\($name) \($alloc.cpu) \($alloc.memory) \(($cap.cpu | tonumber * 1000 | tostring) + "m") \($cap.memory)"')

  # Calculate cluster-wide efficiency
  local cluster_cpu_eff cluster_mem_eff
  cluster_cpu_eff=$(_ratio_hundredths "$total_cpu_alloc" "$total_cpu_cap") || cluster_cpu_eff=""
  cluster_mem_eff=$(_ratio_hundredths "$total_mem_alloc" "$total_mem_cap") || cluster_mem_eff=""

  echo "Cluster-wide Resource Efficiency:"
  echo " Total CPU Allocatable/Capacity: ${total_cpu_alloc}m/${total_cpu_cap}m ($(_format_hundredths "$cluster_cpu_eff"))"
  echo " Total Memory Allocatable/Capacity: ${total_mem_alloc}Ki/${total_mem_cap}Ki ($(_format_hundredths "$cluster_mem_eff"))"
  if [[ -n $cluster_cpu_eff ]] && (( cluster_cpu_eff < low_mark )); then
    echo " Consider adjusting cluster-wide CPU allocation for better utilization"
  fi
  if [[ -n $cluster_mem_eff ]] && (( cluster_mem_eff < low_mark )); then
    echo " Consider adjusting cluster-wide memory allocation for better utilization"
  fi
  return 0
}
# Print the API server's gitVersion as reported by `kubectl version -o json`.
# Outputs: the `.serverVersion.gitVersion` string on stdout.
get_gke_version() {
  printf '%s\n' "$(kubectl version -o json | jq -r '.serverVersion.gitVersion')"
}
# Print a top-level heading: a blank line, the title in bold (ANSI escape
# codes), then a row of "=" as long as the title.
# Arguments: $1 - heading text
print_header() {
  local title=$1
  local rule
  # Build a run of spaces as long as the title, then turn it into "=".
  printf -v rule '%*s' "${#title}" ''
  printf '\n\033[1m%b\033[0m\n' "$title"
  printf '%s\n' "${rule// /=}"
}
# Print a section heading: a blank line, the title in bold (ANSI escape
# codes), then a row of "-" as long as the title.
# Arguments: $1 - heading text
print_subheader() {
  local title=$1
  local rule
  # Build a run of spaces as long as the title, then turn it into "-".
  printf -v rule '%*s' "${#title}" ''
  printf '\n\033[1m%b\033[0m\n' "$title"
  printf '%s\n' "${rule// /-}"
}
# Print one "key : value" row with the key left-aligned in a 25-column field.
# Arguments: $1 - key, $2 - value
print_key_value() {
  local key=$1
  local val=$2
  printf '%-25s : %s\n' "$key" "$val"
}
# Produce the full report: cluster summary, each analysis section under its
# own sub-heading, and a closing glossary of the terms used.
main() {
  print_header "GKE Cluster Analysis"
  print_key_value "Cluster Type" "$(get_cluster_type)"
  print_key_value "GKE Version" "$(get_gke_version)"

  # Section titles and the function that renders each one, in report order.
  local -a section_titles=(
    "Node Allocations"
    "Node Utilization"
    "Namespace Resource Utilization"
    "Resource Efficiency Analysis"
  )
  local -a section_reporters=(
    get_node_allocations
    get_node_utilization
    get_namespace_utilization
    analyze_resource_efficiency
  )
  local idx
  for idx in "${!section_titles[@]}"; do
    print_subheader "${section_titles[idx]}"
    "${section_reporters[idx]}"
  done

  print_subheader "Explanation of Terms"
  printf '%s\n' \
    "- Capacity: The total amount of resources on the node." \
    "- Allocatable: The amount of resources that can be requested by and allocated to pods." \
    "- Request: The minimum amount of resources a pod is guaranteed to have." \
    "- Limit: The maximum amount of resources a pod is allowed to use." \
    "- Used: The actual amount of resources currently being consumed by pods." \
    "" \
    "Note: In Autopilot clusters, GKE automatically manages resource allocation and scaling." \
    "Resource efficiency percentages represent Allocatable/Capacity ratios." \
    "CPU values are in millicores, where 1000m = 1 CPU core."
}
# Script entry point: run the full report. No arguments are passed to main.
main
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment