Created
February 22, 2022 13:06
-
-
Save benjvi/dc3ce349a6a383baee65f4a63ea4e70f to your computer and use it in GitHub Desktop.
Grafana Dashboard for Identifying Overprovisioned & Underprovisioned Workloads
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"__inputs": [ | |
{ | |
"name": "DS_PROMETHEUS", | |
"label": "prometheus", | |
"description": "", | |
"type": "datasource", | |
"pluginId": "prometheus", | |
"pluginName": "Prometheus" | |
} | |
], | |
"__requires": [ | |
{ | |
"type": "grafana", | |
"id": "grafana", | |
"name": "Grafana", | |
"version": "7.5.4" | |
}, | |
{ | |
"type": "datasource", | |
"id": "prometheus", | |
"name": "Prometheus", | |
"version": "1.0.0" | |
}, | |
{ | |
"type": "panel", | |
"id": "stat", | |
"name": "Stat", | |
"version": "" | |
}, | |
{ | |
"type": "panel", | |
"id": "table", | |
"name": "Table", | |
"version": "" | |
}, | |
{ | |
"type": "panel", | |
"id": "text", | |
"name": "Text", | |
"version": "" | |
} | |
], | |
"annotations": { | |
"list": [ | |
{ | |
"builtIn": 1, | |
"datasource": "-- Grafana --", | |
"enable": true, | |
"hide": true, | |
"iconColor": "rgba(0, 211, 255, 1)", | |
"name": "Annotations & Alerts", | |
"type": "dashboard" | |
} | |
] | |
}, | |
"editable": true, | |
"gnetId": null, | |
"graphTooltip": 0, | |
"id": null, | |
"iteration": 1645534814589, | |
"links": [], | |
"panels": [ | |
{ | |
"datasource": null, | |
"fieldConfig": { | |
"defaults": {}, | |
"overrides": [] | |
}, | |
"gridPos": { | |
"h": 7, | |
"w": 2, | |
"x": 0, | |
"y": 0 | |
}, | |
"id": 12, | |
"options": { | |
"content": "# CPU", | |
"mode": "markdown" | |
}, | |
"pluginVersion": "7.5.4", | |
"targets": [ | |
{ | |
"queryType": "randomWalk", | |
"refId": "A" | |
} | |
], | |
"timeFrom": null, | |
"timeShift": null, | |
"type": "text" | |
}, | |
{ | |
"datasource": "${DS_PROMETHEUS}", | |
"fieldConfig": { | |
"defaults": { | |
"color": { | |
"mode": "thresholds" | |
}, | |
"mappings": [], | |
"thresholds": { | |
"mode": "absolute", | |
"steps": [ | |
{ | |
"color": "green", | |
"value": null | |
} | |
] | |
} | |
}, | |
"overrides": [] | |
}, | |
"gridPos": { | |
"h": 7, | |
"w": 5, | |
"x": 2, | |
"y": 0 | |
}, | |
"id": 7, | |
"interval": "1h", | |
"options": { | |
"colorMode": "value", | |
"graphMode": "area", | |
"justifyMode": "auto", | |
"orientation": "auto", | |
"reduceOptions": { | |
"calcs": [ | |
"lastNotNull" | |
], | |
"fields": "", | |
"values": false | |
}, | |
"text": {}, | |
"textMode": "auto" | |
}, | |
"pluginVersion": "7.5.4", | |
"targets": [ | |
{ | |
"exemplar": true, | |
"expr": "sum\n (\n # sum over pod giving total overprovisioning per pod, we can join this with pod owner information\n sum\n (\n # actual requests\n avg(kube_pod_container_resource_requests{resource=\"cpu\", unit=\"core\"}) by (namespace, pod, container) \n - \n # recommended request level based on usage\n (\n ( 1 + ($cpu_overhead_percent/100) ) * max(\n quantile_over_time(\n $cpu_percentile,\n sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate\n ) by (namespace,pod,container)\n [$__range:1h]\n )\n ) by (namespace,pod,container)\n ) \n ) by (namespace,pod)\n * \n on(namespace,pod)\n group_left(owner_name,owner_kind)\n kube_pod_owner\n > 0\n )", | |
"instant": false, | |
"interval": "", | |
"legendFormat": "", | |
"queryType": "randomWalk", | |
"refId": "A" | |
} | |
], | |
"title": "Total Overprovisioned CPU", | |
"type": "stat" | |
}, | |
{ | |
"datasource": "${DS_PROMETHEUS}", | |
"fieldConfig": { | |
"defaults": { | |
"color": { | |
"mode": "thresholds" | |
}, | |
"mappings": [], | |
"thresholds": { | |
"mode": "absolute", | |
"steps": [ | |
{ | |
"color": "green", | |
"value": null | |
} | |
] | |
} | |
}, | |
"overrides": [] | |
}, | |
"gridPos": { | |
"h": 7, | |
"w": 5, | |
"x": 7, | |
"y": 0 | |
}, | |
"id": 9, | |
"interval": "1h", | |
"options": { | |
"colorMode": "value", | |
"graphMode": "area", | |
"justifyMode": "auto", | |
"orientation": "auto", | |
"reduceOptions": { | |
"calcs": [ | |
"lastNotNull" | |
], | |
"fields": "", | |
"values": false | |
}, | |
"text": {}, | |
"textMode": "auto" | |
}, | |
"pluginVersion": "7.5.4", | |
"targets": [ | |
{ | |
"exemplar": true, | |
"expr": "sum\n (\n # sum over pod giving total overprovisioning per pod, we can join this with pod owner information\n sum\n (\n # actual requests\n avg(kube_pod_container_resource_requests{resource=\"cpu\", unit=\"core\"}) by (namespace, pod, container) \n - \n # recommended request level based on usage\n on (namespace, pod, container)\n (\n ( 1 + ($cpu_overhead_percent/100) ) * max(\n quantile_over_time(\n $cpu_percentile,\n sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate\n ) by (namespace,pod,container)\n [$__range:1h]\n )\n ) by (namespace,pod,container)\n )\n or on (namespace, pod, container) \n -1 * ( 1 + ($cpu_overhead_percent/100) ) * max(\n quantile_over_time(\n $cpu_percentile,\n sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate\n ) by (namespace,pod,container)\n [7d:1h]\n )\n ) by (namespace,pod,container) \n ) by (namespace,pod)\n * \n on(namespace,pod)\n group_left(owner_name,owner_kind)\n kube_pod_owner\n < 0\n )", | |
"instant": false, | |
"interval": "", | |
"legendFormat": "", | |
"queryType": "randomWalk", | |
"refId": "A" | |
} | |
], | |
"title": "Total Underprovisioned CPU", | |
"type": "stat" | |
}, | |
{ | |
"datasource": null, | |
"fieldConfig": { | |
"defaults": {}, | |
"overrides": [] | |
}, | |
"gridPos": { | |
"h": 7, | |
"w": 2, | |
"x": 12, | |
"y": 0 | |
}, | |
"id": 13, | |
"options": { | |
"content": "# Memory", | |
"mode": "markdown" | |
}, | |
"pluginVersion": "7.5.4", | |
"targets": [ | |
{ | |
"queryType": "randomWalk", | |
"refId": "A" | |
} | |
], | |
"timeFrom": null, | |
"timeShift": null, | |
"type": "text" | |
}, | |
{ | |
"datasource": "${DS_PROMETHEUS}", | |
"fieldConfig": { | |
"defaults": { | |
"color": { | |
"mode": "thresholds" | |
}, | |
"mappings": [], | |
"thresholds": { | |
"mode": "absolute", | |
"steps": [ | |
{ | |
"color": "green", | |
"value": null | |
} | |
] | |
} | |
}, | |
"overrides": [] | |
}, | |
"gridPos": { | |
"h": 7, | |
"w": 5, | |
"x": 14, | |
"y": 0 | |
}, | |
"id": 8, | |
"interval": "1h", | |
"options": { | |
"colorMode": "value", | |
"graphMode": "area", | |
"justifyMode": "auto", | |
"orientation": "auto", | |
"reduceOptions": { | |
"calcs": [ | |
"lastNotNull" | |
], | |
"fields": "", | |
"values": false | |
}, | |
"text": {}, | |
"textMode": "auto" | |
}, | |
"pluginVersion": "7.5.4", | |
"targets": [ | |
{ | |
"exemplar": true, | |
"expr": " sum(\n (\n avg(kube_pod_container_resource_requests{resource=\"memory\", unit=\"byte\"}) by (container,pod,namespace) \n - \n ( 1 + ($memory_overhead_percent/100) ) * max(quantile_over_time($memory_percentile,container_memory_working_set_bytes{container!=\"POD\",container!=\"\"}[$__range])) by (container,pod,namespace)\n ) / (1024 * 1024) \n *\n on(namespace,pod) \n group_left(owner_name,owner_kind) \n ( avg(kube_pod_owner) by (namespace,pod,owner_name,owner_kind) ) > 0\n ) ", | |
"instant": false, | |
"interval": "", | |
"legendFormat": "", | |
"queryType": "randomWalk", | |
"refId": "A" | |
} | |
], | |
"title": "Total Overprovisioned Memory (MB)", | |
"type": "stat" | |
}, | |
{ | |
"datasource": "${DS_PROMETHEUS}", | |
"fieldConfig": { | |
"defaults": { | |
"color": { | |
"mode": "thresholds" | |
}, | |
"mappings": [], | |
"thresholds": { | |
"mode": "absolute", | |
"steps": [ | |
{ | |
"color": "green", | |
"value": null | |
} | |
] | |
} | |
}, | |
"overrides": [] | |
}, | |
"gridPos": { | |
"h": 7, | |
"w": 5, | |
"x": 19, | |
"y": 0 | |
}, | |
"id": 10, | |
"interval": "1h", | |
"options": { | |
"colorMode": "value", | |
"graphMode": "area", | |
"justifyMode": "auto", | |
"orientation": "auto", | |
"reduceOptions": { | |
"calcs": [ | |
"lastNotNull" | |
], | |
"fields": "", | |
"values": false | |
}, | |
"text": {}, | |
"textMode": "auto" | |
}, | |
"pluginVersion": "7.5.4", | |
"targets": [ | |
{ | |
"exemplar": true, | |
"expr": " sum(\n (\n avg(kube_pod_container_resource_requests{resource=\"memory\", unit=\"byte\"}) by (container,pod,namespace) \n - \n ( 1 + ( $memory_overhead_percent / 100 ) ) * max(quantile_over_time($memory_percentile,container_memory_working_set_bytes{container!=\"POD\",container!=\"\"}[3h])) by (container,pod,namespace)\n or on (namespace, pod, container) -1 * ( 1 + ($memory_overhead_percent/100) ) * max(quantile_over_time($memory_percentile,container_memory_working_set_bytes{container!=\"POD\",container!=\"\"}[$__range])) by (container,pod,namespace)\n ) / (1024 * 1024) \n *\n on(namespace,pod) \n group_left(owner_name,owner_kind) \n ( avg(kube_pod_owner) by (namespace,pod,owner_name,owner_kind) ) < 0\n ) ", | |
"instant": false, | |
"interval": "", | |
"legendFormat": "", | |
"queryType": "randomWalk", | |
"refId": "A" | |
} | |
], | |
"title": "Total Underprovisioned Memory (MB)", | |
"type": "stat" | |
}, | |
{ | |
"collapsed": false, | |
"datasource": null, | |
"gridPos": { | |
"h": 1, | |
"w": 24, | |
"x": 0, | |
"y": 7 | |
}, | |
"id": 17, | |
"panels": [], | |
"title": "Overprovisioned", | |
"type": "row" | |
}, | |
{ | |
"datasource": "${DS_PROMETHEUS}", | |
"fieldConfig": { | |
"defaults": { | |
"color": { | |
"mode": "thresholds" | |
}, | |
"custom": { | |
"align": null, | |
"filterable": false | |
}, | |
"mappings": [], | |
"thresholds": { | |
"mode": "absolute", | |
"steps": [ | |
{ | |
"color": "green", | |
"value": null | |
}, | |
{ | |
"color": "red", | |
"value": 80 | |
} | |
] | |
} | |
}, | |
"overrides": [ | |
{ | |
"matcher": { | |
"id": "byName", | |
"options": "namespace" | |
}, | |
"properties": [ | |
{ | |
"id": "custom.width", | |
"value": 141 | |
} | |
] | |
}, | |
{ | |
"matcher": { | |
"id": "byName", | |
"options": "owner_kind" | |
}, | |
"properties": [ | |
{ | |
"id": "custom.width", | |
"value": 134 | |
} | |
] | |
}, | |
{ | |
"matcher": { | |
"id": "byName", | |
"options": "Value #A" | |
}, | |
"properties": [ | |
{ | |
"id": "displayName", | |
"value": "CPU Overprovisioning Total" | |
} | |
] | |
}, | |
{ | |
"matcher": { | |
"id": "byName", | |
"options": "Value #B" | |
}, | |
"properties": [ | |
{ | |
"id": "displayName", | |
"value": "Pod Count" | |
} | |
] | |
}, | |
{ | |
"matcher": { | |
"id": "byName", | |
"options": "CPU Overprovisioning Total" | |
}, | |
"properties": [ | |
{ | |
"id": "custom.width", | |
"value": 77 | |
} | |
] | |
}, | |
{ | |
"matcher": { | |
"id": "byName", | |
"options": "Pod Count" | |
}, | |
"properties": [ | |
{ | |
"id": "custom.width", | |
"value": 93 | |
} | |
] | |
}, | |
{ | |
"matcher": { | |
"id": "byName", | |
"options": "owner_name" | |
}, | |
"properties": [ | |
{ | |
"id": "custom.width", | |
"value": 204 | |
} | |
] | |
} | |
] | |
}, | |
"gridPos": { | |
"h": 10, | |
"w": 12, | |
"x": 0, | |
"y": 8 | |
}, | |
"id": 2, | |
"interval": "1h", | |
"options": { | |
"showHeader": true, | |
"sortBy": [ | |
{ | |
"desc": true, | |
"displayName": "CPU Overprovisioning Total" | |
} | |
] | |
}, | |
"pluginVersion": "7.5.4", | |
"targets": [ | |
{ | |
"exemplar": true, | |
"expr": "topk\n(\n 8,\n # after joining owner, we can sum over all replicas attached to the same owner (controller) object\n sum\n (\n # sum over pod giving total overprovisioning per pod, we can join this with pod owner information\n sum\n (\n # actual requests\n avg(kube_pod_container_resource_requests{resource=\"cpu\", unit=\"core\"}) by (namespace, pod, container) \n - \n # recommended request level based on usage\n (\n ( 1 + ($cpu_overhead_percent/100) ) * max(\n quantile_over_time(\n $cpu_percentile,\n sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate\n ) by (namespace,pod,container)\n [$__range:1h]\n )\n ) by (namespace,pod,container)\n ) \n ) by (namespace,pod)\n * \n on(namespace,pod)\n group_left(owner_name,owner_kind)\n kube_pod_owner\n ) by (namespace,owner_name,owner_kind)\n)", | |
"format": "table", | |
"hide": false, | |
"instant": true, | |
"interval": "", | |
"legendFormat": "", | |
"queryType": "randomWalk", | |
"refId": "A" | |
}, | |
{ | |
"exemplar": true, | |
"expr": "count\n(\n (kube_pod_status_ready == 1)\n *\n on (namespace,pod)\n group_left(owner_name,owner_kind)\n (kube_pod_owner == 1)\n) by (namespace,owner_name,owner_kind)", | |
"format": "table", | |
"hide": false, | |
"instant": true, | |
"interval": "", | |
"legendFormat": "", | |
"refId": "B" | |
} | |
], | |
"title": "Top8 Overprovisioned Workloads CPU", | |
"transformations": [ | |
{ | |
"id": "filterFieldsByName", | |
"options": { | |
"include": { | |
"names": [ | |
"namespace", | |
"owner_kind", | |
"owner_name", | |
"Value #A", | |
"Value #B" | |
] | |
} | |
} | |
}, | |
{ | |
"id": "merge", | |
"options": {} | |
}, | |
{ | |
"id": "filterByValue", | |
"options": { | |
"filters": [ | |
{ | |
"config": { | |
"id": "greater", | |
"options": { | |
"value": 0 | |
} | |
}, | |
"fieldName": "Value #A" | |
} | |
], | |
"match": "all", | |
"type": "include" | |
} | |
} | |
], | |
"type": "table" | |
}, | |
{ | |
"datasource": "${DS_PROMETHEUS}", | |
"fieldConfig": { | |
"defaults": { | |
"color": { | |
"mode": "thresholds" | |
}, | |
"custom": { | |
"align": null, | |
"filterable": false | |
}, | |
"mappings": [], | |
"thresholds": { | |
"mode": "absolute", | |
"steps": [ | |
{ | |
"color": "green", | |
"value": null | |
}, | |
{ | |
"color": "red", | |
"value": 80 | |
} | |
] | |
} | |
}, | |
"overrides": [ | |
{ | |
"matcher": { | |
"id": "byName", | |
"options": "namespace" | |
}, | |
"properties": [ | |
{ | |
"id": "custom.width", | |
"value": 141 | |
} | |
] | |
}, | |
{ | |
"matcher": { | |
"id": "byName", | |
"options": "owner_kind" | |
}, | |
"properties": [ | |
{ | |
"id": "custom.width", | |
"value": 134 | |
} | |
] | |
}, | |
{ | |
"matcher": { | |
"id": "byName", | |
"options": "Value #A" | |
}, | |
"properties": [ | |
{ | |
"id": "displayName", | |
"value": "Memory Overprovisioning Total (MB)" | |
} | |
] | |
}, | |
{ | |
"matcher": { | |
"id": "byName", | |
"options": "Value #B" | |
}, | |
"properties": [ | |
{ | |
"id": "displayName", | |
"value": "Pod Count" | |
} | |
] | |
}, | |
{ | |
"matcher": { | |
"id": "byName", | |
"options": "CPU Overprovisioning Total" | |
}, | |
"properties": [ | |
{ | |
"id": "custom.width", | |
"value": 235 | |
} | |
] | |
}, | |
{ | |
"matcher": { | |
"id": "byName", | |
"options": "Pod Count" | |
}, | |
"properties": [ | |
{ | |
"id": "custom.width", | |
"value": 95 | |
} | |
] | |
}, | |
{ | |
"matcher": { | |
"id": "byName", | |
"options": "Memory Overprovisioning Total (MB)" | |
}, | |
"properties": [ | |
{ | |
"id": "custom.width", | |
"value": 106 | |
} | |
] | |
}, | |
{ | |
"matcher": { | |
"id": "byName", | |
"options": "owner_name" | |
}, | |
"properties": [ | |
{ | |
"id": "custom.width", | |
"value": 168 | |
} | |
] | |
} | |
] | |
}, | |
"gridPos": { | |
"h": 10, | |
"w": 12, | |
"x": 12, | |
"y": 8 | |
}, | |
"id": 4, | |
"interval": "12h", | |
"options": { | |
"showHeader": true, | |
"sortBy": [] | |
}, | |
"pluginVersion": "7.5.4", | |
"targets": [ | |
{ | |
"exemplar": true, | |
"expr": "topk(\n 8,\n sum(\n sum(\n (\n ## current resource reservation\n avg(kube_pod_container_resource_requests{resource=\"memory\", unit=\"byte\"}) by (container,pod,namespace) \n - \n # resource recommendation based on historical usage\n ( 1 + ($memory_overhead_percent/100) ) * max(quantile_over_time($memory_percentile,container_memory_working_set_bytes{container!=\"POD\",container!=\"\"}[$__range])) by (container,pod,namespace)\n ) / (1024 * 1024) \n ) by (pod,namespace)\n *\n on(namespace,pod) \n group_left(owner_name,owner_kind) \n ( avg(kube_pod_owner) by (namespace,pod,owner_name,owner_kind) )\n ) by (namespace,owner_name,owner_kind)\n)", | |
"format": "table", | |
"hide": false, | |
"instant": true, | |
"interval": "", | |
"legendFormat": "", | |
"queryType": "randomWalk", | |
"refId": "A" | |
}, | |
{ | |
"exemplar": true, | |
"expr": "count\n(\n (kube_pod_status_ready == 1)\n *\n on (namespace,pod)\n group_left(owner_name,owner_kind)\n (kube_pod_owner == 1)\n) by (namespace,owner_name,owner_kind)", | |
"format": "table", | |
"hide": false, | |
"instant": true, | |
"interval": "", | |
"legendFormat": "", | |
"refId": "B" | |
} | |
], | |
"title": "Top8 Overprovisioned Workloads Memory", | |
"transformations": [ | |
{ | |
"id": "filterFieldsByName", | |
"options": { | |
"include": { | |
"names": [ | |
"namespace", | |
"owner_kind", | |
"owner_name", | |
"Value #A", | |
"Value #B" | |
] | |
} | |
} | |
}, | |
{ | |
"id": "merge", | |
"options": {} | |
}, | |
{ | |
"id": "filterByValue", | |
"options": { | |
"filters": [ | |
{ | |
"config": { | |
"id": "greater", | |
"options": { | |
"value": 0 | |
} | |
}, | |
"fieldName": "Value #A" | |
} | |
], | |
"match": "all", | |
"type": "include" | |
} | |
} | |
], | |
"type": "table" | |
}, | |
{ | |
"collapsed": false, | |
"datasource": null, | |
"gridPos": { | |
"h": 1, | |
"w": 24, | |
"x": 0, | |
"y": 18 | |
}, | |
"id": 19, | |
"panels": [], | |
"title": "Underprovisioned", | |
"type": "row" | |
}, | |
{ | |
"datasource": "${DS_PROMETHEUS}", | |
"fieldConfig": { | |
"defaults": { | |
"color": { | |
"mode": "thresholds" | |
}, | |
"custom": { | |
"align": null, | |
"filterable": false | |
}, | |
"mappings": [], | |
"thresholds": { | |
"mode": "absolute", | |
"steps": [ | |
{ | |
"color": "green", | |
"value": null | |
}, | |
{ | |
"color": "red", | |
"value": 80 | |
} | |
] | |
} | |
}, | |
"overrides": [ | |
{ | |
"matcher": { | |
"id": "byName", | |
"options": "namespace" | |
}, | |
"properties": [ | |
{ | |
"id": "custom.width", | |
"value": 140 | |
} | |
] | |
}, | |
{ | |
"matcher": { | |
"id": "byName", | |
"options": "owner_kind" | |
}, | |
"properties": [ | |
{ | |
"id": "custom.width", | |
"value": 136 | |
} | |
] | |
}, | |
{ | |
"matcher": { | |
"id": "byName", | |
"options": "Value #A" | |
}, | |
"properties": [ | |
{ | |
"id": "displayName", | |
"value": "CPU Underprovisioning Total" | |
} | |
] | |
}, | |
{ | |
"matcher": { | |
"id": "byName", | |
"options": "Value #B" | |
}, | |
"properties": [ | |
{ | |
"id": "displayName", | |
"value": "Pod Count" | |
} | |
] | |
}, | |
{ | |
"matcher": { | |
"id": "byName", | |
"options": "owner_name" | |
}, | |
"properties": [ | |
{ | |
"id": "custom.width", | |
"value": 305 | |
} | |
] | |
}, | |
{ | |
"matcher": { | |
"id": "byName", | |
"options": "CPU Underprovisioning Total" | |
}, | |
"properties": [ | |
{ | |
"id": "custom.width", | |
"value": 218 | |
} | |
] | |
}, | |
{ | |
"matcher": { | |
"id": "byName", | |
"options": "Pod Count" | |
}, | |
"properties": [ | |
{ | |
"id": "custom.width", | |
"value": 100 | |
} | |
] | |
} | |
] | |
}, | |
"gridPos": { | |
"h": 10, | |
"w": 12, | |
"x": 0, | |
"y": 19 | |
}, | |
"id": 3, | |
"interval": "1h", | |
"options": { | |
"showHeader": true, | |
"sortBy": [] | |
}, | |
"pluginVersion": "7.5.4", | |
"targets": [ | |
{ | |
"exemplar": true, | |
"expr": "bottomk\n(\n 8,\n # after joining owner, we can sum over all replicas attached to the same owner (controller) object\n sum\n (\n # sum over pod giving total overprovisioning per pod, we can join this with pod owner information\n sum\n (\n # actual requests\n sum(kube_pod_container_resource_requests{resource=\"cpu\", unit=\"core\"}) by (namespace, pod, container)\n -\n on (namespace, pod, container)\n # recommended request level based on usage\n ( 1 + ($cpu_overhead_percent/100) ) * max(\n quantile_over_time(\n $cpu_percentile,\n sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate\n ) by (namespace,pod,container)\n [$__range:1h]\n )\n ) by (namespace,pod,container) \n # if request is missing, overprovisioning is equal to recommended request\n or on (namespace, pod, container) \n -1 * ( 1 + ($cpu_overhead_percent/100) ) * max(\n quantile_over_time(\n $cpu_percentile,\n sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate\n ) by (namespace,pod,container)\n [$__range:1h]\n )\n ) by (namespace,pod,container) \n ) by (namespace,pod)\n * \n on(namespace,pod)\n group_left(owner_name,owner_kind)\n kube_pod_owner\n ) by (namespace,owner_name,owner_kind)\n)", | |
"format": "table", | |
"instant": true, | |
"interval": "", | |
"legendFormat": "", | |
"queryType": "randomWalk", | |
"refId": "A" | |
}, | |
{ | |
"exemplar": true, | |
"expr": "count\n(\n (kube_pod_status_ready == 1)\n *\n on (namespace,pod)\n group_left(owner_name,owner_kind)\n (kube_pod_owner == 1)\n) by (namespace,owner_name,owner_kind)", | |
"format": "table", | |
"hide": false, | |
"instant": true, | |
"interval": "", | |
"legendFormat": "", | |
"refId": "B" | |
} | |
], | |
"title": "Top8 Underprovisioned Workloads CPU", | |
"transformations": [ | |
{ | |
"id": "filterFieldsByName", | |
"options": { | |
"include": { | |
"names": [ | |
"namespace", | |
"owner_kind", | |
"owner_name", | |
"Value #A", | |
"Value #B" | |
] | |
} | |
} | |
}, | |
{ | |
"id": "merge", | |
"options": {} | |
}, | |
{ | |
"id": "filterByValue", | |
"options": { | |
"filters": [ | |
{ | |
"config": { | |
"id": "lower", | |
"options": { | |
"value": 0 | |
} | |
}, | |
"fieldName": "Value #A" | |
} | |
], | |
"match": "all", | |
"type": "include" | |
} | |
} | |
], | |
"type": "table" | |
}, | |
{ | |
"datasource": "${DS_PROMETHEUS}", | |
"fieldConfig": { | |
"defaults": { | |
"color": { | |
"mode": "thresholds" | |
}, | |
"custom": { | |
"align": null, | |
"filterable": false | |
}, | |
"mappings": [], | |
"thresholds": { | |
"mode": "absolute", | |
"steps": [ | |
{ | |
"color": "green", | |
"value": null | |
}, | |
{ | |
"color": "red", | |
"value": 80 | |
} | |
] | |
} | |
}, | |
"overrides": [ | |
{ | |
"matcher": { | |
"id": "byName", | |
"options": "namespace" | |
}, | |
"properties": [ | |
{ | |
"id": "custom.width", | |
"value": 141 | |
} | |
] | |
}, | |
{ | |
"matcher": { | |
"id": "byName", | |
"options": "owner_kind" | |
}, | |
"properties": [ | |
{ | |
"id": "custom.width", | |
"value": 134 | |
} | |
] | |
}, | |
{ | |
"matcher": { | |
"id": "byName", | |
"options": "Value #A" | |
}, | |
"properties": [ | |
{ | |
"id": "displayName", | |
"value": "Memory Underprovisioning Total (MB)" | |
} | |
] | |
}, | |
{ | |
"matcher": { | |
"id": "byName", | |
"options": "Value #B" | |
}, | |
"properties": [ | |
{ | |
"id": "displayName", | |
"value": "Pod Count" | |
} | |
] | |
}, | |
{ | |
"matcher": { | |
"id": "byName", | |
"options": "CPU Overprovisioning Total" | |
}, | |
"properties": [ | |
{ | |
"id": "custom.width", | |
"value": 235 | |
} | |
] | |
}, | |
{ | |
"matcher": { | |
"id": "byName", | |
"options": "Pod Count" | |
}, | |
"properties": [ | |
{ | |
"id": "custom.width", | |
"value": 91 | |
} | |
] | |
}, | |
{ | |
"matcher": { | |
"id": "byName", | |
"options": "Memory Overprovisioning Total (MB)" | |
}, | |
"properties": [ | |
{ | |
"id": "custom.width", | |
"value": 246 | |
} | |
] | |
}, | |
{ | |
"matcher": { | |
"id": "byName", | |
"options": "owner_name" | |
}, | |
"properties": [ | |
{ | |
"id": "custom.width", | |
"value": 287 | |
} | |
] | |
} | |
] | |
}, | |
"gridPos": { | |
"h": 10, | |
"w": 12, | |
"x": 12, | |
"y": 19 | |
}, | |
"id": 5, | |
"options": { | |
"showHeader": true, | |
"sortBy": [] | |
}, | |
"pluginVersion": "7.5.4", | |
"targets": [ | |
{ | |
"exemplar": true, | |
"expr": "bottomk(\n 8,\n sum(\n sum(\n (\n ## current resource reservation\n avg(kube_pod_container_resource_requests{resource=\"memory\", unit=\"byte\"}) by (container,pod,namespace) \n - \n on(namespace, pod, container)\n # resource recommendation based on historical usage\n ( 1 + ($memory_overhead_percent/100) ) * max(quantile_over_time($memory_percentile,container_memory_working_set_bytes{container!=\"POD\",container!=\"\"}[$__range])) by (container,pod,namespace)\n or on (namespace, pod, container) -1 * ( 1 + ($memory_overhead_percent/100) ) * max(quantile_over_time($memory_percentile,container_memory_working_set_bytes{container!=\"POD\",container!=\"\"}[$__range])) by (container,pod,namespace)\n ) / (1024 * 1024) \n ) by (pod,namespace)\n *\n on(namespace,pod) \n group_left(owner_name,owner_kind) \n ( avg(kube_pod_owner) by (namespace,pod,owner_name,owner_kind) )\n ) by (namespace,owner_name,owner_kind)\n)", | |
"format": "table", | |
"hide": false, | |
"instant": true, | |
"interval": "", | |
"legendFormat": "", | |
"queryType": "randomWalk", | |
"refId": "A" | |
}, | |
{ | |
"exemplar": true, | |
"expr": "count\n(\n (kube_pod_status_ready == 1)\n *\n on (namespace,pod)\n group_left(owner_name,owner_kind)\n (kube_pod_owner == 1)\n) by (namespace,owner_name,owner_kind)", | |
"format": "table", | |
"hide": false, | |
"instant": true, | |
"interval": "", | |
"legendFormat": "", | |
"refId": "B" | |
} | |
], | |
"title": "Top8 Underprovisioned Workloads Memory", | |
"transformations": [ | |
{ | |
"id": "filterFieldsByName", | |
"options": { | |
"include": { | |
"names": [ | |
"namespace", | |
"owner_kind", | |
"owner_name", | |
"Value #A", | |
"Value #B" | |
] | |
} | |
} | |
}, | |
{ | |
"id": "merge", | |
"options": {} | |
}, | |
{ | |
"id": "filterByValue", | |
"options": { | |
"filters": [ | |
{ | |
"config": { | |
"id": "lowerOrEqual", | |
"options": { | |
"value": 0 | |
} | |
}, | |
"fieldName": "Value #A" | |
} | |
], | |
"match": "all", | |
"type": "include" | |
} | |
} | |
], | |
"type": "table" | |
} | |
], | |
"refresh": "", | |
"schemaVersion": 27, | |
"style": "dark", | |
"tags": [], | |
"templating": { | |
"list": [ | |
{ | |
"datasource": "prometheus", | |
"description": null, | |
"error": null, | |
"filters": [], | |
"hide": 0, | |
"label": null, | |
"name": "Filters", | |
"skipUrlSync": false, | |
"type": "adhoc" | |
}, | |
{ | |
"allValue": null, | |
"current": { | |
"selected": true, | |
"text": "50", | |
"value": "50" | |
}, | |
"description": null, | |
"error": null, | |
"hide": 0, | |
"includeAll": false, | |
"label": null, | |
"multi": false, | |
"name": "memory_overhead_percent", | |
"options": [ | |
{ | |
"selected": false, | |
"text": "50", | |
"value": "50" | |
}, | |
{ | |
"selected": false, | |
"text": "0", | |
"value": "0" | |
}, | |
{ | |
"selected": false, | |
"text": "10", | |
"value": "10" | |
}, | |
{ | |
"selected": false, | |
"text": "25", | |
"value": "25" | |
}, | |
{ | |
"selected": true, | |
"text": "100", | |
"value": "100" | |
} | |
], | |
"query": "50,0,10,25,100", | |
"queryValue": "", | |
"skipUrlSync": false, | |
"type": "custom" | |
}, | |
{ | |
"allValue": null, | |
"current": { | |
"selected": true, | |
"text": "10", | |
"value": "10" | |
}, | |
"description": null, | |
"error": null, | |
"hide": 0, | |
"includeAll": false, | |
"label": null, | |
"multi": false, | |
"name": "cpu_overhead_percent", | |
"options": [ | |
{ | |
"selected": false, | |
"text": "10", | |
"value": "10" | |
}, | |
{ | |
"selected": false, | |
"text": "0", | |
"value": "0" | |
}, | |
{ | |
"selected": false, | |
"text": "5", | |
"value": "5" | |
}, | |
{ | |
"selected": false, | |
"text": "25", | |
"value": "25" | |
}, | |
{ | |
"selected": false, | |
"text": "50", | |
"value": "50" | |
}, | |
{ | |
"selected": true, | |
"text": "100", | |
"value": "100" | |
} | |
], | |
"query": "10,0,5,25,50,100", | |
"queryValue": "", | |
"skipUrlSync": false, | |
"type": "custom" | |
}, | |
{ | |
"allValue": null, | |
"current": { | |
"selected": true, | |
"text": "0.95", | |
"value": "0.95" | |
}, | |
"description": null, | |
"error": null, | |
"hide": 0, | |
"includeAll": false, | |
"label": null, | |
"multi": false, | |
"name": "cpu_percentile", | |
"options": [ | |
{ | |
"selected": false, | |
"text": "0.95", | |
"value": "0.95" | |
}, | |
{ | |
"selected": false, | |
"text": "0.5", | |
"value": "0.5" | |
}, | |
{ | |
"selected": false, | |
"text": "0.75", | |
"value": "0.75" | |
}, | |
{ | |
"selected": false, | |
"text": "0.9", | |
"value": "0.9" | |
}, | |
{ | |
"selected": true, | |
"text": "0.99", | |
"value": "0.99" | |
} | |
], | |
"query": "0.95,0.5,0.75,0.9,0.99", | |
"queryValue": "", | |
"skipUrlSync": false, | |
"type": "custom" | |
}, | |
{ | |
"allValue": null, | |
"current": { | |
"selected": true, | |
"text": "0.99", | |
"value": "0.99" | |
}, | |
"description": null, | |
"error": null, | |
"hide": 0, | |
"includeAll": false, | |
"label": null, | |
"multi": false, | |
"name": "memory_percentile", | |
"options": [ | |
{ | |
"selected": true, | |
"text": "0.99", | |
"value": "0.99" | |
}, | |
{ | |
"selected": false, | |
"text": "0.5", | |
"value": "0.5" | |
}, | |
{ | |
"selected": false, | |
"text": "0.75", | |
"value": "0.75" | |
}, | |
{ | |
"selected": false, | |
"text": "0.9", | |
"value": "0.9" | |
}, | |
{ | |
"selected": false, | |
"text": "0.95", | |
"value": "0.95" | |
} | |
], | |
"query": "0.99,0.5,0.75,0.9,0.95", | |
"queryValue": "", | |
"skipUrlSync": false, | |
"type": "custom" | |
} | |
] | |
}, | |
"time": { | |
"from": "now-6h", | |
"to": "now" | |
}, | |
"timepicker": {}, | |
"timezone": "", | |
"title": "Overprovisioned & Underprovisioned Workloads", | |
"uid": "DIQg6M57k", | |
"version": 26 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment