Skip to content

Instantly share code, notes, and snippets.

@david-martin
Created August 2, 2023 11:41
Show Gist options
  • Save david-martin/3332a5d89717bebdfe8ce1bb887dafd8 to your computer and use it in GitHub Desktop.
Save david-martin/3332a5d89717bebdfe8ce1bb887dafd8 to your computer and use it in GitHub Desktop.
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "$datasource",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"gnetId": null,
"graphTooltip": 1,
"id": 4,
"iteration": 1663771499928,
"links": [],
"panels": [
{
"datasource": "$datasource",
"description": "",
"fieldConfig": {
"defaults": {
"custom": {
"align": "center",
"displayMode": "color-text",
"filterable": false
},
"mappings": [
{
"from": "",
"id": 1,
"text": "GLBC unstable (kcp-stable)",
"to": "",
"type": 1,
"value": "kcp-stable-redhat-hcg-unstable"
},
{
"from": "",
"id": 2,
"text": "GLBC stable (kcp-stable)",
"to": "",
"type": 1,
"value": "kcp-stable-redhat-hcg"
},
{
"from": "",
"id": 3,
"text": "GLBC (kcp-unstable)",
"to": "",
"type": 1,
"value": "kcp-unstable-redhat-hcg"
},
{
"from": "",
"id": 4,
"text": "GLBC local",
"to": "",
"type": 1,
"value": "glbc-local"
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 2,
"w": 20,
"x": 4,
"y": 0
},
"id": 51,
"options": {
"showHeader": false
},
"pluginVersion": "7.3.10",
"repeat": null,
"repeatDirection": "v",
"targets": [
{
"expr": "kube_pod_labels{namespace=\"$namespace\"}",
"instant": true,
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "GLBC Environment",
"transformations": [
{
"id": "labelsToFields",
"options": {}
},
{
"id": "filterFieldsByName",
"options": {
"include": {
"names": [
"label_glbc_name"
]
}
}
}
],
"type": "table"
},
{
"collapsed": false,
"datasource": "$datasource",
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 2
},
"id": 39,
"panels": [],
"title": "Managed ingress objects",
"type": "row"
},
{
"datasource": "$datasource",
"description": "Shows the total number of managed ingress objects",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 2,
"w": 4,
"x": 0,
"y": 3
},
"id": 41,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"text": {
"titleSize": 18,
"valueSize": 18
},
"textMode": "auto"
},
"pluginVersion": "7.3.10",
"targets": [
{
"exemplar": true,
"expr": "sum(glbc_ingress_managed_object_total{namespace=\"$namespace\"})",
"instant": true,
"interval": "",
"legendFormat": "Total",
"refId": "A"
}
],
"title": "Total Ingress objects",
"type": "stat"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"description": "Shows the Ingress admission rate to load balancers, and the 90, 95 & 99th percentiles for how long admissions took.",
"fieldConfig": {
"defaults": {
"custom": {}
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 20,
"x": 4,
"y": 3
},
"hiddenSeries": false,
"id": 43,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.3.10",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [
{
"alias": "Admission Rate",
"yaxis": 2
}
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum(rate(glbc_ingress_managed_object_time_to_admission_count{namespace=\"$namespace\"}[5m]))",
"hide": false,
"instant": false,
"interval": "",
"legendFormat": "Admission Rate",
"refId": "A"
},
{
"exemplar": true,
"expr": "histogram_quantile(0.9, sum by(le) (rate(glbc_ingress_managed_object_time_to_admission_bucket{namespace=\"$namespace\"}[$__range])))",
"instant": false,
"interval": "",
"legendFormat": "90th %ile",
"refId": "B"
},
{
"exemplar": true,
"expr": "histogram_quantile(0.95, sum by(le) (rate(glbc_ingress_managed_object_time_to_admission_bucket{namespace=\"$namespace\"}[$__range])))",
"hide": false,
"instant": false,
"interval": "",
"legendFormat": "95th %ile",
"refId": "C"
},
{
"exemplar": true,
"expr": "histogram_quantile(0.99, sum by(le) (rate(glbc_ingress_managed_object_time_to_admission_bucket{namespace=\"$namespace\"}[$__range])))",
"hide": false,
"instant": false,
"interval": "",
"legendFormat": "99th %ile",
"refId": "D"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Ingress Admissions",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "s",
"label": "Admission time",
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "none",
"label": "Admissions/sec",
"logBase": 1,
"max": null,
"min": "0",
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"collapsed": false,
"datasource": "$datasource",
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 10
},
"id": 15,
"title": "AWS Route53 - Rate, Errors & Duration (RED Method)",
"type": "row"
},
{
"datasource": "$datasource",
"description": "Shows aggregate data on DNS requests to AWS Route53 for the selected time range.\nNote that if a different DNS provider than Route53 is used, these values may be 0.",
"fieldConfig": {
"defaults": {
"color": {
"fixedColor": "green",
"mode": "fixed"
},
"custom": {},
"decimals": 0,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 4,
"x": 0,
"y": 11
},
"id": 2,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": true
},
"text": {
"titleSize": 18,
"valueSize": 18
},
"textMode": "auto"
},
"pluginVersion": "7.3.10",
"targets": [
{
"exemplar": true,
"expr": "sum(increase(glbc_aws_route53_request_total{namespace=\"$namespace\"}[$__range]))",
"format": "time_series",
"hide": false,
"instant": true,
"interval": "",
"legendFormat": "Total Requests",
"refId": "A"
},
{
"exemplar": true,
"expr": "sum(increase(glbc_aws_route53_request_errors_total{namespace=\"$namespace\"}[$__range]))",
"hide": false,
"instant": true,
"interval": "",
"legendFormat": "Total Errors",
"refId": "B"
}
],
"timeFrom": null,
"timeShift": null,
"title": "AWS Route53 Requests",
"type": "stat"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"description": "Shows the rate of requests to Route53 for the selected time range. Note that if a different DNS provider than Route53 is used, these values may be 0.",
"fieldConfig": {
"defaults": {
"custom": {}
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 20,
"x": 4,
"y": 11
},
"hiddenSeries": false,
"id": 10,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.3.10",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum(rate(glbc_aws_route53_request_total{namespace=\"$namespace\"}[$__range]))",
"hide": false,
"interval": "",
"legendFormat": "Total Requests",
"refId": "C"
},
{
"exemplar": true,
"expr": "sum(rate(glbc_aws_route53_request_errors_total{namespace=\"$namespace\"}[$__range]))",
"hide": false,
"interval": "",
"legendFormat": "Total Errors",
"refId": "B"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "AWS Route53 Requests",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "none",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"collapsed": false,
"datasource": "$datasource",
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 19
},
"id": 13,
"panels": [],
"title": "TLS Certificates - Rate, Errors & Duration (RED Method)",
"type": "row"
},
{
"datasource": "$datasource",
"description": "The identifier of the certificate issuer configured for the controller in the selected namespace",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "rgb(245, 242, 214)",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 2,
"w": 4,
"x": 0,
"y": 20
},
"id": 9,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"text": {},
"textMode": "name"
},
"pluginVersion": "7.3.10",
"targets": [
{
"exemplar": true,
"expr": "glbc_tls_certificate_secret_count{namespace=\"$namespace\"}",
"instant": true,
"interval": "",
"legendFormat": "{{issuer}}",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "TLS Cert Issuer",
"type": "stat"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"description": "Shows the rate of certificate requests for the selected time range.",
"fieldConfig": {
"defaults": {
"custom": {}
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 20,
"x": 4,
"y": 20
},
"hiddenSeries": false,
"id": 11,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.3.10",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum(rate(glbc_tls_certificate_request_total{namespace=\"$namespace\"}[$__range])) by (namespace)",
"hide": false,
"interval": "",
"legendFormat": "Total",
"refId": "C"
},
{
"exemplar": true,
"expr": "sum(rate(glbc_tls_certificate_request_total{namespace=\"$namespace\", result=\"succeeded\"}[$__range])) by(namespace)",
"hide": false,
"interval": "",
"legendFormat": "Total Succeeded",
"refId": "B"
},
{
"exemplar": true,
"expr": "sum(rate(glbc_tls_certificate_request_total{namespace=\"$namespace\", result=\"failed\"}[$__range])) by (namespace)",
"hide": false,
"interval": "",
"legendFormat": "Total Failed",
"refId": "A"
},
{
"exemplar": true,
"expr": "sum(rate(glbc_tls_certificate_secret_count{namespace=\"$namespace\" }[$__range])) by (namespace)",
"hide": false,
"interval": "",
"legendFormat": "Total Secrets",
"refId": "D"
},
{
"exemplar": true,
"expr": "sum(rate(glbc_tls_certificate_pending_request_count{namespace=\"$namespace\"}[$__range])) by (namespace)",
"hide": false,
"interval": "",
"legendFormat": "Total Pending",
"refId": "E"
},
{
"exemplar": true,
"expr": "sum(rate(glbc_tls_certificate_issuance_duration_seconds_bucket{namespace=\"$namespace\", le=\"+Inf\"}[$__range])) by (namespace) - sum(rate(glbc_tls_certificate_issuance_duration_seconds_bucket{namespace=\"$namespace\" , le=\"300\"}[$__range])) by (namespace)",
"hide": false,
"interval": "",
"legendFormat": "Total > 5min",
"refId": "F"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "TLS Cert Requests",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "none",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"datasource": "$datasource",
"description": "Shows aggregate data on certificate requests for the selected time range.",
"fieldConfig": {
"defaults": {
"color": {
"fixedColor": "green",
"mode": "fixed"
},
"custom": {},
"decimals": 0,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 6,
"w": 4,
"x": 0,
"y": 22
},
"id": 7,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": true
},
"text": {
"titleSize": 18,
"valueSize": 18
},
"textMode": "auto"
},
"pluginVersion": "7.3.10",
"targets": [
{
"exemplar": true,
"expr": "sum(increase(glbc_tls_certificate_request_total{namespace=\"$namespace\"}[$__range])) by (namespace)",
"format": "time_series",
"hide": false,
"instant": true,
"interval": "",
"legendFormat": "Total",
"refId": "A"
},
{
"exemplar": true,
"expr": "sum(increase(glbc_tls_certificate_request_total{namespace=\"$namespace\", result=\"succeeded\"}[$__range])) by(namespace)",
"hide": false,
"instant": true,
"interval": "",
"legendFormat": "Total Succeeded",
"refId": "B"
},
{
"exemplar": true,
"expr": "sum(increase(glbc_tls_certificate_request_total{namespace=\"$namespace\", result=\"failed\"}[$__range])) by (namespace)",
"hide": false,
"instant": true,
"interval": "",
"legendFormat": "Total Failed",
"refId": "C"
},
{
"exemplar": true,
"expr": "sum(increase(glbc_tls_certificate_secret_count{namespace=\"$namespace\" }[$__range])) by (namespace)",
"hide": false,
"instant": true,
"interval": "",
"legendFormat": "Total Secrets",
"refId": "D"
},
{
"exemplar": true,
"expr": "sum(increase(glbc_tls_certificate_pending_request_count{namespace=\"$namespace\"}[$__range])) by (namespace)",
"hide": false,
"instant": true,
"interval": "",
"legendFormat": "Total Pending",
"refId": "E"
},
{
"exemplar": true,
"expr": "sum(increase(glbc_tls_certificate_issuance_duration_seconds_bucket{namespace=\"$namespace\", le=\"+Inf\"}[$__range])) by (namespace) - sum(increase(glbc_tls_certificate_issuance_duration_seconds_bucket{namespace=\"$namespace\" , le=\"300\"}[$__range])) by (namespace)",
"hide": false,
"instant": true,
"interval": "",
"legendFormat": "Total > 5m",
"refId": "F"
}
],
"timeFrom": null,
"timeShift": null,
"title": "TLS Cert Requests",
"type": "stat"
},
{
"collapsed": false,
"datasource": "$datasource",
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 28
},
"id": 17,
"panels": [],
"title": "Pods - Utilisation, Saturation & Errors (USE Method)",
"type": "row"
},
{
"datasource": "$datasource",
"description": "Shows the number of pods by status in the selected namespace. Note this is an 'Instant' value i.e. it uses the latest value instead of querying over a range.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 6,
"w": 4,
"x": 0,
"y": 29
},
"id": 19,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"text": {
"titleSize": 18,
"valueSize": 18
},
"textMode": "auto"
},
"pluginVersion": "7.3.10",
"targets": [
{
"exemplar": true,
"expr": "sum(kube_pod_status_phase{namespace=\"$namespace\"}) by(phase)",
"instant": true,
"interval": "",
"legendFormat": "{{phase}}",
"refId": "A"
}
],
"title": "Pod Statuses",
"type": "stat"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"description": "Shows pod CPU usage for each pod in the namespace. If set, pod CPU requests and limits are also shown. The legend includes a suffix of the pod name.",
"fieldConfig": {
"defaults": {
"custom": {}
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 20,
"x": 4,
"y": 29
},
"hiddenSeries": false,
"id": 6,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.3.10",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "kube_pod_container_resource_limits{namespace=\"$namespace\", resource=\"cpu\"}",
"hide": false,
"interval": "",
"legendFormat": "CPU Limits {{pod}}",
"refId": "C"
},
{
"exemplar": true,
"expr": "kube_pod_container_resource_requests{namespace=\"$namespace\", resource=\"cpu\"}",
"hide": false,
"interval": "",
"legendFormat": "CPU Requests {{pod}}",
"refId": "B"
},
{
"exemplar": true,
"expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\"}[1m])) by (pod)",
"interval": "",
"legendFormat": "CPU Usage {{pod}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "CPU",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "percentunit",
"label": null,
"logBase": 1,
"max": "1",
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"datasource": "$datasource",
"description": "Shows the number of container restarts (by pod) over the chosen time range for the selected namespace.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 6,
"w": 4,
"x": 0,
"y": 35
},
"id": 20,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"text": {
"titleSize": 18,
"valueSize": 18
},
"textMode": "value_and_name"
},
"pluginVersion": "7.3.10",
"targets": [
{
"exemplar": true,
"expr": "sum(kube_pod_container_status_restarts_total{namespace=\"$namespace\"}) by(pod)",
"instant": false,
"interval": "",
"legendFormat": "{{pod}}",
"refId": "A"
}
],
"title": "Container Restarts",
"type": "stat"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"description": "Shows pod memory usage in MiB (based on RSS) for each pod in the namespace. If set, pod memory requests and limits are also shown. The legend includes a suffix of the pod name.",
"fieldConfig": {
"defaults": {
"custom": {},
"unit": "bytes"
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 20,
"x": 4,
"y": 37
},
"hiddenSeries": false,
"id": 4,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.3.10",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "kube_pod_container_resource_requests{namespace=\"$namespace\", resource=\"memory\"}",
"interval": "",
"legendFormat": "Pod Memory Requests {{pod}}",
"refId": "A"
},
{
"exemplar": true,
"expr": "kube_pod_container_resource_limits{namespace=\"$namespace\", resource=\"memory\"}",
"hide": false,
"interval": "",
"legendFormat": "Pod Memory Limits {{pod}}",
"refId": "C"
},
{
"exemplar": true,
"expr": "sum(container_memory_rss{namespace=\"$namespace\", container=\"manager\"}) by (pod)",
"hide": false,
"interval": "",
"legendFormat": "Pod Memory {{pod}}",
"refId": "B"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Memory",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"description": "Shows the total number of Goroutines by pod. Only 1 line is expected for the glbc pod.",
"fieldConfig": {
"defaults": {
"custom": {},
"links": []
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 20,
"x": 4,
"y": 45
},
"hiddenSeries": false,
"id": 37,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.3.10",
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum(go_goroutines{namespace=\"$namespace\"}) by(pod)",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Goroutines",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"collapsed": false,
"datasource": "$datasource",
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 53
},
"id": 30,
"panels": [],
"title": "Reconciliation & Workers - Rate, Errors & Duration (RED Method)",
"type": "row"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"description": "Shows the rate of successful reconcile loops",
"fieldConfig": {
"defaults": {
"custom": {},
"links": []
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 54
},
"hiddenSeries": false,
"id": 32,
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.3.10",
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum(rate(glbc_controller_reconcile_total{namespace=\"$namespace\",result=\"success\"}[5m])) by (controller)",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "{{controller}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Controller Reconcile Rate (success)",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "ops",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "ops",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"description": "Shows the rate of resources being queued for processing by each controller.",
"fieldConfig": {
"defaults": {
"custom": {},
"links": []
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 54
},
"hiddenSeries": false,
"id": 34,
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.3.10",
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum(rate(workqueue_adds_total{namespace=\"$namespace\"}[5m])) by (instance, name)",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{name}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Work Queue Add Rate",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "ops",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "ops",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"description": "Shows the rate of failed/erroring reconcile loops",
"fieldConfig": {
"defaults": {
"custom": {},
"links": []
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 61
},
"hiddenSeries": false,
"id": 33,
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.3.10",
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum(rate(glbc_controller_reconcile_total{namespace=\"$namespace\",result=\"error\"}[5m])) by (controller)",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "{{controller}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Controller Reconcile Rate (error)",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "ops",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "ops",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"description": "Shows the queue size for each resource controller.",
"fieldConfig": {
"defaults": {
"custom": {},
"links": []
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 61
},
"hiddenSeries": false,
"id": 35,
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.3.10",
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum(rate(workqueue_depth{namespace=\"$namespace\"}[5m])) by (instance, name)",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{name}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Work Queue Depth",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"description": "Shows the queue processing latency for each resource controller.",
"fieldConfig": {
"defaults": {
"custom": {},
"links": []
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 68
},
"hiddenSeries": false,
"id": 36,
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.3.10",
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{namespace=\"$namespace\"}[5m])) by (instance, name, le))",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{name}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Work Queue Latency",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"collapsed": false,
"datasource": "$datasource",
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 75
},
"id": 45,
"panels": [],
"title": "KCP API Requests - Rate, Errors & Duration (RED Method)",
"type": "row"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"description": "Shows the rate of non 5xx requests per HTTP method.",
"fieldConfig": {
"defaults": {
"custom": {}
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 76
},
"hiddenSeries": false,
"id": 47,
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.3.10",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "rate(rest_client_requests_total{namespace=\"$namespace\",code!~\"5.*\"}[5m])",
"interval": "",
"legendFormat": "{{code}} {{method}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Request Rate - Non 5xx",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"description": "Shows the per-second rate of 5xx requests, labelled by response code and HTTP method.",
"fieldConfig": {
"defaults": {
"custom": {}
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 76
},
"hiddenSeries": false,
"id": 49,
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.3.10",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "rate(rest_client_requests_total{namespace=\"$namespace\",code=~\"5.*\"}[5m])",
"interval": "",
"legendFormat": "{{code}} {{method}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Request Rate - 5xx",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"description": "Shows the average request time and the 90th, 95th and 99th percentile request times, aggregated across all requests.",
"fieldConfig": {
"defaults": {
"custom": {}
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 84
},
"hiddenSeries": false,
"id": 48,
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.3.10",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{namespace=~\"$namespace\"}[$__range])) by(le))",
"interval": "",
"legendFormat": "99th %ile",
"refId": "A"
},
{
"exemplar": true,
"expr": "histogram_quantile(0.95, sum(rate(rest_client_request_latency_seconds_bucket{namespace=~\"$namespace\"}[$__range])) by(le))",
"hide": false,
"interval": "",
"legendFormat": "95th %ile",
"refId": "B"
},
{
"exemplar": true,
"expr": "histogram_quantile(0.90, sum(rate(rest_client_request_latency_seconds_bucket{namespace=~\"$namespace\"}[$__range])) by(le))",
"hide": false,
"interval": "",
"legendFormat": "90th %ile",
"refId": "C"
},
{
"exemplar": true,
"expr": "sum(rate(rest_client_request_latency_seconds_sum{namespace=~\"$namespace\"}[$__range])) / sum(rate(rest_client_request_latency_seconds_count{namespace=~\"$namespace\"}[$__range]))",
"hide": false,
"instant": false,
"interval": "",
"legendFormat": "Average",
"refId": "D"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Request Times",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
}
],
"refresh": false,
"schemaVersion": 26,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"current": {
"selected": false,
"text": "default",
"value": "default"
},
"error": null,
"hide": 0,
"includeAll": false,
"label": "Data Source",
"multi": false,
"name": "datasource",
"options": [],
"query": "prometheus",
"queryValue": "",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"type": "datasource"
},
{
"allValue": null,
"current": {
"selected": true,
"text": "kcp-76e6x0ld7z27",
"value": "kcp-76e6x0ld7z27"
},
"datasource": "$datasource",
"definition": "label_values(glbc_controller_reconcile_total, namespace)",
"description": null,
"error": null,
"hide": 0,
"includeAll": false,
"label": "Namespace (filtered to glbc namespaces)",
"multi": false,
"name": "namespace",
"options": [],
"query": "label_values(glbc_controller_reconcile_total, namespace)",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"nowDelay": "",
"refresh_intervals": [],
"time_options": []
},
"timezone": "browser",
"title": "GLBC / Overview",
"uid": "75f3d9c692690c6badc848e02a3d6e1b82444622",
"version": 6
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment