Skip to content

Instantly share code, notes, and snippets.

@benjvi
Created January 26, 2022 16:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save benjvi/e5c44f05e77f146de5ee7c177464ee8b to your computer and use it in GitHub Desktop.
Save benjvi/e5c44f05e77f146de5ee7c177464ee8b to your computer and use it in GitHub Desktop.
Backup Execution Dashboard - CronJobs & Velero
{
"__inputs": [
{
"name": "DS_PROMETHEUS",
"label": "prometheus",
"description": "",
"type": "datasource",
"pluginId": "prometheus",
"pluginName": "Prometheus"
}
],
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "7.5.4"
},
{
"type": "panel",
"id": "graph",
"name": "Graph",
"version": ""
},
{
"type": "panel",
"id": "piechart",
"name": "Pie chart v2",
"version": ""
},
{
"type": "datasource",
"id": "prometheus",
"name": "Prometheus",
"version": "1.0.0"
},
{
"type": "panel",
"id": "stat",
"name": "Stat",
"version": ""
},
{
"type": "panel",
"id": "table",
"name": "Table",
"version": ""
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"id": null,
"links": [],
"panels": [
{
"collapsed": false,
"datasource": null,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 13,
"panels": [],
"title": "CronJobs",
"type": "row"
},
{
"datasource": "${DS_PROMETHEUS}",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": null,
"filterable": false
},
"mappings": [],
"noValue": "-",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "#EAB839",
"value": 12
},
{
"color": "red",
"value": 24
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Last Successful (h)"
},
"properties": [
{
"id": "custom.displayMode",
"value": "color-background"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Last Scheduled (h)"
},
"properties": [
{
"id": "custom.displayMode",
"value": "color-background"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Last Failed (d)"
},
"properties": [
{
"id": "custom.displayMode",
"value": "color-background"
},
{
"id": "thresholds",
"value": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 0.0001
},
{
"color": "orange",
"value": 3
},
{
"color": "#EAB839",
"value": 10
},
{
"color": "green",
"value": 30
}
]
}
},
{
"id": "custom.width",
"value": null
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 1
},
"id": 2,
"options": {
"showHeader": true,
"sortBy": []
},
"pluginVersion": "7.5.4",
"targets": [
{
"exemplar": true,
"expr": "# note: doesn't account for schedules with no successful runs in time period\n(\n time()\n - \n (\n max(\n # get jobs associated to a Cronjob\n kube_job_owner{owner_kind=\"CronJob\"} \n * \n on (namespace, job_name) \n group_right(owner_name) \n (\n (kube_job_status_succeeded == 1) \n * \n on (namespace, job_name) \n group_right(owner_name) \n (kube_job_status_completion_time) \n )\n ) by (namespace, owner_name) \n )\n) / 3600",
"format": "table",
"instant": true,
"interval": "",
"legendFormat": "",
"queryType": "randomWalk",
"refId": "A"
},
{
"exemplar": true,
"expr": "# note: doesn't account for schedules with no failed runs in time period\n(\n time()\n - \n (\n max(\n # get jobs associated to a Cronjob\n kube_job_owner{owner_kind=\"CronJob\"} \n * \n on (namespace, job_name) \n group_right(owner_name) \n (\n (kube_job_status_failed == 1) \n * \n on (namespace, job_name) \n group_right(owner_name) \n (kube_job_status_start_time) \n )\n ) by (namespace, owner_name) \n )\n) / 86400",
"format": "table",
"hide": false,
"instant": true,
"interval": "",
"legendFormat": "",
"refId": "B"
},
{
"exemplar": true,
"expr": "(\n time()\n -\n max(\n label_replace(kube_cronjob_status_last_schedule_time, \"owner_name\", \"$1\", \"cronjob\", \"(.*)\")\n ) by (namespace, owner_name) \n) / 3600",
"format": "table",
"hide": false,
"instant": true,
"interval": "",
"legendFormat": "",
"refId": "C"
}
],
"title": "Age of Last CronJob Runs",
"transformations": [
{
"id": "merge",
"options": {}
},
{
"id": "filterFieldsByName",
"options": {}
},
{
"id": "organize",
"options": {
"excludeByName": {},
"indexByName": {},
"renameByName": {
"Value #A": "Last Successful (h)",
"Value #B": "Last Failed (d)",
"Value #C": "Last Scheduled (h)",
"owner_name": ""
}
}
}
],
"type": "table"
},
{
"datasource": "${DS_PROMETHEUS}",
"description": "Jobs in the current time period *AND* retained according to the CronJob's retention policy",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": null,
"filterable": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "#EAB839",
"value": 1
},
{
"color": "red",
"value": 2
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Not Successful"
},
"properties": [
{
"id": "custom.displayMode",
"value": "color-background"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Last Run (h)"
},
"properties": [
{
"id": "custom.displayMode",
"value": "color-background"
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 8
},
"id": 11,
"options": {
"showHeader": true,
"sortBy": [
{
"desc": false,
"displayName": "Total"
}
]
},
"pluginVersion": "7.5.4",
"targets": [
{
"exemplar": true,
"expr": "count(\n # get jobs associated to a Cronjob\n kube_job_owner{owner_kind=\"CronJob\"} \n * \n on (namespace, job_name) \n group_right(owner_name) \n avg(kube_job_status_succeeded == 1) by (namespace,job_name)\n * \n on (namespace, job_name) \n group_right(owner_name) \n kube_job_status_start_time > ${__from:date:seconds}\n) by (namespace, owner_name) ",
"format": "table",
"hide": false,
"instant": true,
"interval": "",
"legendFormat": "",
"queryType": "randomWalk",
"refId": "A"
},
{
"exemplar": true,
"expr": "count(\n # get jobs associated to a Cronjob\n kube_job_owner{owner_kind=\"CronJob\"} \n * \n on (namespace, job_name) \n group_right(owner_name) \n avg(kube_job_status_failed == 1) by (namespace,job_name)\n * \n on (namespace, job_name) \n group_right(owner_name) \n kube_job_status_start_time > ${__from:date:seconds}\n) by (namespace, owner_name) ",
"format": "table",
"hide": false,
"instant": true,
"interval": "",
"legendFormat": "",
"refId": "B"
},
{
"exemplar": true,
"expr": "count(\n # get jobs associated to a Cronjob\n kube_job_owner{owner_kind=\"CronJob\"} \n * \n on (namespace, job_name) \n group_right(owner_name) \n avg(kube_job_info) by (namespace,job_name)\n * \n on (namespace, job_name) \n group_right(owner_name) \n kube_job_status_start_time > ${__from:date:seconds}\n) by (namespace, owner_name) ",
"format": "table",
"hide": false,
"instant": true,
"interval": "",
"legendFormat": "",
"refId": "C"
},
{
"exemplar": true,
"expr": "count(\n # get jobs associated to a Cronjob\n kube_job_owner{owner_kind=\"CronJob\"} \n * \n on (namespace, job_name) \n group_right(owner_name) \n avg(kube_job_info) by (namespace,job_name)\n * \n on (namespace, job_name) \n group_right(owner_name) \n kube_job_status_start_time > ${__from:date:seconds}\n) by (namespace, owner_name) \n-\ncount(\n # get jobs associated to a Cronjob\n kube_job_owner{owner_kind=\"CronJob\"} \n * \n on (namespace, job_name) \n group_right(owner_name) \n avg(kube_job_status_succeeded == 1) by (namespace,job_name)\n * \n on (namespace, job_name) \n group_right(owner_name) \n kube_job_status_start_time > ${__from:date:seconds}\n) by (namespace, owner_name) ",
"format": "table",
"hide": false,
"instant": true,
"interval": "",
"legendFormat": "",
"refId": "D"
}
],
"title": "CronJob Jobs Recently Run",
"transformations": [
{
"id": "merge",
"options": {}
},
{
"id": "filterFieldsByName",
"options": {}
},
{
"id": "organize",
"options": {
"excludeByName": {},
"indexByName": {
"Time": 0,
"Value #A": 4,
"Value #C": 3,
"Value #D": 5,
"namespace": 1,
"owner_name": 2
},
"renameByName": {
"Value #A": "Successful",
"Value #B": "Failed",
"Value #C": "Total",
"Value #D": "Not Successful",
"{namespace=\"argo-events\", owner_name=\"load-askgit-data\"}": "Not"
}
}
}
],
"type": "table"
},
{
"collapsed": false,
"datasource": null,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 15
},
"id": 4,
"panels": [],
"title": "Velero",
"type": "row"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_PROMETHEUS}",
"fieldConfig": {
"defaults": {},
"overrides": []
},
"fill": 2,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 7,
"x": 0,
"y": 16
},
"hiddenSeries": false,
"id": 6,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.5.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [
{
"$$hashKey": "object:209",
"alias": "attempts",
"stack": false
}
],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum(velero_backup_attempt_total)",
"format": "time_series",
"hide": false,
"instant": false,
"interval": "",
"legendFormat": "attempts",
"queryType": "randomWalk",
"refId": "A"
},
{
"exemplar": true,
"expr": "sum(velero_backup_partial_failure_total)",
"format": "time_series",
"hide": false,
"interval": "",
"legendFormat": "completed with partial failures",
"refId": "B"
},
{
"exemplar": true,
"expr": "sum(velero_backup_success_total)",
"hide": false,
"interval": "",
"legendFormat": "succeeded",
"refId": "C"
},
{
"exemplar": true,
"expr": "sum(velero_backup_validation_failure_total)",
"hide": false,
"interval": "",
"legendFormat": "failed",
"refId": "D"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Backups",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:51",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"$$hashKey": "object:52",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"datasource": "${DS_PROMETHEUS}",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 7,
"x": 7,
"y": 16
},
"id": 8,
"options": {
"displayLabels": [
"value"
],
"legend": {
"displayMode": "list",
"placement": "right",
"values": []
},
"pieType": "pie",
"reduceOptions": {
"calcs": [
"last"
],
"fields": "",
"values": false
},
"text": {}
},
"pluginVersion": "7.5.4",
"targets": [
{
"exemplar": true,
"expr": "sum(delta(velero_backup_attempt_total[$__range])) by (schedule) + on(schedule) (((time() - velero_backup_last_successful_timestamp) / 86400 < 7 ) * 0)",
"format": "time_series",
"hide": false,
"instant": true,
"interval": "",
"legendFormat": "schedule: - {{ schedule }}",
"queryType": "randomWalk",
"refId": "A"
},
{
"exemplar": true,
"expr": "velero_backup_attempt_total + on(schedule) (((time() - velero_backup_last_successful_timestamp) / 86400 < 7 ) * 0)",
"hide": true,
"interval": "",
"legendFormat": "",
"refId": "B"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Attempted Backups By Schedule In Time Range",
"type": "piechart"
},
{
"datasource": "${DS_PROMETHEUS}",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "#EAB839",
"value": 24
},
{
"color": "red",
"value": 48
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 5,
"x": 14,
"y": 16
},
"id": 10,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"text": {},
"textMode": "auto"
},
"pluginVersion": "7.5.4",
"targets": [
{
"exemplar": true,
"expr": "min((time() - velero_backup_last_successful_timestamp) / 3600)",
"format": "time_series",
"instant": false,
"interval": "",
"legendFormat": "",
"queryType": "randomWalk",
"refId": "A"
}
],
"title": "Hours since last successful backup",
"type": "stat"
}
],
"refresh": false,
"schemaVersion": 27,
"style": "dark",
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-7d",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Backup Execution",
"uid": "LcwXjBcnz",
"version": 14
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment