Skip to content

Instantly share code, notes, and snippets.

@pavelnikolov
Created October 30, 2019 08:51
Show Gist options
  • Save pavelnikolov/fd6e2b19a4a30932b5fec267c9049f4d to your computer and use it in GitHub Desktop.
Save pavelnikolov/fd6e2b19a4a30932b5fec267c9049f4d to your computer and use it in GitHub Desktop.
Grafana dashboard for monitoring prometheus remote write.
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"description": "Updated version of Remote Storage Stats by skant ( https://grafana.com/dashboards/2009 ) valid for Grafana 6.2.2 and InfluxDB 1.7.6 \r\n\r\nRelevant when troubleshooting if your InfluxDB remote database is able to keep up with Prometheus writes",
"editable": true,
"gnetId": 10303,
"graphTooltip": 1,
"id": 11,
"iteration": 1572415041917,
"links": [],
"panels": [
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 14,
"panels": [],
"repeat": null,
"title": "Samples sent to remote storage",
"type": "row"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"description": "Per second rate of samples coming in to remote storage.",
"fill": 1,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 1
},
"id": 9,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {},
"paceLength": 10,
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"aggregator": "sum",
"alias": "",
"counterMax": "",
"counterResetValue": "1",
"currentTagKey": "",
"currentTagValue": "",
"downsampleAggregator": "avg",
"downsampleFillPolicy": "none",
"downsampleInterval": "",
"explicitTags": false,
"expr": "sum(rate(prometheus_remote_storage_samples_in_total[2m]))",
"format": "time_series",
"hide": false,
"interval": "1m",
"intervalFactor": 1,
"isCounter": true,
"legendFormat": "total",
"metric": "prometheus_remote_storage_succeeded_samples_total",
"refId": "A",
"shouldComputeRate": true,
"tags": {}
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Samples",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"decimals": null,
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 1
},
"id": 22,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {},
"paceLength": 10,
"percentage": false,
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(prometheus_remote_storage_succeeded_samples_total[2m]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "succeeded",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Succeeded Samples - Total",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 9
},
"id": 18,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {},
"paceLength": 10,
"percentage": false,
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(prometheus_remote_storage_failed_samples_total[5m]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "failed",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Total Attempts to Send Samples that Failed",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 9
},
"id": 20,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {},
"paceLength": 10,
"percentage": false,
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "prometheus_remote_storage_pending_samples",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "pending samples",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Pending Samples",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"decimals": null,
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 17
},
"id": 15,
"panels": [],
"repeat": null,
"title": "Queue & Latency",
"type": "row"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"description": "Number of retries to enqueue samples for remote storage over the last 5 minutes",
"fill": 1,
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 18
},
"id": 10,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {},
"paceLength": 10,
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"aggregator": "sum",
"alias": "",
"currentTagKey": "",
"currentTagValue": "",
"downsampleAggregator": "avg",
"downsampleFillPolicy": "none",
"downsampleInterval": "",
"expr": "increase(prometheus_remote_storage_enqueue_retries_total[5m])",
"format": "time_series",
"hide": false,
"intervalFactor": 1,
"metric": "prometheus_remote_storage_queue_capacity",
"refId": "A",
"shouldComputeRate": false,
"tags": {}
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Total Retries to Enqueue",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"description": "50th, 95th and 99th percentiles of remote storage batch latency",
"fill": 0,
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 18
},
"id": 12,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {},
"paceLength": 10,
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"aggregator": "p95",
"alias": "95th Percentile",
"counterMax": "",
"downsampleAggregator": "avg",
"downsampleFillPolicy": "none",
"downsampleInterval": "",
"expandHelper": 0,
"expr": "histogram_quantile(0.50, sum(rate(prometheus_remote_storage_sent_batch_duration_seconds_bucket[5m])) by (le))",
"format": "time_series",
"intervalFactor": 1,
"isCounter": false,
"legendFormat": "p50",
"metric": "prometheus_remote_storage_sent_batch_duration_seconds_bucket",
"refId": "B",
"shouldComputeRate": true,
"target": "Bosun Query"
},
{
"aggregator": "p95",
"alias": "95th Percentile",
"counterMax": "",
"downsampleAggregator": "avg",
"downsampleFillPolicy": "none",
"downsampleInterval": "",
"expandHelper": 0,
"expr": "histogram_quantile(0.95, sum(rate(prometheus_remote_storage_sent_batch_duration_seconds_bucket[5m])) by (le))",
"format": "time_series",
"intervalFactor": 1,
"isCounter": false,
"legendFormat": "p95",
"metric": "prometheus_remote_storage_sent_batch_duration_seconds_bucket",
"refId": "A",
"shouldComputeRate": true,
"target": "Bosun Query"
},
{
"aggregator": "p95",
"alias": "95th Percentile",
"counterMax": "",
"downsampleAggregator": "avg",
"downsampleFillPolicy": "none",
"downsampleInterval": "",
"expandHelper": 0,
"expr": "histogram_quantile(0.99, sum(rate(prometheus_remote_storage_sent_batch_duration_seconds_bucket[5m])) by (le))",
"format": "time_series",
"intervalFactor": 1,
"isCounter": false,
"legendFormat": "p99",
"metric": "prometheus_remote_storage_sent_batch_duration_seconds_bucket",
"refId": "C",
"shouldComputeRate": true,
"target": "Bosun Query"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Latency",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 25
},
"id": 16,
"panels": [],
"repeat": null,
"title": "Shards & Dropped Samples",
"type": "row"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"description": "Current number of active shards",
"editable": true,
"error": false,
"fill": 1,
"grid": {},
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 26
},
"id": 7,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "connected",
"options": {},
"paceLength": 10,
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"aggregator": "sum",
"alias": "",
"currentTagKey": "",
"currentTagValue": "",
"downsampleAggregator": "avg",
"downsampleFillPolicy": "none",
"downsampleInterval": "",
"explicitTags": false,
"expr": "prometheus_remote_storage_shards",
"format": "time_series",
"hide": false,
"intervalFactor": 1,
"isCounter": false,
"legendFormat": "shards",
"metric": "prometheus_remote_storage_shards",
"refId": "A",
"shouldComputeRate": false,
"tags": {}
},
{
"expr": "prometheus_remote_storage_shards_desired",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "desired shards",
"refId": "B"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Remote storage shards",
"tooltip": {
"msResolution": false,
"shared": true,
"sort": 0,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "none",
"label": "",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": "",
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"description": "Number of samples dropped per second due to the queue being full.",
"editable": true,
"error": false,
"fill": 1,
"grid": {},
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 26
},
"id": 5,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "connected",
"options": {},
"paceLength": 10,
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"aggregator": "sum",
"downsampleAggregator": "avg",
"downsampleFillPolicy": "none",
"downsampleInterval": "",
"expr": "sum(rate(prometheus_remote_storage_dropped_samples_total[5m]))",
"format": "time_series",
"hide": false,
"intervalFactor": 1,
"isCounter": false,
"legendFormat": "dropped",
"metric": "prometheus_remote_storage_dropped_samples_total",
"refId": "A",
"shouldComputeRate": false
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Remote storage dropped samples",
"tooltip": {
"msResolution": false,
"shared": true,
"sort": 0,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
}
],
"refresh": "30s",
"schemaVersion": 17,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"current": {
"selected": true,
"tags": [],
"text": "prometheus",
"value": "prometheus"
},
"hide": 0,
"label": null,
"name": "datasource",
"options": [],
"query": "prometheus",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"type": "datasource"
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"nowDelay": "2m",
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "browser",
"title": "Prometheus - Remote Storage Stats",
"uid": "cs2yKyMWz",
"version": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment