Skip to content

Instantly share code, notes, and snippets.

@mmgrt
Last active August 19, 2022 13:30
Show Gist options
  • Save mmgrt/9d5a2955f0bc9cf86eabe7554919a4ef to your computer and use it in GitHub Desktop.
# Label "inputs.net".
# metadata.name is the template-local identifier that other resources would
# use in their `associations`; spec.name is the label's display name.
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
  name: adoring-pasteur-d4d00d
spec:
  color: '#326BBA'
  name: inputs.net
---
# Label "inputs.kernel".
# metadata.name is the template-local identifier that other resources would
# use in their `associations`; spec.name is the label's display name.
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
  name: agreeing-goldberg-d4d013
spec:
  color: '#326BBA'
  name: inputs.kernel
---
# Label "inputs.disk".
# metadata.name is the template-local identifier that other resources would
# use in their `associations`; spec.name is the label's display name.
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
  name: dreamy-heyrovsky-d4d011
spec:
  color: '#326BBA'
  name: inputs.disk
---
# Label "inputs.swap".
# metadata.name is the template-local identifier that other resources would
# use in their `associations`; spec.name is the label's display name.
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
  name: elastic-morse-d4d001
spec:
  color: '#326BBA'
  name: inputs.swap
---
# Label "inputs.system".
# Referenced by metadata.name from the `associations` list of the Dashboard
# document (earning-boyd-94d001) in this file.
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
  name: elegant-yonath-d4d015
spec:
  color: '#326BBA'
  name: inputs.system
---
# Label "inputs.processes".
# metadata.name is the template-local identifier that other resources would
# use in their `associations`; spec.name is the label's display name.
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
  name: gallant-sanderson-d4d003
spec:
  color: '#326BBA'
  name: inputs.processes
---
# Label "inputs.diskio".
# metadata.name is the template-local identifier that other resources would
# use in their `associations`; spec.name is the label's display name.
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
  name: goofy-yalow-d4d005
spec:
  color: '#326BBA'
  name: inputs.diskio
---
# Label "inputs.docker".
# Referenced by metadata.name from the `associations` list of the Dashboard
# document (earning-boyd-94d001) in this file.
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
  name: inspiring-goodall-d4d00b
spec:
  color: '#326BBA'
  name: inputs.docker
---
# Label "outputs.influxdb_v2".
# The only label in this file with a different colour (#108174) from the
# `inputs.*` labels (#326BBA).
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
  name: nice-kilby-d4d009
spec:
  color: '#108174'
  name: outputs.influxdb_v2
---
# Label "inputs.cpu".
# metadata.name is the template-local identifier that other resources would
# use in their `associations`; spec.name is the label's display name.
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
  name: unruffled-benz-d4d007
spec:
  color: '#326BBA'
  name: inputs.cpu
---
# Label "inputs.mem".
# metadata.name is the template-local identifier that other resources would
# use in their `associations`; spec.name is the label's display name.
apiVersion: influxdata.com/v2alpha1
kind: Label
metadata:
  name: vigilant-chatelet-d4d00f
spec:
  color: '#326BBA'
  name: inputs.mem
---
# Threshold check "Container Mem Usage Pct".
# Runs every 15 minutes over the 15-minute mean of
# docker_container_mem.usage_percent.
# Levels: WARN above 80, CRIT above 90, OK below 80 (a value of exactly 80
# matches none of the thresholds).
# NOTE(review): the query reads from `v.bucket`, but no Variable named
# `bucket` is defined in the visible part of this file - confirm that checks
# can resolve it, or substitute a concrete bucket name.
apiVersion: influxdata.com/v2alpha1
kind: CheckThreshold
metadata:
  name: crumbling-meitner-14d002
spec:
  description: Container mem usage is above 80% for 15 minutes
  every: 15m0s
  name: Container Mem Usage Pct
  query: |-
    from(bucket: v.bucket)
      |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
      |> filter(fn: (r) => r["_measurement"] == "docker_container_mem")
      |> filter(fn: (r) => r["_field"] == "usage_percent")
      |> aggregateWindow(every: 15m, fn: mean)
      |> yield(name: "mean")
  status: active
  # Placeholders must use the `${ ... }` form; a bare `{r.container_name}`
  # would be emitted literally instead of being replaced by the tag value.
  statusMessageTemplate: 'Check: ${ r._check_name } for container ${ r.container_name }
    is: ${ r._level } '
  thresholds:
    - level: WARN
      type: greater
      value: 80
    - level: CRIT
      type: greater
      value: 90
    - level: OK
      type: lesser
      value: 80
---
# Threshold check "Container Disk Usage".
# Runs every 15 minutes over the 15-minute mean of the `usage_percent` field
# of the `docker_container_blkio` measurement.
# Levels: WARN above 80, CRIT above 90, OK below 80 (a value of exactly 80
# matches none of the thresholds).
# NOTE(review): confirm that `docker_container_blkio` actually carries a
# `usage_percent` field in your data; if it does not, this check never
# receives any rows and can never fire.
# NOTE(review): the query reads from `v.bucket`, but no Variable named
# `bucket` is defined in the visible part of this file - confirm that checks
# can resolve it, or substitute a concrete bucket name.
apiVersion: influxdata.com/v2alpha1
kind: CheckThreshold
metadata:
  name: ecstatic-mendeleev-d4d01b
spec:
  description: Container disk usage is above 80%
  every: 15m0s
  name: Container Disk Usage
  query: |-
    from(bucket: v.bucket)
      |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
      |> filter(fn: (r) => r["_measurement"] == "docker_container_blkio")
      |> filter(fn: (r) => r["_field"] == "usage_percent")
      |> aggregateWindow(every: 15m, fn: mean)
      |> yield(name: "mean")
  status: active
  statusMessageTemplate: 'Check: ${ r._check_name } is: ${ r._level }'
  thresholds:
    - level: WARN
      type: greater
      value: 80
    - level: CRIT
      type: greater
      value: 90
    - level: OK
      type: lesser
      value: 80
---
# Threshold check "Container Non Zero Exit Code".
# Runs every 5 minutes over the unique values of
# docker_container_status.exitcode.
# The single threshold raises CRIT when the value lies outside the range
# [0, 0], i.e. for any exit code other than 0.
# NOTE(review): the query reads from `v.bucket`, but no Variable named
# `bucket` is defined in the visible part of this file - confirm that checks
# can resolve it, or substitute a concrete bucket name.
apiVersion: influxdata.com/v2alpha1
kind: CheckThreshold
metadata:
  name: frosty-greider-14d004
spec:
  description: Container exits with a non zero exit status
  every: 5m0s
  name: Container Non Zero Exit Code
  query: |-
    from(bucket: v.bucket)
      |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
      |> filter(fn: (r) => r["_measurement"] == "docker_container_status")
      |> filter(fn: (r) => r["_field"] == "exitcode")
      |> unique()
      |> yield(name: "unique")
  status: active
  # Placeholders must use the `${ ... }` form; a bare `{r.container_name}`
  # would be emitted literally instead of being replaced by the tag value.
  statusMessageTemplate: 'Check: ${ r._check_name } for container ${ r.container_name }
    has exited with a non 0 exit code'
  thresholds:
    - level: CRIT
      max: 0
      min: 0
      type: outside_range
---
# Threshold check "Container CPU Usage Pct".
# Runs every 15 minutes over the 15-minute mean of
# docker_container_cpu.usage_percent.
# Levels: WARN above 80, CRIT above 90, OK below 80 (a value of exactly 80
# matches none of the thresholds).
# NOTE(review): the query reads from `v.bucket`, but no Variable named
# `bucket` is defined in the visible part of this file - confirm that checks
# can resolve it, or substitute a concrete bucket name.
apiVersion: influxdata.com/v2alpha1
kind: CheckThreshold
metadata:
  name: gracious-bose-14d000
spec:
  description: Container cpu usage is above 80% for 15 minutes
  every: 15m0s
  name: Container CPU Usage Pct
  query: |-
    from(bucket: v.bucket)
      |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
      |> filter(fn: (r) => r["_measurement"] == "docker_container_cpu")
      |> filter(fn: (r) => r["_field"] == "usage_percent")
      |> aggregateWindow(every: 15m, fn: mean)
      |> yield(name: "mean")
  status: active
  # Placeholders must use the `${ ... }` form; a bare `{r.container_name}`
  # would be emitted literally instead of being replaced by the tag value.
  statusMessageTemplate: 'Check: ${ r._check_name } for container ${ r.container_name }
    is: ${ r._level }'
  thresholds:
    - level: WARN
      type: greater
      value: 80
    - level: CRIT
      type: greater
      value: 90
    - level: OK
      type: lesser
      value: 80
---
# HTTP notification endpoint "HTTP POST".
# Sends a POST request to the URL below. It is referenced by metadata.name
# from the NotificationRule document (gifted-haslett-14d008) in this file.
# NOTE(review): `type: none` presumably selects "no authentication", and the
# URL targets localhost - confirm both are right for the deployment.
apiVersion: influxdata.com/v2alpha1
kind: NotificationEndpointHTTP
metadata:
  name: earnest-ishizaka-14d006
spec:
  method: POST
  name: HTTP POST
  status: active
  type: none
  url: http://localhost:25378/alert
---
# Notification rule "Crit Notifier".
# Evaluated every 5 minutes; its single status rule matches a change from
# level OK to level CRIT and notifies via the endpoint named by
# `endpointName` (the NotificationEndpointHTTP document in this file).
# As written, other transitions (for example WARN to CRIT) are not matched.
apiVersion: influxdata.com/v2alpha1
kind: NotificationRule
metadata:
  name: gifted-haslett-14d008
spec:
  endpointName: earnest-ishizaka-14d006
  every: 5m0s
  name: Crit Notifier
  statusRules:
    - currentLevel: CRIT
      previousLevel: OK
---
# Dashboard "Docker": Markdown headers, single-stat cells and line graphs
# over the docker*, system, swap, disk, cpu and mem measurements.
# Cells are positioned with xPos/yPos (omitted means 0) and sized with
# width/height; the widest cells here are 12 units wide.
# Associated with the labels inputs.docker and inputs.system by metadata.name.
# NOTE(review): every query reads from `v.bucket`, but no Variable named
# `bucket` is defined in the visible part of this file - confirm that one
# exists wherever this template is applied.
apiVersion: influxdata.com/v2alpha1
kind: Dashboard
metadata:
  name: earning-boyd-94d001
spec:
  associations:
    - kind: Label
      name: inspiring-goodall-d4d00b
    - kind: Label
      name: elegant-yonath-d4d015
  charts:
    # --- Row 0: introduction ---
    - height: 1
      kind: Markdown
      name: Name this Cell
      note: >-
        This dashboard gives you an overview of [Docker](https://docker.com)
        metrics. See the [Telegraf Documentation](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/docker)
        for help configuring these plugins.
      width: 12
    # --- Row 1: section header ---
    - height: 1
      kind: Markdown
      name: Name this Cell
      note: '# Docker Daemon'
      width: 12
      yPos: 1
    - colors:
        - hex: '#00C9FF'
          name: laser
          type: text
      decimalPlaces: 2
      height: 1
      kind: Single_Stat
      name: Num Images
      queries:
        - query: |-
            from(bucket: v.bucket)
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r["_measurement"] == "docker")
              |> filter(fn: (r) => r["_field"] == "n_images")
      width: 2
      yPos: 2
    # --- Row 3: section header ---
    - height: 1
      kind: Markdown
      name: Name this Cell
      note: '# System Stats'
      width: 12
      yPos: 3
    - colors:
        - hex: '#00C9FF'
          name: laser
          type: text
      height: 1
      kind: Single_Stat
      name: System Uptime
      queries:
        # uptime is divided by 86400 (seconds per day) to match the
        # ' days' suffix.
        - query: |-
            from(bucket: v.bucket)
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r["_measurement"] == "system")
              |> filter(fn: (r) => r["_field"] == "uptime")
              |> map(fn: (r) => ({r with _value: float(v: r._value) / 86400.0}))
      suffix: ' days'
      width: 2
      yPos: 4
    - axes:
        - base: "10"
          name: x
          scale: linear
        - base: "2"
          name: y
          scale: linear
        - base: "10"
          name: y2
          scale: linear
      geom: line
      height: 2
      kind: Xy
      name: Swap
      queries:
        - query: |-
            from(bucket: v.bucket)
              |> range(start: v.timeRangeStart)
              |> filter(fn: (r) => r._measurement == "swap")
              |> filter(fn: (r) => r._field == "total" or r._field == "used")
              |> window(period: v.windowPeriod)
              |> mean()
              |> group(columns: ["_value", "_time", "_start", "_stop"], mode: "except")
              |> yield(name: "mean")
      width: 3
      yPos: 5
    - axes:
        - base: "10"
          name: y2
          scale: linear
        - base: "10"
          name: x
          scale: linear
        - base: "2"
          label: Bytes
          name: y
          scale: linear
      geom: line
      height: 3
      kind: Xy
      name: Network TX traffic per container / sec
      queries:
        # derivative(unit: 1s) turns the tx_bytes series into a per-second
        # rate; with nonNegative: false, decreases show up as negative rates.
        - query: |-
            from(bucket: v.bucket)
              |> range(start: v.timeRangeStart, stop:v.timeRangeStop)
              |> filter(fn: (r) => r._measurement == "docker_container_net")
              |> filter(fn: (r) => r._field == "tx_bytes")
              |> derivative(unit:1s, nonNegative: false)
              |> window(period: v.windowPeriod)
              |> mean()
              |> keep(columns: ["_measurement","container_name", "host","_value","_field","_stop"])
              |> group(columns: ["_value", "_time", "_start", "_stop"],mode: "except")
      width: 6
      xCol: _stop
      yCol: _value
      yPos: 7
    # --- Row 10: section header ---
    - height: 1
      kind: Markdown
      name: Name this Cell
      note: '# Container Stats'
      width: 12
      yPos: 10
    - axes:
        - base: "10"
          name: x
          scale: linear
        - base: "10"
          name: y
          scale: linear
          suffix: '%'
        - base: "10"
          name: y2
          scale: linear
      geom: line
      height: 3
      kind: Xy
      name: Disk Usage
      queries:
        - query: |-
            from(bucket: v.bucket)
              |> range(start: v.timeRangeStart)
              |> filter(fn: (r) => r._measurement == "disk")
              |> filter(fn: (r) => r._field == "used_percent")
              |> window(period: v.windowPeriod)
              |> mean()
              |> group(columns: ["_value", "_time", "_start", "_stop"], mode: "except")
              |> yield(name: "mean")
      width: 3
      yPos: 11
    - colors:
        - hex: '#00C9FF'
          name: laser
          type: text
      decimalPlaces: 2
      height: 1
      kind: Single_Stat
      name: Total Num Containers
      queries:
        - query: |-
            from(bucket: v.bucket)
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r["_measurement"] == "docker")
              |> filter(fn: (r) => r["_field"] == "n_containers")
      width: 2
      xPos: 2
      yPos: 2
    - colors:
        - hex: '#00C9FF'
          name: laser
          type: text
      decimalPlaces: 2
      height: 1
      kind: Single_Stat
      name: System Load
      queries:
        - query: |-
            from(bucket: v.bucket)
              |> range(start: v.timeRangeStart)
              |> filter(fn: (r) => r._measurement == "system")
              |> filter(fn: (r) => r._field == "load1")
              |> window(period: v.windowPeriod)
              |> mean()
              |> group(columns: ["_value", "_time", "_start", "_stop"], mode: "except")
              |> yield(name: "mean")
      width: 1
      xPos: 2
      yPos: 4
    - axes:
        - base: "10"
          name: x
          scale: linear
        - base: "10"
          label: Load
          name: y
          scale: linear
        - base: "10"
          name: y2
          scale: linear
      geom: line
      height: 3
      kind: Xy
      name: System Load
      queries:
        - query: |-
            from(bucket: v.bucket)
              |> range(start: v.timeRangeStart)
              |> filter(fn: (r) => r._measurement == "system")
              |> filter(fn: (r) => r._field == "load1" or r._field == "load5" or r._field == "load15")
              |> window(period: v.windowPeriod)
              |> mean()
              |> group(columns: ["_value", "_time", "_start", "_stop"], mode: "except")
              |> yield(name: "mean")
      width: 3
      xPos: 3
      yPos: 4
    - axes:
        - base: "10"
          name: x
          scale: linear
        - base: "10"
          name: y
          scale: linear
      geom: line
      height: 3
      kind: Xy
      name: Memory usage % per container
      queries:
        - query: |-
            from(bucket: v.bucket)
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r._measurement == "docker_container_mem")
              |> filter(fn: (r) => r._field == "usage_percent")
              |> window(period: v.windowPeriod)
              |> mean()
              |> group(columns: ["_value", "_time", "_start", "_stop"], mode: "except")
              |> keep(columns: ["_measurement","container_name", "host","_value","_field","_stop"])
              |> yield(name: "mean")
      width: 3
      xPos: 3
      yPos: 11
    - colors:
        - hex: '#00C9FF'
          name: laser
          type: text
      decimalPlaces: 2
      height: 1
      kind: Single_Stat
      name: Number of Running Docker containers
      queries:
        - query: |-
            from(bucket: v.bucket)
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r["_measurement"] == "docker")
              |> filter(fn: (r) => r["_field"] == "n_containers_running")
              |> group(columns: ["engine_host"])
      width: 2
      xPos: 4
      yPos: 2
    - colors:
        - hex: '#00C9FF'
          name: laser
          type: text
      decimalPlaces: 2
      height: 1
      kind: Single_Stat
      name: Docker Daemon nCPUs
      queries:
        - query: |-
            from(bucket: v.bucket)
              |> range(start: v.timeRangeStart)
              |> filter(fn: (r) => r._measurement == "docker")
              |> filter(fn: (r) => r._field == "n_cpus")
              |> window(period: v.windowPeriod)
              |> last()
              |> group(columns: ["_value", "_time", "_start", "_stop"], mode: "except")
              |> yield(name: "last")
      suffix: ' cpus'
      width: 2
      xPos: 6
      yPos: 2
    - axes:
        - base: "10"
          name: x
          scale: linear
        - base: "10"
          name: y
          scale: linear
          suffix: '%'
        - base: "10"
          name: y2
          scale: linear
      geom: line
      height: 3
      kind: Xy
      name: CPU Usage
      queries:
        - query: |-
            from(bucket: v.bucket)
              |> range(start: v.timeRangeStart)
              |> filter(fn: (r) => r._measurement == "cpu")
              |> filter(fn: (r) => r._field == "usage_user" or r._field == "usage_system" or r._field == "usage_idle")
              |> filter(fn: (r) => r.cpu == "cpu-total")
              |> window(period: v.windowPeriod)
              |> mean()
              |> group(columns: ["_value", "_time", "_start", "_stop"], mode: "except")
              |> yield(name: "mean")
      width: 3
      xPos: 6
      yPos: 4
    - axes:
        - base: "10"
          name: x
          scale: linear
        - base: "2"
          label: Bytes
          name: y
          scale: linear
        - base: "10"
          name: y2
          scale: linear
      colors:
        - hex: '#FDC44F'
          name: Cthulhu
          type: scale
        - hex: '#007C76'
          name: Cthulhu
          type: scale
        - hex: '#8983FF'
          name: Cthulhu
          type: scale
      geom: line
      height: 3
      kind: Xy
      name: Network RX traffic per container / sec
      queries:
        # derivative(unit: 1s) turns the rx_bytes series into a per-second
        # rate; with nonNegative: false, decreases show up as negative rates.
        - query: |-
            from(bucket: v.bucket)
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r._measurement == "docker_container_net")
              |> filter(fn: (r) => r._field == "rx_bytes" )
              |> derivative(unit: 1s, nonNegative: false)
              |> window(period: v.windowPeriod)
              |> mean()
              |> keep(columns: ["_measurement","container_name", "host","_value","_field","_stop"])
              |> group(columns: ["_value", "_time", "_start", "_stop"], mode: "except")
      width: 6
      xCol: _stop
      xPos: 6
      yCol: _value
      yPos: 7
    - axes:
        - base: "10"
          name: x
          scale: linear
        - base: "10"
          name: y
          scale: linear
      geom: line
      height: 3
      kind: Xy
      name: Memory usage per container
      queries:
        - query: |-
            from(bucket: v.bucket)
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r._measurement == "docker_container_mem")
              |> filter(fn: (r) => r._field == "usage")
              |> window(period: v.windowPeriod)
              |> mean()
              |> group(columns: ["_value", "_time", "_start", "_stop"], mode: "except")
              |> keep(columns: ["_measurement","container_name", "host","_value","_field","_stop"])
              |> yield(name: "mean")
      width: 3
      xPos: 6
      yPos: 11
    - colors:
        - hex: '#00C9FF'
          name: laser
          type: text
      decimalPlaces: 2
      height: 1
      kind: Single_Stat
      name: Docker Daemon Total Mem
      queries:
        # memory_total is divided by 1024 three times to match the
        # ' GB' suffix.
        - query: |-
            from(bucket: v.bucket)
              |> range(start: v.timeRangeStart)
              |> filter(fn: (r) => r._measurement == "docker")
              |> filter(fn: (r) => r._field == "memory_total")
              |> window(period: v.windowPeriod)
              |> last()
              |> map(fn: (r) => ({r with _value: float(v: r._value) / 1024.0 / 1024.0 / 1024.0}))
              |> group(columns: ["_value", "_time", "_start", "_stop"], mode: "except")
              |> yield(name: "last")
      suffix: ' GB'
      width: 2
      xPos: 8
      yPos: 2
    - axes:
        - base: "10"
          name: x
          scale: linear
        - base: "10"
          name: y
          scale: linear
          suffix: '%'
        - base: "10"
          name: y2
          scale: linear
      colors:
        - hex: '#00C9FF'
          name: laser
          type: text
        - hex: '#8F8AF4'
          name: Do Androids Dream of Electric Sheep?
          type: scale
        - hex: '#A51414'
          name: Do Androids Dream of Electric Sheep?
          type: scale
        - hex: '#F4CF31'
          name: Do Androids Dream of Electric Sheep?
          type: scale
      decimalPlaces: 1
      height: 3
      kind: Single_Stat_Plus_Line
      name: System Memory Usage
      queries:
        - query: |-
            from(bucket: v.bucket)
              |> range(start: v.timeRangeStart)
              |> filter(fn: (r) => r._measurement == "mem")
              |> filter(fn: (r) => r._field == "used_percent")
              |> window(period: v.windowPeriod)
              |> mean()
              |> group(columns: ["_value", "_time", "_start", "_stop"], mode: "except")
              |> yield(name: "mean")
      suffix: '%'
      width: 3
      xCol: _stop
      xPos: 9
      yCol: _value
      yPos: 4
    - axes:
        - base: "10"
          name: x
          scale: linear
        - base: "10"
          name: y
          scale: linear
      geom: line
      height: 3
      kind: Xy
      name: CPU usage per container
      queries:
        # NOTE(review): the measurement filter also admits "cpu"; confirm
        # whether that measurement has a `usage_percent` field, otherwise
        # that half of the filter contributes nothing.
        - query: |-
            from(bucket: v.bucket)
              |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
              |> filter(fn: (r) => r._measurement == "cpu" or r._measurement == "docker_container_cpu")
              |> filter(fn: (r) => r._field == "usage_percent")
              |> window(period: v.windowPeriod)
              |> mean()
              |> group(columns: ["_value", "_time", "_start", "_stop"], mode: "except")
              |> keep(columns: ["_measurement","container_name", "host","_value","_field","_stop"])
              |> yield(name: "mean")
      width: 3
      xPos: 9
      yPos: 11
  description: A collection of useful visualizations for monitoring Docker stats
  name: Docker
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment