Skip to content

Instantly share code, notes, and snippets.

@Tombar
Created April 30, 2019 19:09
Show Gist options
  • Save Tombar/5ff53dee7017bb146641c0f7de27232e to your computer and use it in GitHub Desktop.
Save Tombar/5ff53dee7017bb146641c0f7de27232e to your computer and use it in GitHub Desktop.
{{- /*
ConfigMap carrying the Kafka Connect Grafana dashboard.
Rendered only when both .Values.connect.enabled and .Values.monitoring.enabled
are truthy. The data section embeds dashboards/strimzi-connect.json from the
chart's files. The grafana-dashboard label is presumably what the dashboard
loader selects on -- confirm against the Grafana deployment.
*/ -}}
{{- if and .Values.connect.enabled .Values.monitoring.enabled }}
{{- /* Explicit document start keeps this manifest separate from anything rendered before it. */}}
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: connect-dashboard
  labels:
    grafana-dashboard: "true"
    app: {{ template "name" . }}
    {{- /* Quoted so release values that look like numbers or booleans stay strings. */}}
    release: {{ .Release.Name | quote }}
    heritage: {{ .Release.Service | quote }}
    chart: {{ template "chart" . }}
  {{- with .Values.annotations }}
  annotations:
{{- /* toYaml emits a multi-line map; every line must be indented under annotations. */}}
{{ toYaml . | indent 4 }}
  {{- end }}
data:
{{ (.Files.Glob "dashboards/strimzi-connect.json").AsConfig | indent 2 }}
{{- end }}
{{- /*
ServiceMonitor for the Kafka Connect API service.
Rendered only when both .Values.monitoring.enabled and .Values.connect.enabled
are truthy. Selects the Service labelled as the "<name>-connect-api"
KafkaConnect resource and scrapes its "metrics" port every 5s.
*/ -}}
{{- if and .Values.monitoring.enabled .Values.connect.enabled }}
{{- /* Explicit document start keeps this manifest separate from anything rendered before it. */}}
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: {{ template "name" . }}-kafka-connect
  labels:
    app: {{ template "name" . }}-kafka-connect
    chart: {{ template "chart" . }}
    release: {{ $.Release.Name | quote }}
    heritage: {{ $.Release.Service | quote }}
    wek8s-monitoring: "true"
    {{- /* Quoted so an unset team renders as an empty string instead of a null label value. */}}
    team: {{ .Values.monitoring.team | quote }}
spec:
  {{- /*
  NOTE(review): prometheus-operator documents jobLabel as the *key* of a
  Service label whose value becomes the job name; here it is set to the chart
  name. Left unchanged to keep existing job names -- confirm this is intended.
  */}}
  jobLabel: {{ template "name" . }}
  selector:
    matchLabels:
      strimzi.io/cluster: {{ template "name" . }}
      strimzi.io/kind: KafkaConnect
      strimzi.io/name: {{ template "name" . }}-connect-api
  targetLabels:
    - app
    - env
    - strimzi.io/cluster
    - strimzi.io/kind
    - strimzi.io/name
  endpoints:
    - port: metrics
      interval: 5s
{{- end }}
{{- /*
ConfigMap carrying the Kafka and ZooKeeper Grafana dashboards.
Rendered only when both .Values.kafka.install and .Values.monitoring.enabled
are truthy. The data section embeds dashboards/strimzi-kafka.json and
dashboards/strimzi-zookeeper.json from the chart's files. The
grafana-dashboard label is presumably what the dashboard loader selects on --
confirm against the Grafana deployment.
*/ -}}
{{- if and .Values.kafka.install .Values.monitoring.enabled }}
{{- /* Explicit document start keeps this manifest separate from anything rendered before it. */}}
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: kafka-dashboard
  labels:
    grafana-dashboard: "true"
    app: {{ template "name" . }}
    {{- /* Quoted so release values that look like numbers or booleans stay strings. */}}
    release: {{ .Release.Name | quote }}
    heritage: {{ .Release.Service | quote }}
    chart: {{ template "chart" . }}
  {{- with .Values.annotations }}
  annotations:
{{- /* toYaml emits a multi-line map; every line must be indented under annotations. */}}
{{ toYaml . | indent 4 }}
  {{- end }}
data:
{{ (.Files.Glob "dashboards/strimzi-kafka.json").AsConfig | indent 2 }}
{{ (.Files.Glob "dashboards/strimzi-zookeeper.json").AsConfig | indent 2 }}
{{- end }}
{{- /*
PrometheusRule with warning-level alerts for Kafka brokers and ZooKeeper.
Rendered only when both .Values.monitoring.enabled and .Values.kafka.install
are truthy. Every alert is tagged with the release namespace and release name.
The {{ "{{" }} ... {{ "}}" }} sequences in the descriptions emit literal
double braces so Prometheus, not Helm, expands $value and $labels.
*/ -}}
{{- if and .Values.monitoring.enabled .Values.kafka.install }}
{{- /* Explicit document start keeps this manifest separate from anything rendered before it. */}}
---
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: {{ template "name" . }}
  labels:
    wek8s-monitoring: "true"
    app: {{ template "name" . }}
    release: {{ .Release.Name | quote }}
    heritage: {{ .Release.Service | quote }}
    chart: {{ template "chart" . }}
  {{- with .Values.annotations }}
  annotations:
{{- /* toYaml emits a multi-line map; every line must be indented under annotations. */}}
{{ toYaml . | indent 4 }}
  {{- end }}
spec:
  groups:
    # from https://github.com/strimzi/strimzi-kafka-operator/blob/master/metrics/examples/prometheus/alerting-rules.yaml
    - name: kafka-alerts
      rules:
        - alert: KafkaUnderReplicatedPartitions
          expr: kafka_server_replicamanager_underreplicatedpartitions > 0
          for: 10s
          labels:
            severity: warning
            namespace: {{ .Release.Namespace | quote }}
            alert_source: {{ .Release.Name | quote }}
          annotations:
            summary: 'Kafka under replicated partitions'
            description: 'There are {{ "{{" }} $value {{ "}}" }} under replicated partitions on {{ "{{" }} $labels.kubernetes_pod_name {{ "}}" }}'
        - alert: KafkaAbnormalControllerState
          expr: sum(kafka_controller_kafkacontroller_activecontrollercount) != 1
          for: 10s
          labels:
            severity: warning
            namespace: {{ .Release.Namespace | quote }}
            alert_source: {{ .Release.Name | quote }}
          annotations:
            summary: 'Kafka abnormal controller state'
            description: 'There are {{ "{{" }} $value {{ "}}" }} active controllers in the cluster'
        - alert: KafkaUnderMinIsrPartitionCount
          expr: kafka_server_replicamanager_underminisrpartitioncount > 0
          for: 10s
          labels:
            severity: warning
            namespace: {{ .Release.Namespace | quote }}
            alert_source: {{ .Release.Name | quote }}
          annotations:
            summary: 'Kafka under min ISR partitions'
            description: 'There are {{ "{{" }} $value {{ "}}" }} partitions under the min ISR on {{ "{{" }} $labels.kubernetes_pod_name {{ "}}" }}'
        - alert: KafkaOfflineLogDirectoryCount
          expr: kafka_log_logmanager_offlinelogdirectorycount > 0
          for: 10s
          labels:
            severity: warning
            namespace: {{ .Release.Namespace | quote }}
            alert_source: {{ .Release.Name | quote }}
          annotations:
            summary: 'Kafka offline log directories'
            description: 'There are {{ "{{" }} $value {{ "}}" }} offline log directories on {{ "{{" }} $labels.kubernetes_pod_name {{ "}}" }}'
    - name: zookeeper-alerts
      rules:
        - alert: ZookeeperAvgRequestLatency
          expr: zookeeper_avgrequestlatency > 10
          for: 10s
          labels:
            severity: warning
            namespace: {{ .Release.Namespace | quote }}
            alert_source: {{ .Release.Name | quote }}
          annotations:
            summary: 'Zookeeper average request latency'
            description: 'The average request latency is {{ "{{" }} $value {{ "}}" }} on {{ "{{" }} $labels.kubernetes_pod_name {{ "}}" }}'
        - alert: ZookeeperOutstandingRequests
          expr: zookeeper_outstandingrequests > 10
          for: 10s
          labels:
            severity: warning
            namespace: {{ .Release.Namespace | quote }}
            alert_source: {{ .Release.Name | quote }}
          annotations:
            summary: 'Zookeeper outstanding requests'
            description: 'There are {{ "{{" }} $value {{ "}}" }} outstanding requests on {{ "{{" }} $labels.kubernetes_pod_name {{ "}}" }}'
{{- end }}
{{- /*
ServiceMonitors for the Kafka bootstrap service and the ZooKeeper client
service. Rendered only when both .Values.monitoring.enabled and
.Values.kafka.install are truthy. Each monitor selects its Service through the
strimzi.io/* labels and scrapes the "metrics" port (Kafka every 10s,
ZooKeeper every 5s).

NOTE(review): prometheus-operator documents jobLabel as the *key* of a Service
label whose value becomes the job name; here it is set to the chart name. Left
unchanged to keep existing job names -- confirm this is intended.
*/ -}}
{{- if and .Values.monitoring.enabled .Values.kafka.install }}
{{- /* Explicit document start keeps this manifest separate from anything rendered before it. */}}
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: {{ template "name" . }}-kafka
  labels:
    app: {{ template "name" . }}-kafka
    chart: {{ template "chart" . }}
    release: {{ $.Release.Name | quote }}
    heritage: {{ $.Release.Service | quote }}
    wek8s-monitoring: "true"
    {{- /* Quoted so an unset team renders as an empty string instead of a null label value. */}}
    team: {{ .Values.monitoring.team | quote }}
spec:
  jobLabel: {{ template "name" . }}
  selector:
    matchLabels:
      strimzi.io/cluster: {{ template "name" . }}
      strimzi.io/kind: Kafka
      strimzi.io/name: {{ template "name" . }}-kafka-bootstrap
  targetLabels:
    - app
    - strimzi.io/cluster
    - strimzi.io/kind
    - strimzi.io/name
  # podTargetLabels:
  endpoints:
    - port: metrics
      interval: 10s
      {{- /*
      scrapeTimeout is a per-endpoint duration. It was previously written as a
      bare "15" outside any endpoint, where it has no effect. Prometheus
      rejects a timeout longer than the scrape interval, so it is set to the
      10s interval here; raise interval as well if 15s is really needed.
      */}}
      scrapeTimeout: 10s
      # relabelings:
      #   - action: labelmap
      #     regex: __meta_kubernetes_pod_label_(.+)
      #     separator: ;
      #     replacement: $1
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: {{ template "name" . }}-zookeeper
  labels:
    app: {{ template "name" . }}-zookeeper
    chart: {{ template "chart" . }}
    release: {{ $.Release.Name | quote }}
    heritage: {{ $.Release.Service | quote }}
    wek8s-monitoring: "true"
    team: {{ .Values.monitoring.team | quote }}
spec:
  jobLabel: {{ template "name" . }}
  selector:
    matchLabels:
      strimzi.io/cluster: {{ template "name" . }}
      strimzi.io/kind: Kafka
      strimzi.io/name: {{ template "name" . }}-zookeeper-client
  targetLabels:
    - app
    - strimzi.io/cluster
    - strimzi.io/kind
    - strimzi.io/name
  endpoints:
    - port: metrics
      interval: 5s
{{- end }}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment