Created
October 25, 2022 17:43
-
-
Save fredrkl/538f18b466f6b11129d794bacbf05a89 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# PrometheusRule (Prometheus Operator CRD) holding a basic pack of
# application-level alerts: containers stuck waiting, pod network errors,
# slow or failing ingress traffic, and memory/CPU usage close to the limit.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  # NOTE(review): these labels are presumably what the Prometheus instance's
  # ruleSelector matches on -- confirm against the Prometheus resource.
  labels:
    prometheus: basic_rules_prom
    role: alert-rules
  name: application-basic-pack
  namespace: prometheus-platform-instances
spec:
  groups:
    - name: ./applications.basic.rules
      # Evaluation interval for every rule in this group.
      interval: 30s
      rules:
        # Fires when a container has been in the "waiting" state for 5 minutes.
        - alert: "Application waiting"
          expr: "sum without(instance, exported_pod, endpoint, job)(kube_pod_container_status_waiting)>0"
          for: "5m"
          labels:
            application: "{{ $labels.exported_container }}"
          annotations:
            priority: P5
            description: "{{ $labels.exported_container }} is not able to start in the {{ $labels.exported_namespace }} system"

        # Fires when a pod reports a non-zero rate of network receive errors.
        # Same shape as the transmit rule below: aggregate first, then compare.
        - alert: "Application network receive errors"
          expr: "(sum by (pod) (rate(container_network_receive_errors_total[10m])))>0"
          for: "5m"
          labels:
            application: "{{ $labels.pod }}"
          annotations:
            priority: P2
            description: "{{ $labels.pod}}"

        # Fires when a pod reports a non-zero rate of network transmit errors.
        - alert: "Application network transmit errors"
          expr: "(sum by (pod) (rate(container_network_transmit_errors_total[10m])))>0"
          for: "5m"
          labels:
            application: "{{ $labels.pod }}"
          annotations:
            priority: P2
            description: "{{ $labels.pod}}"

        # Fires when the mean request duration per ingress exceeds 2 seconds.
        # Mean latency = rate(_sum) / rate(_count); rate(_sum) on its own is
        # "request-seconds per second" and grows with traffic, not latency.
        # With no traffic the division yields NaN, which never satisfies "> 2".
        - alert: "Application responding to slow to http requests"
          expr: |-
            (
              sum by (exported_namespace, ingress) (rate(nginx_ingress_controller_request_duration_seconds_sum[10m]))
              /
              sum by (exported_namespace, ingress) (rate(nginx_ingress_controller_request_duration_seconds_count[10m]))
            ) > 2
          for: "5m"
          labels:
            application: "{{ $labels.ingress }}"
          annotations:
            priority: P2
            description: "Calls to {{ $labels.ingress }} in the {{ $labels.exported_namespace }} system is taking more than 2 second to respond"

        # Fires when an ingress-backed service returned any 5xx response in the
        # last 10 minutes, grouped per service and status code.
        - alert: "Application API endpoint is failing"
          expr: "(sum(increase(nginx_ingress_controller_requests{status=~'^5.*'}[10m])) by (exported_service, status))>0"
          for: "5m"
          labels:
            application: "{{ $labels.exported_service }}"
          annotations:
            priority: P2
            description: "Calls to {{ $labels.exported_service }} is returning {{ $labels.status }}"

        # Fires when memory usage exceeds 80% of the configured memory limit.
        # container_memory_usage_bytes is a gauge, so it is compared directly;
        # rate() is only meaningful on counters. label_replace copies the
        # `container` label into `exported_container` so both sides of the
        # division aggregate on the same label name.
        # NOTE(review): container_memory_working_set_bytes may be a better
        # numerator since usage_bytes includes page cache -- confirm intent.
        - alert: "Application low on Memory"
          expr: |-
            (
              sum by (exported_container) (
                label_replace(container_memory_usage_bytes, 'exported_container', '$1', 'container', '(.*)')
              )
              /
              sum by (exported_container) (kube_pod_container_resource_limits{resource='memory'})
            ) > 0.8
          for: "10m"
          labels:
            application: "{{ $labels.exported_container }}"
          annotations:
            priority: P2
            description: "{{ $labels.exported_container }} is running low on Memory"

        # Fires when CPU usage (cores, from the per-second rate of the CPU
        # seconds counter) exceeds 80% of the configured CPU limit.
        - alert: "Application low on CPU"
          expr: |-
            (
              sum by (exported_container) (
                label_replace(rate(container_cpu_usage_seconds_total[5m]), 'exported_container', '$1', 'container', '(.*)')
              )
              /
              sum by (exported_container) (kube_pod_container_resource_limits{resource='cpu'})
            ) > 0.8
          for: "10m"
          labels:
            application: "{{ $labels.exported_container }}"
          annotations:
            priority: P2
            description: "{{ $labels.exported_container }} is running low on CPU"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment