Skip to content

Instantly share code, notes, and snippets.

@fredrkl
Created October 25, 2022 17:43
Show Gist options
  • Save fredrkl/538f18b466f6b11129d794bacbf05a89 to your computer and use it in GitHub Desktop.
Save fredrkl/538f18b466f6b11129d794bacbf05a89 to your computer and use it in GitHub Desktop.
---
# PrometheusRule (Prometheus Operator CRD) defining a baseline pack of
# application alerts: containers stuck waiting, pod network errors, slow or
# failing ingress traffic, and memory/CPU usage close to the configured limits.
#
# NOTE(review): the `exported_*` label names used throughout presumably come
# from scraping with `honor_labels: false`; confirm against the scrape config.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    # NOTE(review): presumably matched by the Prometheus `ruleSelector`;
    # verify against the Prometheus resource that should load these rules.
    prometheus: basic_rules_prom
    role: alert-rules
  name: application-basic-pack
  namespace: prometheus-platform-instances
spec:
  groups:
    - name: ./applications.basic.rules
      # Evaluate every rule in this group each 30 seconds.
      interval: 30s
      rules:
        # Fires when a container has been reported in the waiting state for
        # 5 minutes. Scrape-specific labels are dropped so the series
        # identity does not depend on which exporter instance reported it.
        - alert: "Application waiting"
          labels:
            application: "{{ $labels.exported_container }}"
          for: "5m"
          expr: |-
            sum without (instance, exported_pod, endpoint, job) (
              kube_pod_container_status_waiting
            ) > 0
          annotations:
            priority: P5
            description: "{{ $labels.exported_container }} is not able to start in the {{ $labels.exported_namespace }} system"

        # Fires when a pod shows a non-zero receive error rate for 5 minutes.
        # The threshold is applied after aggregation, matching the transmit
        # rule below; the rate of a counter is never negative, so this yields
        # the same alerts as filtering each series before summing.
        - alert: "Application network receive errors"
          labels:
            application: "{{ $labels.pod }}"
          for: "5m"
          expr: |-
            (
              sum by (pod) (
                rate(container_network_receive_errors_total[10m])
              )
            ) > 0
          annotations:
            priority: P2
            description: "{{ $labels.pod}}"

        # Fires when a pod shows a non-zero transmit error rate for 5 minutes.
        - alert: "Application network transmit errors"
          labels:
            application: "{{ $labels.pod }}"
          for: "5m"
          expr: |-
            (
              sum by (pod) (
                rate(container_network_transmit_errors_total[10m])
              )
            ) > 0
          annotations:
            priority: P2
            description: "{{ $labels.pod}}"

        # Fires when the average request duration of an ingress stays above
        # 2 seconds for 5 minutes.
        # The average is rate(_sum) / rate(_count). rate(_sum) on its own is
        # "seconds of request time accumulated per second", which scales with
        # traffic volume and is not a latency.
        # max picks the slowest series per namespace/ingress.
        - alert: "Application responding to slow to http requests"
          labels:
            application: "{{ $labels.ingress }}"
          for: "5m"
          expr: |-
            (
              max by (exported_namespace, ingress) (
                rate(nginx_ingress_controller_request_duration_seconds_sum[10m])
                /
                rate(nginx_ingress_controller_request_duration_seconds_count[10m])
              )
            ) > 2
          annotations:
            priority: P2
            description: "Calls to {{ $labels.ingress }} in the {{ $labels.exported_namespace }} system is taking more than 2 second to respond"

        # Fires when a service keeps returning 5xx responses for 5 minutes.
        # Prometheus label matchers are fully anchored, so "5.." matches
        # exactly the three-character status codes starting with 5.
        - alert: "Application API endpoint is failing"
          labels:
            application: "{{ $labels.exported_service }}"
          for: "5m"
          expr: |-
            (
              sum by (exported_service, status) (
                increase(nginx_ingress_controller_requests{status=~"5.."}[10m])
              )
            ) > 0
          annotations:
            priority: P2
            description: "Calls to {{ $labels.exported_service }} is returning {{ $labels.status }}"

        # Fires when memory usage exceeds 80% of the memory limit for 10
        # minutes. container_memory_usage_bytes is a gauge, so it is compared
        # directly; wrapping it in rate() would compare a bytes-per-second
        # growth rate against the limit instead of the usage itself.
        # label_replace copies `container` into `exported_container` so both
        # sides of the division share the same grouping label.
        # NOTE(review): container_memory_working_set_bytes may be a better
        # signal for OOM risk - confirm which metric the team wants.
        - alert: "Application low on Memory"
          labels:
            application: "{{ $labels.exported_container }}"
          for: "10m"
          expr: |-
            (
              sum by (exported_container) (
                label_replace(
                  container_memory_usage_bytes,
                  "exported_container", "$1", "container", "(.*)"
                )
              )
              /
              sum by (exported_container) (
                kube_pod_container_resource_limits{resource="memory"}
              )
            ) > 0.8
          annotations:
            priority: P2
            description: "{{ $labels.exported_container }} is running low on Memory"

        # Fires when CPU usage (cores, as the per-second rate of consumed CPU
        # seconds) exceeds 80% of the CPU limit for 10 minutes.
        - alert: "Application low on CPU"
          labels:
            application: "{{ $labels.exported_container }}"
          for: "10m"
          expr: |-
            (
              sum by (exported_container) (
                label_replace(
                  rate(container_cpu_usage_seconds_total[5m]),
                  "exported_container", "$1", "container", "(.*)"
                )
              )
              /
              sum by (exported_container) (
                kube_pod_container_resource_limits{resource="cpu"}
              )
            ) > 0.8
          annotations:
            priority: P2
            description: "{{ $labels.exported_container }} is running low on CPU"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment