Skip to content

Instantly share code, notes, and snippets.

@vfarcic
Last active August 7, 2022 00:37
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save vfarcic/9fcc214b5b7c380aef2feed3f686c08f to your computer and use it in GitHub Desktop.
Save vfarcic/9fcc214b5b7c380aef2feed3f686c08f to your computer and use it in GitHub Desktop.
# Source: https://gist.github.com/9fcc214b5b7c380aef2feed3f686c08f
# Enter the k8s-specs checkout and refresh it. The pull only runs when the
# directory change succeeded, so it can never update an unrelated repository.
cd k8s-specs && git pull

# Display the baseline Prometheus chart values used for the first install.
cat mon/prom-values-bare.yml

# LB_IP has to be provided by the caller. Without it the nip.io host names
# below would silently become malformed (e.g. "mon..nip.io").
: "${LB_IP:?LB_IP must be set before running these commands}"

# Host names for the Prometheus server and Alertmanager ingresses.
PROM_ADDR="mon.${LB_IP}.nip.io"
AM_ADDR="alertmanager.${LB_IP}.nip.io"

helm repo update

# Install the prometheus release (chart version 9.5.2) into the metrics
# namespace, creating the namespace if needed. The --set arguments are quoted
# so the literal braces (Helm's list syntax) and the expanded host name always
# reach helm as one word, regardless of shell brace or word-splitting rules.
helm install prometheus \
  stable/prometheus \
  --namespace metrics \
  --create-namespace \
  --version 9.5.2 \
  --set "server.ingress.hosts={$PROM_ADDR}" \
  --set "alertmanager.ingress.hosts={$AM_ADDR}" \
  -f mon/prom-values-bare.yml
# Block until the prometheus-server Deployment has finished rolling out.
kubectl --namespace metrics \
  rollout status \
  deployment prometheus-server

# Inspect the Deployment and the ConfigMap that share the release name.
kubectl --namespace metrics \
  describe deployment \
  prometheus-server
kubectl --namespace metrics \
  describe configmap prometheus-server

# Look at the configuration and scrape targets pages of Prometheus.
open "http://$PROM_ADDR/config"
open "http://$PROM_ADDR/targets"

# List the Services in the metrics namespace.
kubectl --namespace metrics get services

# Run a throw-away pod (removed on exit via --rm) from the appropriate/curl
# image; everything after "--" is handed to the container as its arguments
# (presumably curl fetching the given metrics endpoint).
kubectl --namespace metrics run test \
  --stdin --tty \
  --image=appropriate/curl \
  --restart=Never \
  --rm \
  -- prometheus-node-exporter:9100/metrics

kubectl --namespace metrics run test \
  --stdin --tty \
  --image=appropriate/curl \
  --restart=Never \
  --rm \
  -- prometheus-kube-state-metrics:8080/metrics

open "http://$PROM_ADDR/alerts"
open "http://$PROM_ADDR/graph"

# Same kube-state-metrics request, keeping only the kube_node_info lines.
kubectl --namespace metrics run test \
  --stdin --tty \
  --image=appropriate/curl \
  --restart=Never \
  --rm \
  -- prometheus-kube-state-metrics:8080/metrics \
  | grep "kube_node_info"
# Prometheus expression
# kube_node_info
# Prometheus expression
# count(kube_node_info)
# Each step below shows what changes between two values files and then
# upgrades the prometheus release with the newer one. The --set arguments are
# quoted so the literal braces (Helm's list syntax) and the expanded host name
# are passed to helm as a single, unmodified word.

# Step 1: bare -> nodes.
diff mon/prom-values-bare.yml \
  mon/prom-values-nodes.yml
helm upgrade prometheus \
  stable/prometheus \
  --namespace metrics \
  --version 9.5.2 \
  --set "server.ingress.hosts={$PROM_ADDR}" \
  --set "alertmanager.ingress.hosts={$AM_ADDR}" \
  -f mon/prom-values-nodes.yml
open "http://$PROM_ADDR/alerts"

# Step 2: nodes -> nodes-0.
diff mon/prom-values-nodes.yml \
  mon/prom-values-nodes-0.yml
helm upgrade prometheus \
  stable/prometheus \
  --namespace metrics \
  --version 9.5.2 \
  --set "server.ingress.hosts={$PROM_ADDR}" \
  --set "alertmanager.ingress.hosts={$AM_ADDR}" \
  -f mon/prom-values-nodes-0.yml
open "http://$PROM_ADDR/alerts"
open "http://$AM_ADDR"

# Step 3: nodes-0 -> nodes-am.
diff mon/prom-values-nodes-0.yml \
  mon/prom-values-nodes-am.yml
helm upgrade prometheus \
  stable/prometheus \
  --namespace metrics \
  --version 9.5.2 \
  --set "server.ingress.hosts={$PROM_ADDR}" \
  --set "alertmanager.ingress.hosts={$AM_ADDR}" \
  -f mon/prom-values-nodes-am.yml

# Open the Slack channel (presumably where Alertmanager notifications land).
open "https://devops20.slack.com/messages/CSFMLBLKH/"
# LB_IP has to be provided by the caller; without it the host name below
# would be malformed (e.g. "go-demo-5..nip.io").
: "${LB_IP:?LB_IP must be set before running these commands}"

# Host name of the go-demo-5 ingress.
GD5_ADDR="go-demo-5.${LB_IP}.nip.io"

kubectl create namespace go-demo-5

# Install the go-demo-5 chart straight from its release archive. The --set
# argument is quoted so the expanded host is always passed as one word.
helm install go-demo-5 \
  https://github.com/vfarcic/go-demo-5/releases/download/0.0.1/go-demo-5-0.0.1.tgz \
  --namespace go-demo-5 \
  --set "ingress.host=$GD5_ADDR"

# Wait for the rollout to finish before sending the first request.
kubectl -n go-demo-5 \
  rollout status \
  deployment go-demo-5

curl "http://$GD5_ADDR/demo/hello"

open "http://$PROM_ADDR/graph"
# Prometheus expression
# nginx_ingress_controller_request_duration_seconds_bucket
# Prometheus expression
# sum(rate(
# nginx_ingress_controller_request_duration_seconds_count[5m]
# ))
# by (ingress)
# Prometheus expression
# sum(rate(
# nginx_ingress_controller_request_duration_seconds_bucket{
# le="0.25"
# }[5m]
# ))
# by (ingress)
# Prometheus expression
# sum(rate(
# nginx_ingress_controller_request_duration_seconds_bucket{
# le="0.25"
# }[5m]
# ))
# by (ingress) /
# sum(rate(
# nginx_ingress_controller_request_duration_seconds_count[5m]
# ))
# by (ingress)
# Prometheus expression
# sum(rate(
# nginx_ingress_controller_request_duration_seconds_bucket{
# le="0.25",
# ingress="go-demo-5"
# }[5m]
# ))
# by (ingress) /
# sum(rate(
# nginx_ingress_controller_request_duration_seconds_count{
# ingress="go-demo-5"
# }[5m]
# ))
# by (ingress)
# Send 30 requests, each with a random `delay` query value in the range 0-999.
# $(( )) replaces the deprecated $[ ] arithmetic form.
for i in {1..30}; do
  DELAY=$(( RANDOM % 1000 ))
  curl "http://$GD5_ADDR/demo/hello?delay=$DELAY"
done
# Prometheus expression
# sum(rate(
# nginx_ingress_controller_request_duration_seconds_bucket{
# le="0.25",
# ingress="go-demo-5"
# }[5m]
# ))
# by (ingress) /
# sum(rate(
# nginx_ingress_controller_request_duration_seconds_count{
# ingress="go-demo-5"
# }[5m]
# ))
# by (ingress)
# Prometheus expression
# sum(rate(
# nginx_ingress_controller_request_duration_seconds_bucket{
# le="0.25"
# }[5m]
# ))
# by (ingress) /
# sum(rate(
# nginx_ingress_controller_request_duration_seconds_count[5m]
# ))
# by (ingress) < 0.95
# Show what the latency values file adds on top of nodes-am, then apply it.
diff mon/prom-values-nodes-am.yml \
  mon/prom-values-latency.yml

# The --set arguments are quoted so the literal braces (Helm's list syntax)
# and the expanded host name reach helm as a single word.
helm upgrade prometheus \
  stable/prometheus \
  --namespace metrics \
  --version 9.5.2 \
  --set "server.ingress.hosts={$PROM_ADDR}" \
  --set "alertmanager.ingress.hosts={$AM_ADDR}" \
  -f mon/prom-values-latency.yml

open "http://$PROM_ADDR/alerts"

# Send 30 requests, each with a random `delay` query value in the range
# 0-9999. $(( )) replaces the deprecated $[ ] arithmetic form.
for i in {1..30}; do
  DELAY=$(( RANDOM % 10000 ))
  curl "http://$GD5_ADDR/demo/hello?delay=$DELAY"
done

open "http://$PROM_ADDR/alerts"

# Open the Slack channel (presumably where Alertmanager notifications land).
open "https://devops20.slack.com/messages/CSFMLBLKH/"

open "http://$PROM_ADDR/graph"
# Prometheus expression
# sum(rate(
# nginx_ingress_controller_request_duration_seconds_bucket{
# le="0.25",
# ingress!~"prometheus-server|jenkins"
# }[5m]
# ))
# by (ingress) /
# sum(rate(
# nginx_ingress_controller_request_duration_seconds_count{
# ingress!~"prometheus-server|jenkins"
# }[5m]
# ))
# by (ingress)
# Prometheus expression
# sum(rate(
# nginx_ingress_controller_request_duration_seconds_bucket{
# le="0.5",
# ingress=~"prometheus-server|jenkins"
# }[5m]
# ))
# by (ingress) /
# sum(rate(
# nginx_ingress_controller_request_duration_seconds_count{
# ingress=~"prometheus-server|jenkins"
# }[5m]
# ))
# by (ingress)
# Issue 100 plain requests against the demo endpoint, then go back to the
# Prometheus graph page.
for ((i = 1; i <= 100; i++)); do
  curl "http://$GD5_ADDR/demo/hello"
done

open "http://$PROM_ADDR/graph"
# Prometheus expression
# sum(rate(
# nginx_ingress_controller_requests[5m]
# ))
# by (ingress)
# Prometheus expression
# kube_deployment_status_replicas
# Prometheus expression
# label_join(
# kube_deployment_status_replicas,
# "ingress",
# ",",
# "deployment"
# )
# Prometheus expression
# sum(rate(
# nginx_ingress_controller_requests[5m]
# ))
# by (ingress) /
# sum(label_join(
# kube_deployment_status_replicas,
# "ingress",
# ",",
# "deployment"
# ))
# by (ingress)
# Show what latency2 changes relative to latency, then apply it.
diff mon/prom-values-latency.yml \
  mon/prom-values-latency2.yml

# The --set arguments are quoted so the literal braces (Helm's list syntax)
# and the expanded host name reach helm as a single word.
helm upgrade prometheus \
  stable/prometheus \
  --namespace metrics \
  --version 9.5.2 \
  --set "server.ingress.hosts={$PROM_ADDR}" \
  --set "alertmanager.ingress.hosts={$AM_ADDR}" \
  -f mon/prom-values-latency2.yml

open "http://$PROM_ADDR/alerts"

# Send 200 requests to the demo endpoint.
for i in {1..200}; do
  curl "http://$GD5_ADDR/demo/hello"
done

open "http://$PROM_ADDR/alerts"

# Open the Slack channel (presumably where Alertmanager notifications land).
open "https://devops20.slack.com/messages/CSFMLBLKH/"

# Another 100 requests before returning to the graph page.
for i in {1..100}; do
  curl "http://$GD5_ADDR/demo/hello"
done

open "http://$PROM_ADDR/graph"
# Prometheus expression
# nginx_ingress_controller_requests
# Hit the random-error endpoint 100 times, then go back to the Prometheus
# graph page.
for ((i = 1; i <= 100; i++)); do
  curl "http://$GD5_ADDR/demo/random-error"
done

open "http://$PROM_ADDR/graph"
# Prometheus expression
# sum(rate(
# nginx_ingress_controller_requests{
# status=~"5.."
# }[5m]
# ))
# by (ingress) /
# sum(rate(
# nginx_ingress_controller_requests[5m]
# ))
# by (ingress)
# Show how the errors values file differs from cpu-memory, then apply it.
# NOTE(review): prom-values-cpu-memory.yml is only applied further down in
# this walkthrough, while the release currently runs latency2 - confirm the
# intended ordering of these steps.
diff mon/prom-values-cpu-memory.yml \
  mon/prom-values-errors.yml

# The --set arguments are quoted so the literal braces (Helm's list syntax)
# and the expanded host name reach helm as a single word.
helm upgrade prometheus \
  stable/prometheus \
  --namespace metrics \
  --version 9.5.2 \
  --set "server.ingress.hosts={$PROM_ADDR}" \
  --set "alertmanager.ingress.hosts={$AM_ADDR}" \
  -f mon/prom-values-errors.yml

open "http://$PROM_ADDR/graph"
# Prometheus expression
# sum(rate(
# node_cpu_seconds_total{
# mode!="idle",
# mode!="iowait",
# mode!~"^(?:guest.*)$"
# }[5m]
# ))
# by (instance)
# Prometheus expression
# count(
# node_cpu_seconds_total{
# mode="system"
# }
# )
# Prometheus expression
# sum(rate(
# node_cpu_seconds_total{
# mode!="idle",
# mode!="iowait",
# mode!~"^(?:guest.*)$"
# }[5m]
# )) /
# count(
# node_cpu_seconds_total{
# mode="system"
# }
# )
# Prometheus expression
# kube_node_status_allocatable_cpu_cores
# Prometheus expression
# sum(
# kube_node_status_allocatable_cpu_cores
# )
# Prometheus expression
# kube_pod_container_resource_requests_cpu_cores
# Prometheus expression
# sum(
# kube_pod_container_resource_requests_cpu_cores
# )
# Prometheus expression
# sum(
# kube_pod_container_resource_requests_cpu_cores
# ) /
# sum(
# kube_node_status_allocatable_cpu_cores
# )
# Show what the cpu values file changes relative to latency2, then apply it.
diff mon/prom-values-latency2.yml \
  mon/prom-values-cpu.yml

# The --set arguments are quoted so the literal braces (Helm's list syntax)
# and the expanded host name reach helm as a single word.
helm upgrade prometheus \
  stable/prometheus \
  --namespace metrics \
  --version 9.5.2 \
  --set "server.ingress.hosts={$PROM_ADDR}" \
  --set "alertmanager.ingress.hosts={$AM_ADDR}" \
  -f mon/prom-values-cpu.yml

open "http://$PROM_ADDR/alerts"
open "http://$PROM_ADDR/graph"
# Prometheus expression
# node_memory_MemTotal_bytes
# Prometheus expression
# node_memory_MemAvailable_bytes
# Prometheus expression
# 1 -
# sum(
# node_memory_MemAvailable_bytes
# ) /
# sum(
# node_memory_MemTotal_bytes
# )
# Prometheus expression
# kube_node_status_allocatable_memory_bytes
# Prometheus expression
# kube_pod_container_resource_requests_memory_bytes
# Prometheus expression
# sum(
# kube_pod_container_resource_requests_memory_bytes
# )
# Prometheus expression
# sum(
# kube_pod_container_resource_requests_memory_bytes
# ) /
# sum(
# kube_node_status_allocatable_memory_bytes
# )
# Show what the memory values file changes relative to cpu, then apply it.
diff mon/prom-values-cpu.yml \
  mon/prom-values-memory.yml

# The --set arguments are quoted so the literal braces (Helm's list syntax)
# and the expanded host name reach helm as a single word.
helm upgrade prometheus \
  stable/prometheus \
  --namespace metrics \
  --version 9.5.2 \
  --set "server.ingress.hosts={$PROM_ADDR}" \
  --set "alertmanager.ingress.hosts={$AM_ADDR}" \
  -f mon/prom-values-memory.yml

open "http://$PROM_ADDR/alerts"
# Prometheus expression
# sum(rate(
# node_cpu_seconds_total{
# mode!="idle",
# mode!="iowait",
# mode!~"^(?:guest.*)$"
# }[5m]
# ))
# by (instance) /
# count(
# node_cpu_seconds_total{
# mode="system"
# }
# )
# by (instance)
# Prometheus expression
# 1 -
# sum(
# node_memory_MemAvailable_bytes
# )
# by (instance) /
# sum(
# node_memory_MemTotal_bytes
# )
# by (instance)
# Show what cpu-memory changes relative to memory, then apply it.
diff mon/prom-values-memory.yml \
  mon/prom-values-cpu-memory.yml

# The --set arguments are quoted so the literal braces (Helm's list syntax)
# and the expanded host name reach helm as a single word.
helm upgrade prometheus \
  stable/prometheus \
  --namespace metrics \
  --version 9.5.2 \
  --set "server.ingress.hosts={$PROM_ADDR}" \
  --set "alertmanager.ingress.hosts={$AM_ADDR}" \
  -f mon/prom-values-cpu-memory.yml

open "http://$PROM_ADDR/alerts"
open "http://$PROM_ADDR/graph"
# Prometheus expression
# kube_pod_status_phase
# Prometheus expression
# sum(
# kube_pod_status_phase
# )
# by (phase)
# Prometheus expression
# sum(
# kube_pod_status_phase{
# phase=~"Failed|Unknown|Pending"
# }
# )
# by (phase)
# Create a pod named "problem" from the image "i-do-not-exist" (presumably
# chosen so the image pull fails), then look at the resulting pod state.
kubectl run problem \
  --image i-do-not-exist \
  --restart=Never
kubectl get pods
kubectl describe pod problem

# Show what the phase values file changes relative to errors, then apply it.
diff mon/prom-values-errors.yml \
  mon/prom-values-phase.yml

# The --set arguments are quoted so the literal braces (Helm's list syntax)
# and the expanded host name reach helm as a single word.
helm upgrade prometheus \
  stable/prometheus \
  --namespace metrics \
  --version 9.5.2 \
  --set "server.ingress.hosts={$PROM_ADDR}" \
  --set "alertmanager.ingress.hosts={$AM_ADDR}" \
  -f mon/prom-values-phase.yml

# Open the Slack channel (presumably where Alertmanager notifications land).
open "https://devops20.slack.com/messages/CSFMLBLKH/"

# Remove the pod created at the top of this step.
kubectl delete pod problem

open "http://$PROM_ADDR/graph"
# Prometheus expression
# kube_pod_start_time
# Prometheus expression
# time()
# Prometheus expression
# time() -
# kube_pod_start_time
# Prometheus expression
# (
# time() -
# kube_pod_start_time{
# namespace!="kube-system"
# }
# ) > 60
# Prometheus expression
# (
# time() -
# kube_pod_start_time{
# namespace!="kube-system"
# }
# ) >
# (60 * 60 * 24 * 90)
# Show what old-pods changes relative to phase, then apply it.
diff mon/prom-values-phase.yml \
  mon/prom-values-old-pods.yml

# The --set arguments are quoted so the literal braces (Helm's list syntax)
# and the expanded host name reach helm as a single word.
helm upgrade prometheus \
  stable/prometheus \
  --namespace metrics \
  --version 9.5.2 \
  --set "server.ingress.hosts={$PROM_ADDR}" \
  --set "alertmanager.ingress.hosts={$AM_ADDR}" \
  -f mon/prom-values-old-pods.yml

# Open the Slack channel (presumably where Alertmanager notifications land).
open "https://devops20.slack.com/messages/CSFMLBLKH/"

open "http://$PROM_ADDR/graph"
# Prometheus expression
# container_memory_usage_bytes
# Prometheus expression
# container_memory_usage_bytes{
# container_name!=""
# }
# Prometheus expression
# container_memory_usage_bytes{
# container_name="prometheus-server"
# }
# Prometheus expression
# sum(rate(
# container_cpu_usage_seconds_total{
# container_name="prometheus-server"
# }[5m]
# ))
# by (pod_name)
# Open the Prometheus graph page to try the expressions listed below.
open "http://$PROM_ADDR/graph"
# Prometheus expression
# kube_pod_container_resource_requests_memory_bytes{
# container="prometheus-server"
# }
# Prometheus expression
# sum(label_join(
# container_memory_usage_bytes{
# container_name="prometheus-server"
# },
# "pod",
# ",",
# "pod_name"
# ))
# by (pod)
# Prometheus expression
# sum(label_join(
# container_memory_usage_bytes{
# container_name="prometheus-server"
# },
# "pod",
# ",",
# "pod_name"
# ))
# by (pod) /
# sum(
# kube_pod_container_resource_requests_memory_bytes{
# container="prometheus-server"
# }
# )
# by (pod)
# Prometheus expression
# sum(label_join(
# container_memory_usage_bytes{
# namespace!="kube-system"
# },
# "pod",
# ",",
# "pod_name"
# ))
# by (pod) /
# sum(
# kube_pod_container_resource_requests_memory_bytes{
# namespace!="kube-system"
# }
# )
# by (pod)
# Two consecutive upgrades: old-pods -> req-mem, then req-mem -> req-cpu.
# The --set arguments are quoted so the literal braces (Helm's list syntax)
# and the expanded host name reach helm as a single word.

diff mon/prom-values-old-pods.yml \
  mon/prom-values-req-mem.yml
helm upgrade prometheus \
  stable/prometheus \
  --namespace metrics \
  --version 9.5.2 \
  --set "server.ingress.hosts={$PROM_ADDR}" \
  --set "alertmanager.ingress.hosts={$AM_ADDR}" \
  -f mon/prom-values-req-mem.yml

diff mon/prom-values-req-mem.yml \
  mon/prom-values-req-cpu.yml
helm upgrade prometheus \
  stable/prometheus \
  --namespace metrics \
  --version 9.5.2 \
  --set "server.ingress.hosts={$PROM_ADDR}" \
  --set "alertmanager.ingress.hosts={$AM_ADDR}" \
  -f mon/prom-values-req-cpu.yml

open "http://$PROM_ADDR/graph"
# Prometheus expression
# sum(label_join(
# container_memory_usage_bytes{
# namespace!="kube-system"
# },
# "pod",
# ",",
# "pod_name"
# ))
# by (pod) /
# sum(
# kube_pod_container_resource_limits_memory_bytes{
# namespace!="kube-system"
# }
# )
# by (pod)
# Show what limit-mem changes relative to req-cpu, then apply it.
diff mon/prom-values-req-cpu.yml \
  mon/prom-values-limit-mem.yml

# The --set arguments are quoted so the literal braces (Helm's list syntax)
# and the expanded host name reach helm as a single word.
helm upgrade prometheus \
  stable/prometheus \
  --namespace metrics \
  --version 9.5.2 \
  --set "server.ingress.hosts={$PROM_ADDR}" \
  --set "alertmanager.ingress.hosts={$AM_ADDR}" \
  -f mon/prom-values-limit-mem.yml

open "http://$PROM_ADDR/alerts"

# Clean up: remove both Helm releases, then delete their namespaces.
helm delete prometheus \
  --namespace metrics
helm delete go-demo-5 \
  --namespace go-demo-5
kubectl delete ns go-demo-5 metrics
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment