# Derived from ./manifests
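#
# A minimal sketch of applying this manifest (assumes kubectl is configured
# for the target cluster and that this file is saved as monitoring.yaml;
# the `monitoring` namespace is not created by the manifest itself):
#
#   kubectl create namespace monitoring
#   kubectl apply -f monitoring.yaml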
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
- kind: ServiceAccount
  name: prometheus-k8s
  namespace: monitoring
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
rules:
- apiGroups: [""]
  resources:
  - nodes
  - nodes/proxy
  - services
  - endpoints
  - pods
  verbs: ["get", "list", "watch"]
- apiGroups: [""]
  resources:
  - configmaps
  verbs: ["get"]
- nonResourceURLs: ["/metrics"]
  verbs: ["get"]
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus-k8s
  namespace: monitoring
---
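# One way to sanity-check the RBAC wiring above: `kubectl auth can-i` can
# impersonate the service account that the binding targets.
#
#   kubectl auth can-i list pods \
#     --as=system:serviceaccount:monitoring:prometheus-k8s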
apiVersion: v1
kind: ConfigMap
metadata:
  name: alertmanager
  namespace: monitoring
data:
  config.yml: |-
    global:
      # ResolveTimeout is the time after which an alert is declared resolved
      # if it has not been updated.
      resolve_timeout: 5m

      # The smarthost and SMTP sender used for mail notifications.
      smtp_smarthost: 'smtp.gmail.com:587'
      smtp_from: 'foo@bar.com'
      smtp_auth_username: 'foo@bar.com'
      smtp_auth_password: 'barfoo'

      # The API URL to use for Slack notifications.
      slack_api_url: 'https://hooks.slack.com/services/some/api/token'

    # The directory from which notification templates are read.
    templates:
    - '/etc/alertmanager-templates/*.tmpl'

    # The root route on which each incoming alert enters.
    route:
      # The labels by which incoming alerts are grouped together. For example,
      # multiple alerts coming in for cluster=A and alertname=LatencyHigh would
      # be batched into a single group.
      group_by: ['alertname', 'cluster', 'service']

      # When a new group of alerts is created by an incoming alert, wait at
      # least 'group_wait' to send the initial notification. This ensures that
      # multiple alerts for the same group that start firing shortly after one
      # another are batched together in the first notification.
      group_wait: 30s

      # After the first notification was sent, wait 'group_interval' to send a
      # batch of new alerts that started firing for that group.
      group_interval: 5m

      # If an alert has successfully been sent, wait 'repeat_interval' before
      # resending it.
      #repeat_interval: 1m
      repeat_interval: 15m

      # The default receiver: if an alert isn't caught by a route, send it here.
      receiver: default

      # All the above attributes are inherited by all child routes and can be
      # overwritten on each.

      # The child route trees.
      routes:
      # Send severity=slack alerts to slack.
      - match:
          severity: slack
        receiver: slack_alert
      # - match:
      #     severity: email
      #   receiver: email_alert

    receivers:
    - name: 'default'
      slack_configs:
      - channel: '#alertmanager-test'
        text: '<!channel>{{ template "slack.devops.text" . }}'
        send_resolved: true
    - name: 'slack_alert'
      slack_configs:
      - channel: '#alertmanager-test'
        send_resolved: true
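#
# Newer Alertmanager releases ship `amtool`, which can lint a config like the
# one above before it is rolled out (a sketch, run against a local copy of
# the file):
#
#   amtool check-config config.yml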
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: alertmanager
  namespace: monitoring
  labels:
    app: alertmanager
spec:
  replicas: 1
  selector:
    matchLabels:
      app: alertmanager
  template:
    metadata:
      name: alertmanager
      labels:
        app: alertmanager
    spec:
      containers:
      - name: alertmanager
        image: quay.io/prometheus/alertmanager:v0.7.1
        args:
        - "-config.file=/etc/alertmanager/config.yml"
        - "-storage.path=/alertmanager"
        ports:
        - name: alertmanager
          containerPort: 9093
        volumeMounts:
        - name: config-volume
          mountPath: /etc/alertmanager
        - name: templates-volume
          mountPath: /etc/alertmanager-templates
        - name: alertmanager
          mountPath: /alertmanager
      volumes:
      - name: config-volume
        configMap:
          name: alertmanager
      - name: templates-volume
        configMap:
          name: alertmanager-templates
      - name: alertmanager
        emptyDir: {}
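# Note: the single-dash flags above match the pinned 0.x image; Alertmanager
# 0.13 and later switched to double-dash flags (--config.file,
# --storage.path), so the args must be updated if the image tag is bumped.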
---
apiVersion: v1
kind: Service
metadata:
  annotations:
    prometheus.io/scrape: "true"
    prometheus.io/path: "/metrics"
  labels:
    name: alertmanager
  name: alertmanager
  namespace: monitoring
spec:
  selector:
    app: alertmanager
  type: NodePort
  ports:
  - name: alertmanager
    protocol: TCP
    port: 9093
    targetPort: 9093
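# Besides the NodePort, recent kubectl versions can port-forward straight to
# the Service for a quick look at the Alertmanager UI (a sketch):
#
#   kubectl -n monitoring port-forward svc/alertmanager 9093:9093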
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: grafana-core
  namespace: monitoring
  labels:
    app: grafana
    component: core
spec:
  replicas: 1
  selector:
    matchLabels:
      app: grafana
      component: core
  template:
    metadata:
      labels:
        app: grafana
        component: core
    spec:
      containers:
      - image: grafana/grafana:latest
        name: grafana-core
        imagePullPolicy: Always
        resources:
          # keep requests = limits to keep this container in the Guaranteed QoS class
          limits:
            cpu: 100m
            memory: 100Mi
          requests:
            cpu: 100m
            memory: 100Mi
        env:
        # The following env variables set up basic auth with the default admin user and admin password.
        - name: GF_AUTH_BASIC_ENABLED
          value: "true"
        - name: GF_AUTH_ANONYMOUS_ENABLED
          value: "false"
        - name: GF_SECURITY_ADMIN_USER
          valueFrom:
            secretKeyRef:
              name: monitoring
              key: admin-username
        - name: GF_SECURITY_ADMIN_PASSWORD
          valueFrom:
            secretKeyRef:
              name: monitoring
              key: admin-password
        - name: GF_SMTP_HOST
          valueFrom:
            secretKeyRef:
              name: monitoring
              key: smtp-host
        - name: GF_SMTP_USER
          valueFrom:
            secretKeyRef:
              name: monitoring
              key: smtp-username
        - name: GF_SMTP_PASSWORD
          valueFrom:
            secretKeyRef:
              name: monitoring
              key: smtp-password
        # - name: GF_AUTH_ANONYMOUS_ORG_ROLE
        #   value: Admin
        # does not really work, because of template variables in exported dashboards:
        # - name: GF_DASHBOARDS_JSON_ENABLED
        #   value: "true"
        readinessProbe:
          httpGet:
            path: /login
            port: 3000
          # initialDelaySeconds: 30
          # timeoutSeconds: 1
        volumeMounts:
        - name: grafana-persistent-storage
          mountPath: /var/lib/grafana
      volumes:
      - name: grafana-persistent-storage
        emptyDir: {}
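# The Deployment above reads its admin and SMTP credentials from a Secret
# named `monitoring`, which this file does not define. A sketch of creating
# it (every value below is a placeholder):
#
#   kubectl -n monitoring create secret generic monitoring \
#     --from-literal=admin-username=admin \
#     --from-literal=admin-password=changeme \
#     --from-literal=smtp-host='smtp.example.com:587' \
#     --from-literal=smtp-username='foo@example.com' \
#     --from-literal=smtp-password=changeme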
---
# apiVersion: extensions/v1beta1
# kind: Ingress
# metadata:
#   name: grafana
#   namespace: monitoring
# spec:
#   rules:
#   - host: <yourchoice>.<cluster-id>.k8s.gigantic.io
#     http:
#       paths:
#       - path: /
#         backend:
#           serviceName: grafana
#           servicePort: 3000
---
apiVersion: v1
kind: Service
metadata:
  name: grafana
  namespace: monitoring
  labels:
    app: grafana
    component: core
spec:
  type: NodePort
  ports:
  - port: 3000
  selector:
    app: grafana
    component: core
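# Once the pod is ready, the NodePort that was assigned to Grafana can be
# read back with:
#
#   kubectl -n monitoring get svc grafana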
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus-core
  namespace: monitoring
  labels:
    app: prometheus
    component: core
spec:
  selector:
    matchLabels:
      app: prometheus
      component: core
  replicas: 1
  template:
    metadata:
      name: prometheus-main
      labels:
        app: prometheus
        component: core
    spec:
      serviceAccountName: prometheus-k8s
      containers:
      - name: prometheus
        image: prom/prometheus:v1.7.0
        args:
        - "-storage.local.retention=12h"
        - "-storage.local.memory-chunks=500000"
        - "-config.file=/etc/prometheus/prometheus.yaml"
        - "-alertmanager.url=http://alertmanager:9093/"
        ports:
        - name: webui
          containerPort: 9090
        resources:
          requests:
            cpu: 500m
            memory: 500M
          limits:
            cpu: 500m
            memory: 500M
        volumeMounts:
        - name: config-volume
          mountPath: /etc/prometheus
        - name: rules-volume
          mountPath: /etc/prometheus-rules
      volumes:
      - name: config-volume
        configMap:
          name: prometheus-core
      - name: rules-volume
        configMap:
          name: prometheus-rules
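# Two caveats on the Deployment above. First, the single-dash
# -storage.local.* flags are Prometheus 1.x only; Prometheus 2.x replaced
# local storage with TSDB (--storage.tsdb.retention etc.), so the args are
# tied to the pinned 1.7.0 image. Second, it mounts two ConfigMaps that this
# file does not define: `prometheus-core` and `prometheus-rules`. A minimal,
# hypothetical sketch of the former, left commented out like the optional
# resources further down (the scrape config is illustrative only):
#
# apiVersion: v1
# kind: ConfigMap
# metadata:
#   name: prometheus-core
#   namespace: monitoring
# data:
#   prometheus.yaml: |
#     global:
#       scrape_interval: 30s
#     rule_files:
#     - '/etc/prometheus-rules/*.rules'
#     scrape_configs:
#     - job_name: 'kubernetes-pods'
#       kubernetes_sd_configs:
#       - role: pod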
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: kube-state-metrics
  namespace: monitoring
  labels:
    app: kube-state-metrics
spec:
  selector:
    matchLabels:
      app: kube-state-metrics
  replicas: 1
  template:
    metadata:
      labels:
        app: kube-state-metrics
    spec:
      serviceAccountName: kube-state-metrics
      containers:
      - name: kube-state-metrics
        image: gcr.io/google_containers/kube-state-metrics:v0.5.0
        ports:
        - containerPort: 8080
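# Note: the service account referenced above is only defined in the
# commented-out block below; the pod will not be created until a
# `kube-state-metrics` ServiceAccount exists in the namespace. A quick smoke
# test once it is running (assumes a kubectl recent enough to port-forward a
# Deployment):
#
#   kubectl -n monitoring port-forward deploy/kube-state-metrics 8080:8080
#   curl -s localhost:8080/metrics | head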
---
# ---
## apiVersion: rbac.authorization.k8s.io/v1beta1
## kind: ClusterRoleBinding
## metadata:
##   name: kube-state-metrics
## roleRef:
##   apiGroup: rbac.authorization.k8s.io
##   kind: ClusterRole
##   name: kube-state-metrics
## subjects:
## - kind: ServiceAccount
##   name: kube-state-metrics
##   namespace: monitoring
## ---
## apiVersion: rbac.authorization.k8s.io/v1beta1
## kind: ClusterRole
## metadata:
##   name: kube-state-metrics
## rules:
## - apiGroups: [""]
##   resources:
##   - nodes
##   - pods
##   - services
##   - resourcequotas
##   - replicationcontrollers
##   - limitranges
##   verbs: ["list", "watch"]
## - apiGroups: ["extensions"]
##   resources:
##   - daemonsets
##   - deployments
##   - replicasets
##   verbs: ["list", "watch"]
## ---
#apiVersion: v1
#kind: ServiceAccount
#metadata:
#  name: kube-state-metrics
#  namespace: monitoring
#---
#apiVersion: v1
#kind: Service
#metadata:
#  annotations:
#    prometheus.io/scrape: "true"
#  name: kube-state-metrics
#  namespace: monitoring
#  labels:
#    app: kube-state-metrics
#spec:
#  ports:
#  - name: kube-state-metrics
#    port: 8080
#    protocol: TCP
#  selector:
#    app: kube-state-metrics
#
#---
#apiVersion: apps/v1
#kind: DaemonSet
#metadata:
#  name: node-directory-size-metrics
#  namespace: monitoring
#  labels:
#    prom: disk
#  annotations:
#    description: |
#      This `DaemonSet` provides metrics in Prometheus format about disk usage on the nodes.
#      The container `read-du` reads in sizes of all directories below /mnt and writes that to `/tmp/metrics`. It only reports directories larger than `100M` for now.
#      The other container `caddy` just hands out the contents of that file on request via `http` on `/metrics` at port `9102`, which are the defaults for Prometheus.
#      These are scheduled on every node in the Kubernetes cluster.
#      To choose directories from the node to check, just mount them on the `read-du` container below `/mnt`.
#spec:
#  selector:
#    matchLabels:
#      prom: disk
#  template:
#    metadata:
#      labels:
#        # `prom: disk` added so the template labels match the selector above.
#        prom: disk
#        app: node-directory-size-metrics
#      annotations:
#        prometheus.io/scrape: "true"
#        prometheus.io/port: "9102"
#        description: |
#          This `Pod` provides metrics in Prometheus format about disk usage on the node.
#          The container `read-du` reads in sizes of all directories below /mnt and writes that to `/tmp/metrics`. It only reports directories larger than `100M` for now.
#          The other container `caddy` just hands out the contents of that file on request on `/metrics` at port `9102` which are the defaults for Prometheus.
#          This `Pod` is scheduled on every node in the Kubernetes cluster.
#          To choose directories from the node to check, just mount them on `read-du` below `/mnt`.
#    spec:
#      containers:
#      - name: read-du
#        image: giantswarm/tiny-tools
#        imagePullPolicy: Always
#        # FIXME threshold via env var
#        command:
#        - fish
#        - --command
#        - |
#          touch /tmp/metrics-temp
#          while true
#            for directory in (du --bytes --separate-dirs --threshold=100M /mnt)
#              echo $directory | read size path
#              echo "node_directory_size_bytes{path=\"$path\"} $size" \
#                >> /tmp/metrics-temp
#            end
#            mv /tmp/metrics-temp /tmp/metrics
#            sleep 300
#          end
#        volumeMounts:
#        - name: host-fs-var
#          mountPath: /mnt/var
#          readOnly: true
#        - name: metrics
#          mountPath: /tmp
#      - name: caddy
#        image: dockermuenster/caddy:0.9.3
#        command:
#        - "caddy"
#        - "-port=9102"
#        - "-root=/var/www"
#        ports:
#        - containerPort: 9102
#        volumeMounts:
#        - name: metrics
#          mountPath: /var/www
#      volumes:
#      - name: host-fs-var
#        hostPath:
#          path: /var
#      - name: metrics
#        emptyDir:
#          medium: Memory
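# The fish loop in `read-du` above emits one gauge per directory in the
# Prometheus text exposition format, e.g. (the value is illustrative):
#
#   node_directory_size_bytes{path="/mnt/var/log"} 123456789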
#---
#apiVersion: apps/v1
#kind: DaemonSet
#metadata:
#  name: prometheus-node-exporter
#  namespace: monitoring
#  labels:
#    app: prometheus
#    component: node-exporter
#spec:
#  selector:
#    matchLabels:
#      app: prometheus
#      component: node-exporter
#  template:
#    metadata:
#      name: prometheus-node-exporter
#      labels:
#        app: prometheus
#        component: node-exporter
#    spec:
#      containers:
#      - image: prom/node-exporter:v0.14.0
#        name: prometheus-node-exporter
#        ports:
#        - name: prom-node-exp
#          # ^ must be an IANA_SVC_NAME (at most 15 characters, ..)
#          containerPort: 9100
#          hostPort: 9100
#      hostNetwork: true
#      hostPID: true
#---
#apiVersion: v1
#kind: Service
#metadata:
#  annotations:
#    prometheus.io/scrape: "true"
#  name: prometheus-node-exporter
#  namespace: monitoring
#  labels:
#    app: prometheus
#    component: node-exporter
#spec:
#  clusterIP: None
#  ports:
#  - name: prometheus-node-exporter
#    port: 9100
#    protocol: TCP
#  selector:
#    app: prometheus
#    component: node-exporter
#  type: ClusterIP
#---
#apiVersion: v1
#kind: Service
#metadata:
#  name: prometheus
#  namespace: monitoring
#  labels:
#    app: prometheus
#    component: core
#  annotations:
#    prometheus.io/scrape: "true"
#spec:
#  type: NodePort
#  ports:
#  - port: 9090
#    protocol: TCP
#    name: webui
#  selector:
#    app: prometheus
#    component: core
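#
# After applying, a quick end-to-end check that everything came up (names
# match the uncommented resources above):
#
#   kubectl -n monitoring get pods,svc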