Skip to content

Instantly share code, notes, and snippets.

@dataops-sre
Created February 25, 2019 16:20
Show Gist options
  • Save dataops-sre/79d30d3ae25bfb12b612bdf4c7023b44 to your computer and use it in GitHub Desktop.
Save dataops-sre/79d30d3ae25bfb12b612bdf4c7023b44 to your computer and use it in GitHub Desktop.
Datadog agent DaemonSet deployment with an additional container memory check
---
# ConfigMap holding the two scripts used by the datadog-agent liveness probe.
# The DaemonSet mounts each key as a single file via subPath:
#   probe.sh                  -> /probe.sh
#   container_memory_check.py -> /container_memory_check.py
apiVersion: v1
kind: ConfigMap
metadata:
  name: datadog-config
  namespace: monitoring
data:
  probe.sh: |
    #!/bin/sh
    # Liveness probe entry point. With `set -e` the probe fails as soon as
    # either the agent health command or the memory check exits non-zero.
    set -e
    /opt/datadog-agent/bin/agent/agent health
    # Absolute path: the script is mounted at the filesystem root, so the
    # probe must not depend on the working directory it is executed from.
    python /container_memory_check.py
  container_memory_check.py: |
    #!/usr/bin/env python
    """Fail (exit 1) when the processes visible in this container use more
    than MEM_USED_PERCENT_THRESHOLD percent of the cgroup memory limit."""
    import sys

    import psutil

    CGROUP_MEM_LIMIT_FILE = '/sys/fs/cgroup/memory/memory.limit_in_bytes'
    MEM_USED_PERCENT_THRESHOLD = 90


    def read_cgroup_limit():
        """Return the memory limit, in bytes, read from CGROUP_MEM_LIMIT_FILE."""
        with open(CGROUP_MEM_LIMIT_FILE, "r") as f:
            return float(f.read())


    def total_process_memory():
        """Sum (rss - shared) over every process psutil can inspect."""
        mem_total = 0
        for p in psutil.process_iter():
            try:
                p_mem = p.memory_full_info()
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                # The process exited between enumeration and inspection, or
                # cannot be read. Skip it instead of crashing the probe,
                # which would restart an otherwise healthy agent.
                continue
            mem_total = mem_total + (p_mem.rss - p_mem.shared)
        return mem_total


    def main():
        container_cgroup_limit = read_cgroup_limit()
        mem_total = total_process_memory()
        mem_used_percent = (mem_total / container_cgroup_limit) * 100
        print(
            "container cgroup limit : {0}, total container process memory : {1}"
            .format(container_cgroup_limit, mem_total)
        )
        print("total memory use percentage {0}".format(mem_used_percent))
        if mem_used_percent > MEM_USED_PERCENT_THRESHOLD:
            sys.exit(1)
        sys.exit(0)


    if __name__ == "__main__":
        main()
---
# DaemonSet that runs the Datadog agent container on the cluster nodes.
# The liveness probe runs /probe.sh (from the datadog-config ConfigMap),
# which checks agent health and the container's memory usage.
#
# apps/v1 replaces extensions/v1beta1, which no longer serves DaemonSet as of
# Kubernetes 1.16. apps/v1 requires spec.selector, which is set below.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: datadog-agent
  namespace: monitoring
  annotations:
    # NOTE(review): Datadog Autodiscovery annotations are normally read from
    # the pod, not from the DaemonSet object - confirm whether this should
    # live under spec.template.metadata.annotations instead.
    ad.datadoghq.com/datadog-agent.logs: '[{"source":"datadog","service":"datadog-agent"}]'
    app.kubernetes.io/version: "1.0.2"
  labels:
    app: datadog-agent
spec:
  selector:
    matchLabels:
      name: datadog-agent
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        name: datadog-agent
      annotations:
        # Legacy alpha annotation kept for backward compatibility; the
        # `tolerations` field at the end of the pod spec expresses the same
        # toleration.
        scheduler.alpha.kubernetes.io/tolerations: |
          [{"operator": "Exists", "effect": "NoSchedule"}]
    spec:
      serviceAccountName: datadog-agent
      containers:
        # NOTE(review): a floating `latest-jmx` tag with `Always` pull means
        # the agent version can change on any pod restart - consider pinning.
        - image: datadog/agent:latest-jmx
          imagePullPolicy: Always
          name: datadog-agent
          ports:
            - containerPort: 8125
              # hostPort: 8125
              name: dogstatsdport
              protocol: UDP
            - containerPort: 8126
              # hostPort: 8126
              name: traceport
              protocol: TCP
          env:
            - name: DD_API_KEY
              valueFrom:
                secretKeyRef:
                  name: datadog-secret
                  key: DD_API_KEY
            - name: DD_COLLECT_KUBERNETES_EVENTS
              value: "true"
            - name: DD_HOSTNAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: DD_LEADER_ELECTION
              value: "true"
            - name: KUBERNETES
              value: "true"
            - name: DD_DOGSTATSD_NON_LOCAL_TRAFFIC
              value: "true"
            - name: DD_KUBERNETES_KUBELET_HOST
              valueFrom:
                fieldRef:
                  fieldPath: status.hostIP
            - name: DD_PROCESS_AGENT_ENABLED
              value: "true"
            - name: DD_LOGS_ENABLED
              value: "true"
            - name: SD_JMX_ENABLE
              value: "true"
          resources:
            requests:
              memory: "256Mi"
              cpu: "200m"
            limits:
              # The memory check in the liveness probe fails the container
              # when usage exceeds 90% of this limit.
              memory: "1024Mi"
              cpu: "400m"
          volumeMounts:
            - name: dockersocket
              mountPath: /var/run/docker.sock
            - name: procdir
              mountPath: /host/proc
              readOnly: true
            - name: cgroups
              mountPath: /host/sys/fs/cgroup
              readOnly: true
            - name: logsdir
              mountPath: /opt/datadog-agent/run
              readOnly: false
            # Each ConfigMap key is mounted as a single file via subPath.
            - name: "datadog-config"
              mountPath: "/container_memory_check.py"
              subPath: "container_memory_check.py"
            - name: "datadog-config"
              mountPath: "/probe.sh"
              subPath: "probe.sh"
            # Required to display usernames works more or less OK without it, as everything runs as root...
            # - name: passwd
            #   mountPath: /etc/passwd
            #   readOnly: true
          livenessProbe:
            exec:
              command:
                # Absolute path matching the mountPath above, so the probe
                # does not depend on the container's working directory.
                - /probe.sh
            initialDelaySeconds: 15
            periodSeconds: 5
            # The probe runs the agent health command plus a Python scan of
            # all processes; the default 1s exec timeout is too tight for it.
            timeoutSeconds: 5
      volumes:
        - hostPath:
            path: /var/run/docker.sock
          name: dockersocket
        - hostPath:
            path: /proc
          name: procdir
        - hostPath:
            path: /sys/fs/cgroup
          name: cgroups
        - hostPath:
            path: /opt/datadog-agent/run
          name: logsdir
        - name: "datadog-config"
          configMap:
            name: "datadog-config"
            # Octal file mode (rwxr--r--) so the scripts are executable.
            # Must stay an unquoted integer: the field is an int32.
            defaultMode: 0744
        # Required to display usernames works more or less OK without it, as everything runs as root...
        # - hostPath:
        #     path: /etc/passwd
        #   name: passwd
      tolerations:
        - operator: "Exists"
          effect: "NoSchedule"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment