Skip to content

Instantly share code, notes, and snippets.

@ppetko
Last active June 4, 2022 21:18
Show Gist options
  • Save ppetko/89b54f154edcf8220c56bee6717a2081 to your computer and use it in GitHub Desktop.
Save ppetko/89b54f154edcf8220c56bee6717a2081 to your computer and use it in GitHub Desktop.
cMonitor - Docker monitoring for the state of the containers
#!/bin/bash
# cMonitor: reports on the state of Docker containers and swarm services.

# A pipeline fails when any of its stages fails, not only the last one.
set -o pipefail

# Failure counters, incremented by the individual check functions.
state_swarm=0
state_replicated=0
critical=0

# Timestamp taken once at start-up; log/warn/panic prefix every message
# with this same value.
DATE=$(date +%Y-%m-%d-%H:%M:%S)
#######################################
# Print an informational message prefixed with the start-up timestamp.
# Globals:   DATE (read)
# Arguments: message words
# Outputs:   "<DATE> <message>" on stdout
# Returns:   always 0
#######################################
log() {
  printf '%s\n' "$DATE $*"
  return 0
}
#######################################
# Print a warning message prefixed with the start-up timestamp.
# Globals:   DATE (read)
# Arguments: message words
# Outputs:   "<DATE> <message>" on stdout
# Returns:   always 1, so callers can treat the call itself as a failure
#######################################
warn() {
  printf '%s\n' "$DATE $*"
  return 1
}
#######################################
# Print a fatal message prefixed with the start-up timestamp and terminate
# the current shell with status 1.
# Globals:   DATE (read)
# Arguments: message words
# Outputs:   "<DATE> <message>" on stdout
# Returns:   does not return; exits with status 1
#######################################
panic() {
  printf '%s\n' "$DATE $*"
  exit 1
}
#######################################
# Abort unless the script is being run by the user named "root".
# Outputs: logs its own function name; on failure panic prints the reason
# Returns: 0 when whoami reports "root"; otherwise panic ends the script
#######################################
check_user() {
  log "${FUNCNAME[0]}"

  local current_user
  current_user=$(whoami)
  if [[ "$current_user" == "root" ]]; then
    return 0
  fi

  panic "Root privileges are required to run this, try running with sudo..."
}
#######################################
# Abort unless a `docker` command can be resolved.
#
# Uses the `command -v` builtin instead of the external `which`: when
# `which` itself is not installed, the old test produced an empty result
# and reported docker as missing even when it was present.
#
# Outputs: logs its own function name; on failure panic prints the reason
# Returns: 0 when docker is found; otherwise panic ends the script
#######################################
check_docker() {
  log "${FUNCNAME[0]}"

  if ! command -v docker > /dev/null 2>&1; then
    panic "UNKNOWN - Missing docker binary"
  fi
}
#######################################
# Abort unless `systemctl status docker` succeeds.
# Outputs: logs its own function name; systemctl output is discarded;
#          on failure panic prints the reason
# Returns: 0 when systemctl reports success; otherwise panic ends the script
#######################################
check_docker_daemon() {
  log "${FUNCNAME[0]}"

  # Only the exit status matters here, so both output streams are dropped.
  if ! systemctl status docker > /dev/null 2>&1; then
    panic "Docker daemon is not running"
  fi
}
#######################################
# Abort unless the `json_reformat` helper can be resolved.
#
# Uses the `command -v` builtin instead of the external `which`: when
# `which` itself is not installed, the old test failed and reported
# json_reformat as missing even when it was present.
#
# Outputs: logs its own function name; on failure panic prints the reason
# Returns: 0 when json_reformat is found; otherwise panic ends the script
#######################################
check_deps() {
  log "${FUNCNAME[0]}"

  if ! command -v json_reformat > /dev/null 2>&1; then
    panic "json_reformat is not installed"
  fi
}
#######################################
# Stop the script early when this node is not a swarm manager.
#
# Reads the third field of the "Is Manager" line printed by `docker info`.
# When that value is exactly "false", logs an OK message and exits with
# status 0. Any other value (including no such line) lets the caller
# continue.
#
# NOTE(review): the `exit 0` ends the whole script when this function runs
# in the main shell, so checks scheduled after the caller are skipped on
# non-manager nodes - confirm that this is intended.
#
# Outputs: "OK - Not a swarm master" via log when not a manager
# Returns: 0 when execution may continue; exits 0 otherwise
#######################################
isManager() {
  # Kept local so the result no longer leaks into the global scope.
  local is_manager
  is_manager=$(docker info | awk '/Is Manager/ {print $3}')

  if [[ "$is_manager" == "false" ]]; then
    log "OK - Not a swarm master"
    exit 0
  fi
}
#######################################
# Check the reachability of every swarm manager node.
#
# Fixes over the previous version, whose check could never fire:
#   * manager nodes are selected with `docker node ls --filter role=manager`
#     instead of grepping the table for 'Leader|Reachable|Reachable', which
#     never matched rows shown as "Unreachable";
#   * the inspected status is compared against the unreachable state (the
#     Docker API documents the lower-case values reachable / unreachable /
#     unknown); the old literal "Unavailable" is still accepted;
#   * inspect errors are silenced inside the command substitution, where
#     the redirection actually applies to the docker call.
#
# Globals:   state_swarm (incremented once per unhealthy node)
# Outputs:   logs its own function name; one warn line per unhealthy node
# Returns:   the value of state_swarm (0 when every manager is healthy)
#######################################
check_swarm_nodes() {
  log "${FUNCNAME[0]}"
  isManager

  local node reachability
  while IFS= read -r node; do
    [[ -n "$node" ]] || continue

    reachability=$(docker node inspect \
      --format '{{.ManagerStatus.Reachability}}' "$node" 2> /dev/null)

    case "$reachability" in
      [Uu]nreachable | [Uu]navailable)
        state_swarm=$((state_swarm + 1))
        warn "Swarm node $node is not healthy"
        ;;
    esac
  done < <(docker node ls --filter role=manager -q)

  return "${state_swarm:-0}"
}
#######################################
# Check that every replicated service runs its configured replica count.
#
# Fixes over the previous version:
#   * running tasks are counted from the CURRENT state of the tasks whose
#     desired state is running; grepping the whole `service ps` row for
#     "running" also counted tasks that were merely desired to run;
#   * the replica count comes from the line whose first field is exactly
#     "Replicas:", so other lines containing the word (for example a
#     "Max Replicas Per Node" line) cannot corrupt the value;
#   * the service is inspected once instead of twice;
#   * a missing or non-numeric replica count skips the service instead of
#     raising a test-syntax error.
#
# The misspelling in the warning text is kept on purpose, in case external
# alerting matches on the existing message.
#
# Globals:   state_replicated (incremented once per degraded service)
# Outputs:   logs its own function name; one warn line per degraded service
# Returns:   the value of state_replicated (0 when all services are healthy)
#######################################
check_replication() {
  log "${FUNCNAME[0]}"
  isManager

  local service details mode replicas running_replicas
  while IFS= read -r service; do
    [[ -n "$service" ]] || continue

    details=$(docker service inspect --pretty "$service")

    mode=$(awk '$1 == "Service" && $2 == "Mode:" {print $3; exit}' \
      <<< "$details")
    [[ "$mode" == "Replicated" ]] || continue

    replicas=$(awk '$1 == "Replicas:" {print $2; exit}' <<< "$details")
    [[ "$replicas" =~ ^[0-9]+$ ]] || continue

    # grep -c always prints a number, including 0 when nothing matches.
    running_replicas=$(docker service ps \
      --filter desired-state=running \
      --format '{{.CurrentState}}' "$service" \
      | grep -c '^Running')

    if ((replicas != running_replicas)); then
      state_replicated=$((state_replicated + 1))
      warn "Service $service is not replciated"
    fi
  done < <(docker service ls -q)

  return "${state_replicated:-0}"
}
#######################################
# Check that every container known to Docker is running and not restarting.
#
# Stops at the first container whose State.Running is "false" or whose
# State.Restarting is "true".
#
# Fix: the previous version returned $STATE_CRITICAL, which this script
# never defines, so the function silently returned the status of the
# preceding warn call (1). It now returns STATE_CRITICAL when the
# environment provides it and otherwise 2, the Nagios plugin convention
# for CRITICAL. Both state fields are also read with a single inspect call.
#
# The misspelling in the warning text is kept on purpose, in case external
# alerting matches on the existing message.
#
# Globals:   critical (incremented on failure), STATE_CRITICAL (read, optional)
# Outputs:   logs its own function name; one warn line on failure
# Returns:   0 when all containers are healthy, ${STATE_CRITICAL:-2} otherwise
#######################################
check_all_container() {
  log "${FUNCNAME[0]}"

  local container state running restarting
  while IFS= read -r container; do
    [[ -n "$container" ]] || continue

    # Yields e.g. "true false": Running first, Restarting second.
    state=$(docker inspect \
      --format '{{.State.Running}} {{.State.Restarting}}' "$container")
    running=${state%% *}
    restarting=${state##* }

    if [[ "$running" == "false" || "$restarting" == "true" ]]; then
      critical=$((critical + 1))
      warn "CRITICAL - Some or all ontainers are not running"
      return "${STATE_CRITICAL:-2}"
    fi
  done < <(docker ps -a -q)

  return 0
}
#######################################
# Run all prerequisite checks, then all health checks, and report a
# combined status.
#
# Fix: the previous version discarded the results of the swarm and
# replication checks, so the script's exit status reflected only the
# container check. Now any failing health check makes the result non-zero.
# A failing container check still determines the code, exactly as before.
#
# Returns: 0 when every check passed; the container check's status when it
#          failed; otherwise 1 when the swarm or replication check failed
#######################################
main() {
  local rc=0 container_status=0

  # Prerequisites: each of these ends the script itself on failure.
  check_user
  check_docker
  check_docker_daemon
  check_deps

  check_swarm_nodes || rc=1
  check_replication || rc=1
  check_all_container || container_status=$?

  if ((container_status != 0)); then
    rc=$container_status
  fi
  return "$rc"
}
main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment