Created
April 20, 2014 01:05
-
-
Save eiginn/11102164 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#GistID: 11102164
# modified from http://lzone.de/Nagios%20Check%20Plugin%20for%20nofile%20Limit
#
# Check "nofile" limit for all running processes using lsof
#
# Syntax: <script> [-w <warning percentage>] [-c <critical percentage>]
#
# Exit status:
#   0  nothing reached a threshold
#   1  at least one WARNING line (also returned by -h, as before)
#   2  at least one CRITICAL line
#   3  invalid command line (unknown option or non-numeric threshold)

MIN_COUNT=512            # default "nofile" limit is usually 1024, so no checking for
                         # processes with much less open fds needed
WARN_THRESHOLD=80        # default warning: 80% of file limit used
CRITICAL_THRESHOLD=90    # default critical: 90% of file limit used

# Print the one-line syntax summary on stdout.
usage() {
  echo "Syntax: $0 [-w <warning percentage>] [-c <critical percentage>]"
}

# Succeed only when $1 is a non-empty string of decimal digits.
is_uint() {
  [[ $1 =~ ^[0-9]+$ ]]
}

# Print "<current> <maximum>", i.e. fields 1 and 3 of /proc/sys/fs/file-nr.
# Returns non-zero (and prints nothing) when the file cannot be read.
global_file_usage() {
  local cur unused max
  [[ -r /proc/sys/fs/file-nr ]] || return 1
  read -r cur unused max < /proc/sys/fs/file-nr || [[ -n $cur ]] || return 1
  echo "$cur $max"
}

# Print one "<count> <name> <pid>" line per distinct (name, pid) pair seen in
# the lsof listing.
#
# We use the following lsof options:
#
#   -n   to avoid resolving network names
#   -b   to avoid kernel locks
#   -w   to avoid warnings caused by -b
#   +c15 to get somewhat longer process names
#
# The awk filter drops rows whose 5th whitespace-separated field matches one
# of the listed tokens and keeps fields 1 and 2.
# NOTE(review): which lsof column ends up as field 5 depends on the lsof
# version's output layout -- confirm the filter hits the intended column.
list_open_files() {
  timeout 30s lsof -wbn +c15 2>/dev/null \
    | awk '$5 !~ /(cwd|err|ltx|mem|mmap|pd|rtd|txt)/ {print $1,$2}' \
    | sort \
    | uniq -c
}

# Print the hard "open files" limit (5th field of that line) for PID $1.
# Prints nothing when /proc/<pid>/limits is unreadable, e.g. because the
# process has terminated in the meantime.
# NOTE(review): the soft limit (4th field) is the one a process actually runs
# into; the hard limit is kept here to preserve the existing alerting behavior.
open_files_limit() {
  awk '/open files/ {print $5}' "/proc/$1/limits" 2>/dev/null
}

# Emit a CRITICAL/WARNING line when used/limit reaches a threshold.
#   $1  label placed after the severity
#   $2  number of files in use
#   $3  limit
# Non-numeric or zero limits are skipped instead of aborting the arithmetic
# with a syntax or division-by-zero error.
report_usage() {
  local label=$1 used=$2 limit=$3 ratio
  is_uint "$used" && is_uint "$limit" || return 0
  # Force base 10 so a leading zero is not parsed as octal.
  used=$((10#$used))
  limit=$((10#$limit))
  (( limit > 0 )) || return 0
  ratio=$(( used * 100 / limit ))
  if (( ratio >= CRITICAL_THRESHOLD )); then
    echo "CRITICAL $label $ratio% of $limit used"
  elif (( ratio >= WARN_THRESHOLD )); then
    echo "WARNING $label $ratio% of $limit used"
  fi
  return 0
}

# Run the global check and all per-process checks; print one line per finding.
collect_results() {
  local cur max count name pid rest limit

  # Check global limit
  if read -r cur max < <(global_file_usage); then
    report_usage "global file usage" "$cur" "$max"
  else
    echo "cannot read /proc/sys/fs/file-nr, skipping the global check" >&2
  fi

  list_open_files | while read -r count name pid rest; do
    # Never check anything at or below the sane minimum
    is_uint "$count" && (( 10#$count > MIN_COUNT )) || continue
    limit=$(open_files_limit "$pid")
    # Check if we got something, if not the process must have terminated
    [[ -n $limit ]] || continue
    report_usage "$name (PID $pid)" "$count" "$limit"
  done
}

# Parse options, run the checks and translate the findings into the plugin's
# output line and exit status.
main() {
  local option results summary
  local OPTIND=1

  while getopts "hw:c:" option; do
    case $option in
      w) WARN_THRESHOLD=$OPTARG ;;
      c) CRITICAL_THRESHOLD=$OPTARG ;;
      h) usage; exit 1 ;;
      *) usage >&2; exit 3 ;;
    esac
  done

  # A non-numeric threshold would otherwise make every comparison fail and the
  # check would report "fine" regardless of the real state.
  if ! is_uint "$WARN_THRESHOLD" || ! is_uint "$CRITICAL_THRESHOLD"; then
    echo "UNKNOWN thresholds must be non-negative integers"
    usage >&2
    exit 3
  fi
  WARN_THRESHOLD=$((10#$WARN_THRESHOLD))
  CRITICAL_THRESHOLD=$((10#$CRITICAL_THRESHOLD))

  results=$(collect_results)

  # All findings are printed on a single line, joined by spaces; the join is
  # done explicitly so the text is never subject to globbing.
  summary=${results//$'\n'/ }

  if grep -q '^CRITICAL ' <<<"$results"; then
    printf '%s\n' "$summary"
    exit 2
  fi
  if grep -q '^WARNING ' <<<"$results"; then
    printf '%s\n' "$summary"
    exit 1
  fi
  echo "All processes are fine."
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment