Skip to content

Instantly share code, notes, and snippets.

@adamwalter
Last active December 31, 2015 20:29
Show Gist options
  • Save adamwalter/8040188 to your computer and use it in GitHub Desktop.
Save adamwalter/8040188 to your computer and use it in GitHub Desktop.
Monitor script for high server load
#! /bin/sh
#
# Script to send email notification if a server exceeds a specified load average.
#
# This part of the script collects the load averages, memory statistics and
# process lists, writes the message body to a temp file, and sets RESULT to
# "1" when the 5 minute load average is above NOTIFY (otherwise "0").
#
# Selected load average limit.  If the 5 minute load average is above this
# number a notification message will be emailed.
NOTIFY="8"
# Value RESULT holds when the limit has been exceeded.
TRUE="1"
# Email address(es) to receive alerts, separated by spaces.
EMAIL="example1@example.com example2@example.com"
# Create a temp file for the message body.  Everything below writes to it, so
# stop right away if it cannot be created.
TEMPFILE="$(mktemp)" || { echo "Unable to create temp file" >&2; exit 1; }
# Remove the temp file whenever the script exits, so repeated runs do not
# leave stale files behind.
trap 'rm -f -- "$TEMPFILE"' EXIT
# The text which marks the start of the load figures in the uptime output.
FTEXT='load average:'
# Run uptime once so that all three figures come from the same sample.
# The result looks like "0.10,0.20,0.30" (spaces removed).
LOADAVG="$(uptime | awk -F "$FTEXT" '{ print $2 }' | sed 's/ //g')"
# Get the load average for the last 1 minute.
LOAD1MIN="$(printf '%s\n' "$LOADAVG" | cut -d, -f1)"
# Get the load average for the last 5 minutes.
LOAD5MIN="$(printf '%s\n' "$LOADAVG" | cut -d, -f2)"
# Get the load average for the last 15 minutes.
LOAD15MIN="$(printf '%s\n' "$LOADAVG" | cut -d, -f3)"
# awk the memory stats.  The old "-o" flag is not passed because newer
# versions of free reject it; the "Total:" line only needs -t.
MEMU="$(free -tm | awk '/Total:/ {print "Total memory: "$2" MB\nUsed memory: "$3" MB\nFree memory: "$4" MB"}')"
# Take one process listing and derive both top 10 lists from it, so the CPU
# and RAM tables describe the same moment.
PSLIST="$(ps auxf)"
# Get top 10 processes, sorted by CPU (column 3) and RAM (column 4).
TOPCPU="$(printf '%s\n' "$PSLIST" | sort -nr -k 3 | head -n 10)"
TOPMEM="$(printf '%s\n' "$PSLIST" | sort -nr -k 4 | head -n 10)"
# Email subject
SUBJECT="Alert $(hostname) high load average: $LOAD5MIN"
# Line used above and below every section title in the message body.
DIVIDER="-----------------------------------"
# Print a section heading: spacer line, divider, title ($1), divider, spacer line.
section_header()
{
  printf '%s\n' " " "$DIVIDER" "$1" "$DIVIDER" " "
}
# Mail message body.  The whole body is written through a single redirection,
# and printf '%s\n' is used instead of echo so that backslashes in the process
# listings are written out unchanged by every /bin/sh.
{
  printf '%s\n' "Server 5 min load average $LOAD5MIN is above notification threshold $NOTIFY"
  printf '%s\n' " "
  printf '%s\n' "Hostname: $(hostname)"
  printf '%s\n' "Local Date & Time : $(date)"
  section_header "Load averages:"
  printf '%s\n' "Last 1 minute: $LOAD1MIN"
  printf '%s\n' "Last 5 minutes: $LOAD5MIN"
  printf '%s\n' "Last 15 minutes: $LOAD15MIN"
  section_header "Memory stats:"
  printf '%s\n' "$MEMU"
  section_header "Top 10 processes (by CPU usage):"
  printf '%s\n' "$TOPCPU"
  section_header "Top 10 processes (by memory usage):"
  printf '%s\n' "$TOPMEM"
} >> "$TEMPFILE"
# Look if the limit has been exceeded, compared with the last 5 min load average.
# awk (already required above) does the decimal comparison, so bc is not needed.
# It prints 1 when the load is above the limit and 0 otherwise; an empty load
# value counts as 0.
RESULT=$(awk -v load="$LOAD5MIN" -v limit="$NOTIFY" 'BEGIN { if (load + 0 > limit + 0) print 1; else print 0 }')
# Send email function
#
# Mails the message body stored in the file $TEMPFILE to every address in
# $EMAIL with subject $SUBJECT, then exits the script with mail's exit status.
# Globals read: SUBJECT, EMAIL, TEMPFILE
send_alert()
{
  # The subject carries an extra "X-Priority: 1" line after a newline, as the
  # script has always done.  printf builds it so the newline is produced the
  # same way by every /bin/sh ("echo -e" is not portable).
  # $EMAIL is left unquoted on purpose: it is a space separated list and each
  # address must become its own argument.
  # shellcheck disable=SC2086
  mail -s "$(printf '%s \nX-Priority: 1' "$SUBJECT")" $EMAIL < "$TEMPFILE"
  exit
}
# If the result is true, send the message.
# "=" is the portable string comparison for [ ]; "==" is a bash extension that
# makes the test fail with an error under a strict /bin/sh such as dash, which
# would stop the alert from ever being sent.
if [ "$RESULT" = "$TRUE" ]; then
  send_alert
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment