Last active
December 31, 2015 20:29
-
-
Save adamwalter/8040188 to your computer and use it in GitHub Desktop.
Monitor script for high server load
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /bin/sh
#
# Script to send email notification if a server exceeds a specified load average.
#
# Settings in this section are the only values that normally need editing.

# Selected load average limit. If the 5 minute load average is above this
# number a notification message will be emailed.
NOTIFY="8"
# Value the threshold comparison yields when the limit has been exceeded.
TRUE="1"
# Email address(es) to receive alerts. Separate multiple recipients with spaces.
EMAIL="example1@example.com example2@example.com"
# Create a temp file that will hold the message body. Stop right away if it
# cannot be created, because everything written later would otherwise fail.
TEMPFILE="$(mktemp)" || {
  echo "mktemp failed: cannot create the alert message file" >&2
  exit 1
}
# Remove the temp file on every exit path so repeated runs do not fill /tmp.
trap 'rm -f -- "$TEMPFILE"' EXIT
# The text that precedes the load figures in the uptime output; used as the
# awk field separator, so we specify it here once.
FTEXT='load average:'
# Sample uptime once so all three figures come from the same reading.
# LC_ALL=C keeps "." as the decimal separator; with a "," decimal separator
# the comma-based splitting below would cut the numbers apart.
LOADAVGS="$(LC_ALL=C uptime | awk -F "$FTEXT" '{ print $2 }' | sed 's/ //g')"
# Get the load average for the last 1 minute.
LOAD1MIN="$(printf '%s\n' "$LOADAVGS" | cut -d, -f1)"
# Get the load average for the last 5 minutes.
LOAD5MIN="$(printf '%s\n' "$LOADAVGS" | cut -d, -f2)"
# Get the load average for the last 15 minutes.
LOAD15MIN="$(printf '%s\n' "$LOADAVGS" | cut -d, -f3)"
# awk the memory stats from the "Total:" row (-t) in megabytes (-m).
# The old -o flag is not passed: newer procps-ng releases of free reject it,
# which left this report section empty.
MEMU="$(LC_ALL=C free -tm | awk '/Total:/ {print "Total memory: "$2" MB\nUsed memory: "$3" MB\nFree memory: "$4" MB"}')"
# Get top 10 processes, sorted by CPU (column 3) and RAM (column 4).
TOPCPU="$(ps auxf | LC_ALL=C sort -nr -k 3,3 | head -n 10)"
TOPMEM="$(ps auxf | LC_ALL=C sort -nr -k 4,4 | head -n 10)"
# Email subject
SUBJECT="Alert $(hostname) high load average: $LOAD5MIN"

# Mail message body.
# Appends the complete report to $TEMPFILE in a single write.
# Globals read: TEMPFILE, NOTIFY, LOAD1MIN, LOAD5MIN, LOAD15MIN, MEMU,
#               TOPCPU, TOPMEM
# printf '%s\n' is used instead of echo so that backslashes inside process
# command lines are written literally; some /bin/sh echo builtins would
# interpret them as escape sequences.
write_alert_body() {
  printf '%s\n' \
    "Server 5 min load average $LOAD5MIN is above notification threshold $NOTIFY" \
    " " \
    "Hostname: $(hostname)" \
    "Local Date & Time : $(date)" \
    " " \
    "-----------------------------------" \
    "Load averages:" \
    "-----------------------------------" \
    " " \
    "Last 1 minute: $LOAD1MIN" \
    "Last 5 minutes: $LOAD5MIN" \
    "Last 15 minutes: $LOAD15MIN" \
    " " \
    "-----------------------------------" \
    "Memory stats:" \
    "-----------------------------------" \
    " " \
    "$MEMU" \
    " " \
    "-----------------------------------" \
    "Top 10 processes (by CPU usage):" \
    "-----------------------------------" \
    " " \
    "$TOPCPU" \
    " " \
    "-----------------------------------" \
    "Top 10 processes (by memory usage):" \
    "-----------------------------------" \
    " " \
    "$TOPMEM" >> "$TEMPFILE"
}
write_alert_body
# Look if the limit has been exceeded, compared with the last 5 min load average.
#
# load_exceeds_limit LOAD LIMIT
#   Prints "1" when LOAD is numerically greater than LIMIT, otherwise "0".
#   awk is used for the floating point comparison because the rest of the
#   script already depends on it, whereas bc is frequently not installed.
#   Adding 0 forces a numeric comparison, so an empty or non-numeric value
#   counts as 0 instead of producing a syntax error.
load_exceeds_limit() {
  awk -v load="$1" -v limit="$2" \
    'BEGIN { if (load + 0 > limit + 0) print 1; else print 0 }'
}
RESULT=$(load_exceeds_limit "$LOAD5MIN" "$NOTIFY")
# Send email function.
# Mails the contents of $TEMPFILE to every address in $EMAIL with $SUBJECT as
# the subject, then exits the script with mail's exit status.
# Globals read: SUBJECT, EMAIL, TEMPFILE
send_alert()
{
  # The subject carries an embedded newline followed by "X-Priority: 1".
  # printf builds it because "echo -e" is not portable under /bin/sh.
  # The former "ps auxwwwf |" prefix was dropped: mail's stdin is taken from
  # the "<" redirection, so the piped ps output never reached the message.
  # $EMAIL is left unquoted on purpose so each address becomes its own argument.
  # shellcheck disable=SC2086
  mail -s "$SUBJECT $(printf '\nX-Priority: 1')" $EMAIL < "$TEMPFILE"
  exit
}
# If the result is true, send the message.
# "=" is the string comparison operator that test/[ defines for /bin/sh;
# "==" is a bash extension and fails under stricter shells.
if [ "$RESULT" = "$TRUE" ]; then
  send_alert
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment