Skip to content

Instantly share code, notes, and snippets.

@emiljaregran
Forked from petervanderdoes/zfs_health.sh
Last active January 14, 2020 06:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save emiljaregran/99e7115753b47e9ebde476df0f16c8f9 to your computer and use it in GitHub Desktop.
Save emiljaregran/99e7115753b47e9ebde476df0f16c8f9 to your computer and use it in GitHub Desktop.
ZFS Health Check Script
#!/usr/local/bin/bash
#
# MODIFIED VERSION OF:
#
# Calomel.org
# https://calomel.org/zfs_health_check_script.html
# FreeBSD 9.1 ZFS Health Check script
# zfs_health.sh @ Version 0.15
# --- Settings ---------------------------------------------------------------
# Mail envelope used for the alert message.
emailSender='sender@yourdomain.com'
emailReceiver='receiver@yourdomain.com'

# Subject prefix; each failing check appends its own suffix to it.
emailSubject='ZFS pool - HEALTH check'

# Extra body text collected while the checks run (empty until something is
# added).
emailMessage=''

# Pool fill level, in percent, at which the capacity check raises a problem.
maxCapacity=80

# Maximum accepted age of the last scrub, in seconds (8 days).
scrubExpire=$((8 * 24 * 60 * 60))

# Becomes 1 as soon as any check finds something to report.
problems=0
# Health - Scan the "zpool status" report for any keyword that signals a
# degraded or broken array. The match is case-insensitive; every matching
# line is kept in ${condition}.
condition=$(
  /sbin/zpool status \
    | grep -E -i '(DEGRADED|FAULTED|OFFLINE|UNAVAIL|REMOVED|FAIL|DESTROYED|corrupt|cannot|unrecover)'
)

# Any captured line at all means the pools are not healthy.
if [ -n "${condition}" ]; then
  problems=1
  emailSubject="${emailSubject} - fault"
fi
# Capacity - Make sure every pool is filled to less than ${maxCapacity}
# percent, for best performance.

# capacityExceeded LIMIT VALUE...
#
# Returns 0 when at least one VALUE, after removing a trailing "%", is a
# whole number greater than or equal to LIMIT; returns 1 otherwise.
# Values that are not made of digits only (for example a "-" placeholder) are
# skipped, so they cannot make the numeric comparison abort with
# "integer expression expected".
capacityExceeded() {
  local limit=$1 value
  shift
  for value in "$@"; do
    value=${value%\%}
    case "${value}" in
      '' | *[!0-9]*) continue ;;
    esac
    if [ "${value}" -ge "${limit}" ]; then
      return 0
    fi
  done
  return 1
}

if [ "${problems}" -eq 0 ]; then
  capacity=$(/sbin/zpool list -H -o capacity)
  # Word splitting is intentional: zpool prints one capacity value per pool.
  # shellcheck disable=SC2086
  if capacityExceeded "${maxCapacity}" ${capacity}; then
    # Flag the condition once, no matter how many pools are over the limit,
    # so the subject line does not repeat the same suffix.
    emailSubject="${emailSubject} - Capacity Exceeded"
    problems=1
  fi
fi
# Errors - Check the READ, WRITE and CKSUM (checksum) columns of every ONLINE
# row in "zpool status". If any counter is non-zero an email will be sent out.

# deviceErrorCounts
#
# Reads "zpool status" output on stdin and prints "READ WRITE CKSUM" for each
# row that contains ONLINE (but not "state"), has at least five columns, and
# has a counter that is not exactly "0".
# The columns are compared one by one. Gluing them together and filtering on
# "000" would hide real errors such as "10 0 0", which concatenates to "1000".
deviceErrorCounts() {
  awk '/ONLINE/ && !/state/ && NF >= 5 && ($3 != "0" || $4 != "0" || $5 != "0") {
    print $3, $4, $5
  }'
}

if [ "${problems}" -eq 0 ]; then
  errors=$(/sbin/zpool status | deviceErrorCounts)
  if [ -n "${errors}" ]; then
    emailSubject="${emailSubject} - Drive Errors"
    problems=1
  fi
fi
# Scrub Expired - Check that every volume finished a scrub no longer than
# ${scrubExpire} seconds ago.

# lastScrubDay
#
# Reads "zpool status" output on stdin and prints the date at the end of the
# first line mentioning "scrub" as <year><month><day>, e.g. "2020Jan12".
# Prints nothing when no such line exists.
# The fields are counted from the END of the line. The FreeBSD 11.2 layout
# (year/month/day in fields 15/12/13) and the FreeBSD 12.0 layout (fields
# 17/14/15) differ only in the text before the timestamp, so this one rule
# covers both. NOTE(review): assumes the line ends with
# "<month> <day> <time> <year>" - re-verify after a ZFS upgrade.
lastScrubDay() {
  awk '/scrub/ && NF >= 5 {
    printf "%s%s%s\n", $NF, $(NF - 3), $(NF - 2)
    exit
  }'
}

if [ "${problems}" -eq 0 ]; then
  currentDate=$(date +%s)
  zfsVolumes=$(/sbin/zpool list -H -o name)
  for volume in ${zfsVolumes}; do
    # Query the pool once and reuse the report for every test below.
    volumeStatus=$(/sbin/zpool status "${volume}")

    # A pool that was never scrubbed has no date to evaluate. Report it and
    # move on to the next pool instead of abandoning the remaining ones.
    if grep -q "none requested" <<<"${volumeStatus}"; then
      echo "ERROR: You need to run \"zpool scrub $volume\" before this script can monitor the scrub expiration time."
      continue
    fi

    # A running scrub or resilver means there is nothing to complain about
    # for this pool; the other pools still have to be checked.
    if grep -Eq "scrub in progress|resilver" <<<"${volumeStatus}"; then
      continue
    fi

    # Convert the scrub day (at midnight) to epoch seconds with BSD date.
    scrubRawDate=$(lastScrubDay <<<"${volumeStatus}")
    scrubDate=$(date -j -f '%Y%b%e-%H%M%S' "${scrubRawDate}-000000" +%s)

    # Without a numeric timestamp the age arithmetic below would fail.
    case "${scrubDate}" in
      '' | *[!0-9]*)
        echo "ERROR: Could not determine the last scrub date of volume \"$volume\"."
        continue
        ;;
    esac

    if [ $((currentDate - scrubDate)) -ge "${scrubExpire}" ]; then
      # Extend the subject only for the first expired volume.
      if [ "${problems}" -eq 0 ]; then
        emailSubject="$emailSubject - Scrub Time Expired. Scrub Needed on Volume(s)"
      fi
      problems=1
      # The literal "\n" is expanded later, when the mail body is printed.
      emailMessage="${emailMessage}Pool: $volume needs scrub \n"
    fi
  done
fi
# Notifications - On any problem send an email with the pool capacities and
# drive status, under a descriptive subject line. The subject is also written
# to the local logs with logger.

# buildAlertMail POOL_LIST POOL_STATUS
#
# Prints the complete mail (To/From/Subject headers, a blank separator and the
# body) on stdout, using the global emailReceiver, emailSender, emailSubject
# and emailMessage.
# Only emailMessage is printed with %b, because it carries literal "\n"
# sequences that must become newlines. The zpool reports are printed with %s
# so any backslash they contain reaches the reader unchanged.
buildAlertMail() {
  printf 'To: %s\nFrom: %s\nSubject: %s\n\n\n%b\n\n\n %s \n\n\n %s\n' \
    "${emailReceiver}" "${emailSender}" "${emailSubject}" \
    "${emailMessage}" "$1" "$2"
}

if [ "${problems}" -ne 0 ]; then
  buildAlertMail "$(/sbin/zpool list)" "$(/sbin/zpool status)" \
    | ssmtp "${emailReceiver}"
  # Quoted so the subject reaches logger as one unmodified argument.
  logger "${emailSubject}"
fi
### EOF ###
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment