Skip to content

Instantly share code, notes, and snippets.

@openfirmware
Last active August 29, 2015 14:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save openfirmware/2bc252b66a10830275d9 to your computer and use it in GitHub Desktop.
Save openfirmware/2bc252b66a10830275d9 to your computer and use it in GitHub Desktop.
check_zfs for nagios, updated for open-zfs
#!/usr/bin/perl -w
# check_zfs Nagios plugin for monitoring ZFS zpools
# Copyright (c) 2014-2015
# Written by James Badger
# Released under the GNU Public License
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
# Version: 1.1.0
# Date : 22nd April 2015
# This plugin works with open-zfs 0.6.4 on Ubuntu 14.04. It also correctly
# parses the updated pool status line.
# Forked from Nathan Butcher's 2007 version:
# http://exchange.nagios.org/directory/Plugins/Operating-Systems/Solaris/check_zfs/details
# Verbose levels:-
# 1 - Only alert us of zpool health and size stats
# 2 - ...also alert us of failed devices when things go bad
# 3 - ...alert us of the status of all devices regardless of health
#
# Usage: check_zfs <zpool> <verbose level 1-3>
# Example: check_zfs zeepool 1
# ZPOOL zeedata : ONLINE {Size:3.97G Used:183K Avail:3.97G Cap:0%}
use strict;
my %ERRORS=('DEPENDENT'=>4,'UNKNOWN'=>3,'OK'=>0,'WARNING'=>1,'CRITICAL'=>2);
my $state="UNKNOWN";
my $msg="FAILURE";
if ($#ARGV+1 != 2) {
print "Usage: $0 <zpool name> <verbose level 1-3>\n";
exit $ERRORS{$state};
}
my $pool=$ARGV[0];
my $verbose=$ARGV[1];
my $size="";
my $used="";
my $avail="";
my $expandsz="";
my $frag="";
my $cap="";
my $dedup="";
my $health="";
my $dmge="";
if ($verbose < 1 || $verbose > 3) {
print "Verbose levels range from 1-3\n";
exit $ERRORS{$state};
}
my $statcommand="zpool list -H $pool";
if (! open STAT, "$statcommand|") {
print ("$state '$statcommand' command returns no result! NOTE: This plugin needs OS support for ZFS, and execution with root privileges.\n");
exit $ERRORS{$state};
}
while(<STAT>) {
chomp;
($size, $used, $avail, $expandsz, $frag, $cap, $dedup, $health) = /^${pool}\t(\S+)\t(\S+)\t(\S+)\t(\S+)\t(\S+)\t(\S+)\t(\S+)\t(\S+)/;
}
close(STAT);
## check for valid zpool list response from zpool
if (! $health ) {
$state = "CRITICAL";
$msg = sprintf "ZPOOL {%s} does not exist and/or is not responding!\n", $pool;
print $state, " ", $msg;
exit ($ERRORS{$state});
}
## determine health of zpool and subsequent error status
if ($health eq "ONLINE" ) {
$state = "OK";
} else {
if ($health eq "DEGRADED") {
$state = "WARNING";
} else {
$state = "CRITICAL";
}
}
## get more detail on possible device failure
## flag to detect section of zpool status involving our zpool
my $poolfind=0;
$statcommand="zpool status $pool";
if (! open STAT, "$statcommand|") {
$state = 'CRITICAL';
print ("$state '$statcommand' command returns no result! NOTE: This plugin needs OS support for ZFS, and execution with root privileges.\n");
exit $ERRORS{$state};
}
## go through zfs status output to find zpool fses and devices
while(<STAT>) {
chomp;
if (/^\s${pool}/ && $poolfind==1) {
$poolfind=2;
next;
} elsif ( $poolfind==1 ) {
$poolfind=0;
}
if (/NAME\s+STATE\s+READ\s+WRITE\s+CKSUM/) {
$poolfind=1;
}
if ( /^$/ ) {
$poolfind=0;
}
if ($poolfind == 2) {
## special cases pertaining to full verbose
if (/^\sspares/) {
next unless $verbose == 3;
$dmge=$dmge . "[SPARES]:- ";
next;
}
if (/^\s{5}spare\s/) {
next unless $verbose == 3;
my ($sta) = /spare\s+(\S+)/;
$dmge=$dmge . "[SPARE:${sta}]:- ";
next;
}
if (/^\s{5}replacing\s/) {
next unless $verbose == 3;
my $perc;
my ($sta) = /^\s+\S+\s+(\S+)/;
if (/%/) {
($perc) = /([0-9]+%)/;
} else {
$perc = "working";
}
$dmge=$dmge . "[REPLACING:${sta} (${perc})]:- ";
next;
}
## other cases
my ($dev, $sta) = /^\s+(\S+)\s+(\S+)/;
## pool online, not degraded thanks to dead/corrupted disk
if ($state eq "OK" && $sta eq "UNAVAIL") {
$state="WARNING";
## switching to verbose level 2 to explain weirdness
if ($verbose == 1) {
$verbose =2;
}
}
## no display for verbose level 1
next if ($verbose==1);
## don't display working devices for verbose level 2
next if ($verbose==2 && $state eq "OK");
next if ($verbose==2 && ($sta eq "ONLINE" || $sta eq "AVAIL" || $sta eq "INUSE"));
## show everything else
if (/^\s{3}(\S+)/) {
$dmge=$dmge . "<" . $dev . ":" . $sta . "> ";
} elsif (/^\s{7}(\S+)/) {
$dmge=$dmge . "(" . $dev . ":" . $sta . ") ";
} else {
$dmge=$dmge . $dev . ":" . $sta . " ";
}
}
}
## calling all goats!
$msg = sprintf "ZPOOL %s : %s {Size:%s Used:%s Avail:%s Frag: %s Cap:%s} %s\n", $pool, $health, $size, $used, $avail, $frag, $cap, $dmge;
print $state, " ", $msg;
exit ($ERRORS{$state});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment