Skip to content

Instantly share code, notes, and snippets.

@bokutin
Last active December 31, 2015 18:29
Show Gist options
  • Save bokutin/8027382 to your computer and use it in GitHub Desktop.
Save bokutin/8027382 to your computer and use it in GitHub Desktop.
Plugin to monitor RAID status for mpt-status
#!/usr/bin/perl -w
#
# (c) 2013 Tomohiro Hosaka bokutin@bokut.in
#
# Plugin to monitor RAID status for mpt-status
#
#
# (c) 2007 Nathan Rutman nathan@clusterfs.com
#
# Plugin to monitor RAID status
#
# Results are % of healthy drives in a raid device
# and % rebuilt of devices that are resyncing.
#
#%# family=contrib
#%# capabilities=autoconf
use strict;
use warnings;
# When true, mpt-status is invoked through sudo (see the visudo sample
# after __END__); when false it is executed directly.
my $use_sudo = 1;

# The first command-line argument selects the mode: "autoconf", "config",
# or anything else / nothing for a normal value fetch.
my $mode        = defined $ARGV[0] ? $ARGV[0] : '';
my $is_autoconf = $mode eq 'autoconf' ? 1 : 0;
my $is_config   = $mode eq 'config'   ? 1 : 0;

# autoconf: report whether this plugin can work on this host.
# Munin's autoconf convention is a single line, "yes" or "no (reason)",
# with exit status 0 in both cases, so the negative answers follow that
# form instead of free text with a non-zero exit status.
if ($is_autoconf) {
    if (!-x "/usr/sbin/mpt-status" or !-e "/dev/mptctl") {
        print "no (no RAID devices)\n";
    }
    # "sudo -n" never prompts for a password, matching the non-interactive
    # way the plugin later runs the command. Empty output from "sudo -l"
    # is taken to mean that the command is not permitted.
    elsif ($use_sudo and !`sudo -n -l /usr/sbin/mpt-status`) {
        print "no (sudo is not allowed to run /usr/sbin/mpt-status)\n";
    }
    else {
        print "yes\n";
    }
    exit 0;
}
# config: emit the graph-wide attributes once, before any per-device
# field definitions are printed further down.
if ($is_config) {
    my @graph_attrs = (
        'graph_title RAID status',
        'graph_category disk',
        'graph_info This graph monitors RAID disk health. Values are percentage of healthy drives in each raid group. Degraded devices are marked Critical.',
        'graph_args --base 1000 -l 0',
        'graph_vlabel % healthy/rebuilt',
        'graph_scale no',
    );
    print "$_\n" for @graph_attrs;
}
# Capture the output of "mpt-status -n", one list element per line.
# Both branches must run the same command; the direct branch used to pass
# a stray extra "mpt-status" argument.
my @out = $use_sudo ? `sudo -n /usr/sbin/mpt-status -n` : `/usr/sbin/mpt-status -n`;

# Turn every output line into a hash of its "key:value" fields (see the
# sample output and the Dumper listing after __END__).
my @hashes = map {
    my %field;
    # Split in front of each "word:" so that values containing spaces
    # (e.g. "product_id:SAMSUNG HE160HJ") stay in one piece.
    for my $token (split /\s+(?=\S+:)/, $_) {
        my ($key, $value) = $token =~ /^([^:]+):\s*(.*?)\s*$/;
        # Blank lines, error text and leading-whitespace fragments carry no
        # "key:" prefix; skip them instead of storing an undefined key.
        next unless defined $key;
        $field{$key} = $value;    # a repeated key keeps its last value
    }
    \%field;
} @out;
# Volume records are the ones carrying a vol_id; only ENABLED volumes are
# reported. The defined() check keeps a record without flags from warning.
my @ioc = grep {
    exists $_->{vol_id} and defined $_->{flags} and $_->{flags} =~ /ENABLED/
} @hashes;

for my $ioc (@ioc) {
    my $dev  = "ioc" . $ioc->{ioc};
    my $type = defined $ioc->{raidlevel} ? $ioc->{raidlevel} : '';

    # Physical disk records (those with a phys_id) on the same ioc are the
    # members of this volume; the ONLINE ones count as healthy.
    my @members = grep { exists $_->{phys_id} and $_->{ioc} == $ioc->{ioc} } @hashes;
    my @actives = grep { $_->{state} and $_->{state} eq 'ONLINE' } @members;
    my $members = join(",", map { "scsi_id:" . $_->{scsi_id} } @members);
    my $nmem    = @members;
    my $nact    = @actives;

    if ($is_config) {
        print "$dev.label $dev\n";
        print "$dev.info $type $members\n";
        # "98:" is a min:max range with only the minimum given, i.e. the
        # field turns critical when its value drops below 98.
        # 98 rather than 100: because of an unfound bug, the value is
        # sometimes reported as 99.XX even when the OS reports 100.
        print "$dev.critical 98:\n";
        print "${dev}_rebuild.label $dev rebuilt\n";
        print "${dev}_rebuild.info $type\n";
        # Same 98 margin as above, for the same reason.
        print "${dev}_rebuild.critical 98:\n";
    }
    else {
        # Percentage of member disks that are ONLINE. With no member
        # records at all, report 0 so the critical threshold fires instead
        # of dying on a division by zero.
        my $pct  = $nmem ? 100 * $nact / $nmem : 0;
        my $rpct = 100;
        if ($pct < 100) {
            # Rebuild progress is the sync_state of the first member that
            # is not fully synced. Members without a numeric sync_state
            # are ignored so that an empty value is never printed.
            my ($rebuild) = grep {
                defined $_->{sync_state}
                    and $_->{sync_state} =~ /^\d+(?:\.\d+)?$/
                    and $_->{sync_state} != 100
            } @members;
            $rpct = $rebuild ? $rebuild->{sync_state} : 0;
        }
        print "$dev.value $pct\n";
        print "${dev}_rebuild.value $rpct\n";
    }
}
exit 0;
__END__
Sample output:
raid machine:
# mpt-status -n
ioc:0 vol_id:0 type:IM raidlevel:RAID-1 num_disks:2 size(GB):148 state: OPTIMAL flags: ENABLED
ioc:0 phys_id:1 scsi_id:8 vendor:ATA product_id:SAMSUNG HE160HJ revision:0-24 size(GB):149 state: ONLINE flags: NONE sync_state: 100 ASC/ASCQ:0xff/0xff SMART ASC/ASCQ:0xff/0xff
ioc:0 phys_id:0 scsi_id:1 vendor:ATA product_id:SAMSUNG HE160HJ revision:0-24 size(GB):149 state: ONLINE flags: NONE sync_state: 100 ASC/ASCQ:0xff/0xff SMART ASC/ASCQ:0xff/0xff
scsi_id:1 100%
scsi_id:0 100%
non raid machine:
# mpt-status -n
open /dev/mptctl: No such file or directory
Try: mknod /dev/mptctl c 10 220
Make sure mptctl is loaded into the kernel
# mpt-status -n > /dev/null
open /dev/mptctl: No such file or directory
Try: mknod /dev/mptctl c 10 220
Make sure mptctl is loaded into the kernel
# mpt-status -n > /dev/null 2>&1 ; echo $?
1
warn Dumper \@hashes:
$VAR1 = [
{
'flags' => 'ENABLED',
'ioc' => '0',
'raidlevel' => 'RAID-1',
'state' => 'OPTIMAL',
'vol_id' => '0',
'size(GB)' => '148',
'type' => 'IM',
'num_disks' => '2'
},
{
'sync_state' => '100',
'ASC/ASCQ' => '0xff/0xff',
'flags' => 'NONE',
'ioc' => '0',
'phys_id' => '1',
'state' => 'ONLINE',
'scsi_id' => '8',
'revision' => '0-24',
'size(GB)' => '149',
'product_id' => 'SAMSUNG HE160HJ',
'vendor' => 'ATA'
},
{
'sync_state' => '100',
'ASC/ASCQ' => '0xff/0xff',
'flags' => 'NONE',
'ioc' => '0',
'phys_id' => '0',
'state' => 'ONLINE',
'scsi_id' => '1',
'revision' => '0-24',
'size(GB)' => '149',
'product_id' => 'SAMSUNG HE160HJ',
'vendor' => 'ATA'
},
{
'scsi_id' => '1 100%'
},
{
'scsi_id' => '0 100%'
}
];
visudo sample:
nobody ALL=(ALL) NOPASSWD: /usr/sbin/mpt-status
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment