Skip to content

Instantly share code, notes, and snippets.

@danboid
Created April 29, 2015 15:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save danboid/995fbc789b19b41c08a4 to your computer and use it in GitHub Desktop.
Save danboid/995fbc789b19b41c08a4 to your computer and use it in GitHub Desktop.
#!/usr/bin/perl -w
use strict;
use Getopt::Std;
use Socket;
use Fcntl;
use Fcntl ':mode';
use Data::Dumper;
# Global variables for error checking against
# DF Full Level
my $FULLLEVEL = 90;
my $UPTIME = 3600;
my $FIVEMIN = 1.0;
my (%hosts, %badhosts);
my ($portname, $temp, $line);
my ($ref,$bad,$tmp,@raid);
my ($inftype);
my ($retVal,$retSummary,$retDetail,%opts,$hostListRef,$hostInfoRef);
#local (*SOCK);
# Overall exit status (0 = ok, 1 = warning, 2 = critical) and the text
# accumulated by the individual checks.
$retVal = 0;
$retSummary = $retDetail = '';
#---------------------------------------------------------------
#
# Options:-
#
# h - Help (usage)
# D - Debug run, used before installing or to check on servers
# P - Port number/name to query (default 'medinf')
# T - Timeout in seconds (default 5)
# d - Disk usage threshold in percent (e.g. 85)
# I - Inode usage threshold in percent
# r - RAID check (expected active/working/failed/spare counts, separated by slashes)
# m - MegaRAID check (takes an argument; only its presence is tested)
# u - Uptime check (a min in hours and a max in days, separated by a slash / )
# l - 5 min load avg, enter float value, alarms if greater
# p - Max number of processes, both running and total separated by a slash (running/total)
# k - Kernel version (debug output only, never alarms)
# b - DB Space threshold, followed by selected db spaces, all separated by colons (eg 80:1:2:3:4)
# R - Routing table check
# M - Reports if lost messages are present on a GPRS station
# v - VDS message count threshold      g - VDS backlog size threshold
# V - G4S VDS message count threshold  G - G4S VDS backlog size threshold
# f - Check for read only file systems
# s - Swap space check
# z - Check size of radius files (Voda and O2 only)
# c - RedHat Clustering Check
# e - EMS Daemon process checking
#
# Defaults are set before getopts() runs so that the command line overrides them.
$opts{'D'} = 0;
$opts{'P'} = 'medinf';
$opts{'T'} = 5;
# -P and -T must be declared here (with ':' because they take a value).
# Previously only lowercase 'p:' (twice) and 't:' were listed, so "-P 1234"
# was rejected as unknown and its value was then treated as a hostname.
# 't:' is still accepted (and ignored, as before) so that existing
# invocations using it keep parsing the same way.
getopts('hDP:T:t:d:I:l:u:v:V:g:G:r:m:kp:b:fRMcezs',\%opts);
if (defined $opts{'h'})
{
    ShowUsage();
}
# Hosts to query. The list starts empty (the entries below are kept for
# reference only) and is filled from the remaining command line arguments.
$hostListRef =
[
# 'z',
# 'zam',
# 'hinge','bracket','nmcpri','viper',
# 'emailgw15-1',
# 'ms2-1','ms2-5',
# 'ms15-1','ms15-2','ms15-3',
# 'as2-2','as2-3',
# 'as15-1','as15-2',
# 'gsms2-1','auth2-3','vpn2-2',
# 'gprs2-1','gprs2-2','gprs2-3',
# 'gprs15-1','gprs15-2','gprs15-3','gprs15-4',
# 'host2-1','host2-2','host2-3','host2-4','host2-5','host2-6',
# 'host2-7','host2-8','host2-9',
# 'host15-1','host15-2','host15-3','host15-4','host15-5',
# 'host15-6','host15-7','host15-8','host15-9','host15-10','host15-11',
# 'host15-12','host15-13'
];
ReadHosts($hostListRef);
# Connect to the servers, stream the data back and then decode into a structured hash
{
    # Make connections to all servers, stream all the data back in parallel
    # and store it in a hash keyed by hostname.
    my $rawHostInfoRef = ConnectAndStream($opts{'P'},$hostListRef,$opts{'T'});
    # ConnectAndStream returns undef when the port/service name cannot be
    # resolved; dereferencing that would abort with an obscure strict-refs
    # error, so report it explicitly instead.
    if (!defined $rawHostInfoRef)
    {
        printf "Unknown port or service name '%s'\n",
            (defined $opts{'P'} ? $opts{'P'} : '');
        exit 3;
    }
    # Check for basic connection errors. Keys starting with '-' are metadata
    # (e.g. '-retval'), not hostnames.
    foreach my $h (grep !/^\-/,keys (%{$rawHostInfoRef}) )
    {
        if ($rawHostInfoRef->{$h}->{error} == 1)
        {
            $retVal = 2;
            $retSummary .= $h.' ';
            $retDetail .= sprintf "%-13s - No Response (%s)\n",$h,$rawHostInfoRef->{$h}->{errstr};
            # Drop the host so no checks are attempted on missing data
            delete $rawHostInfoRef->{$h};
        }
    }
    $hostInfoRef = DecodeRawInfo($rawHostInfoRef,$opts{'D'});
}
# Now at this point the raw info has been structured into a nice hash, so it's
# time to run the specific checks for every section each host reported.
foreach my $h (sort keys (%{$hostInfoRef}))
{
    foreach my $param (sort keys %{$hostInfoRef->{$h}})
    {
        CheckParam(\$retVal,\$retSummary,\$retDetail,$h,$param,$hostInfoRef->{$h}->{$param});
    }
}
# The detail text is emitted on one line with <br> in place of newlines.
$retDetail =~ s/\n/<br>/ig;
if ($retVal == 0)
{
    print "Ok $retDetail";
}
else
{
    print $retDetail;
}
exit $retVal;
#-------------------------------------------------------------------------------------------------------------------------------------------
sub ShowUsage
{
    # Prints the usage/help text to STDERR and terminates the script with
    # exit status 3. Never returns.
    my @usage_lines = (
        "\n",
        "\n",
        "$0 : Usage\n",
        "$0 [-D] [-P <number|service>] [-T <timeout>] [-d <threshold>] [-r <pattern>] [-u <pattern>] [-l <max>] [-p <pattern>] [-k]\n",
        "\n",
        "$0 is a script primarily for running under mon to determine server environmental issues, previously handled by df web page\n",
        "\n",
        "-D - Debug - Switching this option on runs the script in a debug mode, useful for finding out what is actually\n",
        " going on. Combine with other switches for more useful info. Default is off. Run from a shell.\n",
        "-P - Port to query on the server, can be specified as number or service name, default medinf\n",
        "-d - Check disk usage, followed by threshold value of percent utilised\n",
        "-r - Check for RAID faults, specified as pattern 'A/W/F/S', usual value is '2/3/0/1'\n",
        "-u - Check for notifiable uptime, either too low or too high, specified as 'min/max' where min in hours, max in days\n",
        "-l - Check for loadavg, specified as float value for 5 min load, alerts if exceeded\n",
        "-p - Check for abnormal numbers of process, specified as 'running/total'\n",
        "-s - Check for swap memory being used\n",
        "-k - No alarm raised, only useful in debug mode, specifies the kernel running on server\n",
        "-b - Check database space usage for nodes running informix 9+. Input is the threshold percentage followed by the database spaces needed to be checked, separated by colons (eg 80:1:2:3:4)\n",
        "-f - Check for read only file systems.\n",
        "-c - Check status of RedHat Cluster services.\n",
        "-z - Check Radius Login file size.\n",
        "\n",
        "\n",
        "\n",
    );
    foreach my $usage_line (@usage_lines)
    {
        print STDERR $usage_line;
    }
    exit 3;
}
#-------------------------------------------------------------------------------------------------------------------------------------------
# Pass this function a port number (or service name), an arrayref containing
# hostnames and a timeout in seconds. It connects to all hosts in parallel on
# that port using non-blocking sockets and collects whatever each host sends.
# It runs until all connections have been closed by the remote end or nothing
# has been received for timeout seconds (which may be fractional).
#
# Returns undef if the service name cannot be resolved, otherwise a hash ref:-
# href-> {'-retval'} - Overall retcode (0 ok, 1 lookup/socket setup error,
#                      2 connect/read/timeout error)
#        {hostname} -> {error}  - 0 - OK
#                               - 1 - Error occurred
#                      {errstr} - String describing error
#                      {retstr} - String returned from remote port
#
# A host that is still connected when the timeout expires keeps whatever data
# it has sent; if it sent nothing at all it is flagged as an error so that a
# silent host is not mistaken for a healthy one.
#
# Also function maintains internal hash to hold info during the query
#
# ihref-> {hostname} -> {addr} - Packed address of host
#                    -> {sock} - Filehandle of the socket
#                    -> {file} - File descriptor number used during select,
#                                -1 when the host has no open socket
sub ConnectAndStream
{
    my ($port,$hostRef,$timeout) = @_;
    my $href = {};
    my $ihref = {};
    my $retVal = 0;
    # Get the protocol value
    my $proto = getprotobyname('tcp');
    # Get the port number assuming it has been passed by name
    if ($port !~ m/^\d+$/)
    {
        $port = getservbyname($port,'tcp');
    }
    if (!defined $port)
    {
        return undef;
    }
    # Each host is handled once even if it is listed several times; a repeat
    # would otherwise overwrite (and leak) the first socket.
    my %seen;
    my @hosts = grep { !$seen{$_}++ } @{$hostRef};
    # Lookup all the hostnames and store in the internal hash ready for connecting
    foreach my $hr (@hosts)
    {
        my $r = $href->{$hr} = { error => 0 };     # Assume things start well
        my $ir = $ihref->{$hr} = { file => -1 };   # No socket yet
        $ir->{addr} = gethostbyname($hr);
        if (!defined $ir->{addr})
        {
            $retVal = 1;
            $r->{error} = 1;
            $r->{errstr} = "Host lookup error";
        }
    }
    # Go through all hosts and start a non-blocking connect
    foreach my $hr (@hosts)
    {
        my $r = $href->{$hr};
        next if $r->{error} == 1;
        my $ir = $ihref->{$hr};
        my $paddr = sockaddr_in($port,$ir->{addr});
        my $sock;
        if (!socket($sock,PF_INET,SOCK_STREAM,$proto))
        {
            $retVal = 1;
            $r->{error} = 1;
            $r->{errstr} = "Can't create socket: $!";
            next;
        }
        # Make socket NON_BLOCK before connect. F_GETFL hands the flags back
        # as the return value ("0 but true" for zero), not via the argument.
        my $flags = fcntl($sock,F_GETFL,0);
        if ( (!$flags) || (!fcntl($sock,F_SETFL,($flags + 0) | O_NONBLOCK)) )
        {
            $retVal = 1;
            $r->{error} = 1;
            $r->{errstr} = "Can't fcntl on socket: $!";
            close($sock);
            next;
        }
        # Connecting a non-blocking socket normally fails immediately with
        # EINPROGRESS while the connection continues in the background; the
        # real outcome is picked up by select/sysread below. The errno is
        # tested symbolically because the message text depends on the locale.
        if ( (!connect($sock,$paddr)) && (!$!{EINPROGRESS}) )
        {
            $retVal = 2;
            $r->{error} = 1;
            $r->{errstr} = "Can't connect: $!";
            close($sock);
            next;
        }
        # Beware this is not the 4-arg select, this merely makes the socket unbuffered
        my $oldfh = select($sock); $| = 1; select($oldfh);
        $ir->{sock} = $sock;
        $ir->{file} = fileno($sock);
    }
    # Why the wait loop stopped while sockets were still open
    my $stopReason = "No data received within timeout";
    while (1)
    {
        # Create the bit vector for the select statement from every host
        # that still has an open socket
        my $rbits = '';
        my $active = 0;
        foreach my $h (@hosts)
        {
            my $fd = $ihref->{$h}->{file};
            next if $fd < 0;
            vec($rbits,$fd,1) = 1;
            $active++;
        }
        # No point going on once every socket has been closed
        last if $active == 0;
        my $nfound = select($rbits,undef,undef,$timeout);
        if ($nfound < 0)
        {
            # Interrupted by a signal: just try again
            next if $!{EINTR};
            $stopReason = "select failed: $!";
            last;
        }
        # If nothing ready after timeout, then no point going on
        last if $nfound == 0;
        # Read all fds that are ready
        foreach my $h (@hosts)
        {
            my $ir = $ihref->{$h};
            my $r = $href->{$h};
            next if $ir->{file} < 0;
            next if !vec($rbits,$ir->{file},1);
            my $chunk;
            my $num = sysread($ir->{sock},$chunk,4096);
            if (!defined $num)
            {
                # Transient conditions: leave the socket open and retry
                next if $!{EINTR} || $!{EAGAIN};
                $retVal = 2;
                $r->{error} = 1;
                $r->{errstr} = "Read error: $!";
                close($ir->{sock});
                $ir->{file} = -1;
            }
            elsif ($num == 0)
            {
                # Remote end closed the connection: this host is complete
                close($ir->{sock});
                $ir->{file} = -1;
            }
            else
            {
                $r->{retstr} .= $chunk;
            }
        }
    }
    # All fds are exhausted now or errored, so shut down any that are left
    foreach my $hr (@hosts)
    {
        my $ir = $ihref->{$hr};
        next if $ir->{file} < 0;
        close($ir->{sock});
        $ir->{file} = -1;
        my $r = $href->{$hr};
        if (!defined $r->{retstr} || $r->{retstr} eq '')
        {
            # Connected (or still connecting) but never sent a byte
            $retVal = 2;
            $r->{error} = 1;
            $r->{errstr} = $stopReason;
        }
    }
    $href->{'-retval'} = $retVal;
    return $href;
}
#---------------------------------------------------------------------------------------
sub ReadHosts
{
    # Moves every remaining command line argument from @ARGV onto the array
    # referenced by $ref; after option parsing these are the hostnames to
    # connect to. Empty arguments are discarded.
    my ($ref) = @_;
    # Loop on the argument count rather than on the truth of $ARGV[0]:
    # an argument such as "0" is false in Perl and used to end the list early.
    while (@ARGV)
    {
        my $host = shift @ARGV;
        next unless defined $host && length $host;
        push @{$ref},$host;
    }
}
#---------------------------------------------------------------------------------------
sub ExtractLine
{
    # Removes the first line from the buffer referenced by $iref and returns
    # it without its line terminator. One or two consecutive newlines are
    # consumed as the terminator (so a single blank line directly after a
    # line is swallowed with it). Returns '' when the buffer is empty.
    my ($iref) = @_;
    return '' unless defined $$iref && length $$iref;
    my $line;
    if ($$iref =~ s/\A([^\n]*)\n{1,2}//)
    {
        $line = $1;
    }
    else
    {
        # Last line has no trailing newline: hand it back instead of
        # leaving it stuck in the buffer forever.
        $line = $$iref;
        $$iref = '';
    }
    # Tolerate CRLF line endings from the remote end
    $line =~ s/\r\z//;
    return $line;
}
#---------------------------------------------------------------------------------------
#
# This decodes the raw info received from the socket into a nicely structured hash/array structure as follows:-
#
# ref -> {hostname} -> {paramname} -> []
#
# ref is returned by this function
# hostname will be the hostname of the host in question, param can be raid, uptime, load, df, basically it's taken
# from the "!!!start <param> info!!!" text received from the socket
# Then for each param an array where each element is a line of data related to that parameter;
# a section is closed by a line starting with "!!!end". Blank lines are ignored.
#
# Arguments: $rawRef - hash ref as returned by ConnectAndStream (keys starting with '-' are skipped)
#            $debug  - when 1, prints the section names found for each host
#
sub DecodeRawInfo
{
    my ($rawRef,$debug) = @_;
    my $verbose = (defined $debug && $debug == 1) ? 1 : 0;
    my $hr = {};
    foreach my $h (sort grep !/^\-/,keys (%{$rawRef}) )
    {
        my $ir = $hr->{$h} = {};
        my $state = 'end';      # 'end' = between sections, 'data' = inside one
        my $param;
        if ($verbose)
        {
            printf "Host %13s -> ",$h;
        }
        my $raw = $rawRef->{$h}->{retstr};
        # Loop on the remaining buffer, not on the truth of the extracted line:
        # a blank line or a data line consisting of "0" is false in Perl and
        # used to abort decoding, silently dropping every later section.
        while (defined $raw && length $raw)
        {
            my $before = length $raw;
            my $line = ExtractLine(\$raw);
            # Nothing consumed means ExtractLine could not split off another
            # line; stop rather than spin forever.
            last if length($raw) == $before;
            next if !defined $line || $line eq '';
            if ($state eq 'data')
            {
                if ($line !~ m/^!!!/)
                {
                    push @{$ir->{$param}},$line;
                }
                elsif ($line =~ m/^!!!end/)
                {
                    $state = 'end';
                }
                # any other "!!!" marker inside a section is ignored
            }
            elsif ($line =~ m/^!!!start\s+(.*?)\s+info!!!$/)
            {
                $param = $1;
                if ($verbose)
                {
                    print "$param ";
                }
                $ir->{$param} = [];
                $state = 'data';
            }
            else
            {
                print "SW Error on host $h: Failed to match a start line $line \n";
            }
        }
        if ($verbose)
        {
            print "\n";
        }
    }
    return $hr;
}
#---------------------------------------------------------------------------------------
sub CheckParam
{
    # Routes one decoded section ($param with its data lines in $aref) of one
    # host to the check routine(s) responsible for it. A check only runs when
    # the command line option that enables it was given. Some sections feed
    # two checks; they run in the order listed in the table.
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;
    # section name, enabling option letter, handler
    my @dispatch = (
        [ 'df',          'd', \&CheckDf         ],
        [ 'inode',       'I', \&CheckInode      ],
        [ 'kernel',      'k', \&CheckKernel     ],
        [ 'uptime',      'u', \&CheckUptime     ],
        [ 'load',        'l', \&CheckLoad       ],
        [ 'load',        'p', \&CheckPCount     ],
        [ 'ps',          'p', \&CheckPs         ],
        [ 'raid',        'r', \&CheckRaid       ],
        [ 'megaraid',    'm', \&CheckMegaraid   ],
        [ 'db_usage',    'b', \&CheckDBUsage    ],
        [ 'route',       'R', \&CheckRoute      ],
        [ 'lostmsg',     'M', \&CheckLostMsgs   ],
        [ 'vdsCount',    'v', \&CheckVdsMsg     ],
        [ 'vdsCount',    'g', \&CheckVdsSize    ],
        #TEMPORARY CHECK FOR G4S
        [ 'vdsG4SCount', 'V', \&CheckG4SVdsMsg  ],
        [ 'vdsG4SCount', 'G', \&CheckG4SVdsSize ],
        [ 'readOnly',    'f', \&readOnly        ],
        [ 'cluster',     'c', \&rhCluster       ],
        [ 'radfileSize', 'z', \&radfileSize     ],
        [ 'memfree',     's', \&CheckSwap       ],
        [ 'tibemsd',     'e', \&CheckEMS        ],
    );
    foreach my $entry (@dispatch)
    {
        my ($section,$flag,$handler) = @{$entry};
        next unless $param eq $section;
        next unless defined $opts{$flag};
        $handler->($rvRef,$rsRef,$rdRef,$host,$param,$aref);
    }
}
#---------------------------------------------------------------------------------------
sub CheckEMS
{
    # Checks the memory use of the processes listed in the 'tibemsd' section.
    # Line 0 is only shown in debug mode; every following line is split on
    # whitespace into process id, memory figure (reported as K) and port.
    #   memory >= 1048576 -> critical (2)
    #   memory >   768000 -> warning  (1)
    #   otherwise         -> "Ok" line in the detail text
    # $rvRef/$rsRef/$rdRef reference the overall status, summary and detail.
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;
    my $warnLimit = 768000;
    my $critLimit = 1048576;
    if ($opts{'D'} == 1)
    {
        printf "Host %13s -> %s\n",$host,$aref->[0];
    }
    foreach my $i (1 .. $#$aref)
    {
        my ($proc,$memory,$port) = split (' ',$aref->[$i]);
        next unless defined $memory;    # not a process line
        if ($memory >= $critLimit)
        {
            # The old test required memory to be above 1048576 AND below
            # 768000 at the same time, so this branch could never fire.
            $$rvRef = 2;
            $$rdRef .= sprintf "Critical! Process id %s (listening at %s) is using %u K (more than 1gig) of memory\n",$proc,$port,$memory;
            $$rsRef .= $host.' ';
        }
        elsif ($memory > $warnLimit)
        {
            # Only raise the status; never lower a critical set elsewhere
            $$rvRef = 1 if $$rvRef < 1;
            $$rdRef .= sprintf "Warning! Process id %s (listening at %s) is using %u K (more than 750meg) of memory\n",$proc,$port,$memory;
            $$rsRef .= $host.' ';
        }
        else
        {
            # Decided per process, not by whatever status earlier checks left
            $$rdRef .= sprintf "Ok! Process %s is using %u K of memory\n",$proc,$memory;
            $$rsRef .= $host.' ';
        }
    }
}
sub CheckSwap
{
    # Checks swap usage from the 'memfree' section. Lines starting with
    # "Swap:" are split on whitespace into label, total and used.
    #   total == 0   -> critical, "no swap available"
    #   used  > 75%  -> critical (2)
    #   used  > 60%  -> warning  (1)
    #   otherwise    -> "Ok" line in the detail text
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;
    if ($opts{'D'} == 1)
    {
        printf "Host %13s -> %s\n",$host,$aref->[0];
    }
    foreach my $i (1 .. $#$aref)
    {
        next unless $aref->[$i] =~ m/^Swap:/;
        my (undef,$total,$used) = split (' ',$aref->[$i]);
        next unless defined $total && defined $used;    # malformed line
        if ($total == 0)
        {
            $$rvRef = 2;
            $$rsRef .= $host.' ';
            $$rdRef .= sprintf "Critical! %s has no swap available\n",$host;
            next;       # no percentage can be computed
        }
        my $percused = ($used / $total)*100;
        if ($percused > 75)
        {
            $$rvRef = 2;
            $$rdRef .= sprintf "Critical! %s is using %.2f percent (>75 percent) of swap\n",$host,$percused;
            $$rsRef .= $host.' ';
        }
        elsif ($percused > 60)
        {
            # Only raise the status; never lower a critical set elsewhere
            $$rvRef = 1 if $$rvRef < 1;
            $$rdRef .= sprintf "Warning! %s is using %.2f percent (>60 percent) of swap\n",$host,$percused;
            $$rsRef .= $host.' ';
        }
        else
        {
            # Reported for this host's own swap figure, independent of the
            # status earlier checks or hosts have left behind
            $$rdRef .= sprintf "Ok! %s is using %.2f percent of swap\n",$host,$percused;
            $$rsRef .= $host.' ';
        }
    }
}
sub CheckDf
{
    # Checks disk usage from the 'df' section against the -d threshold.
    # Line 0 is skipped (header); every line starting with /dev is split on
    # whitespace and field 5 (use%) / field 6 (mount point) are used. Each
    # file system is listed in the detail text; one above the threshold makes
    # the status critical (2) and adds the host to the summary once.
    #
    # History (27-04-11): a host with an unusually long device path made df
    # print the entry on two lines, leaving the percentage undefined here.
    # Such a wrapped entry (a /dev line with a single field) is now joined
    # with the line that follows it, and entries without a numeric
    # percentage are skipped instead of being reported as 0%.
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;
    if ($opts{'D'} == 1)
    {
        foreach my $i (0 .. $#$aref)
        {
            printf "Host %13s -> %s\n",$host,$aref->[$i];
        }
    }
    my $bad = 0;
    my $i = 1;
    while ($i <= $#$aref)
    {
        my $entry = $aref->[$i++];
        next unless defined $entry && $entry =~ m/^\/dev/;
        my @field = split (' ',$entry);
        # Wrapped entry: the figures are on the next (non-/dev) line
        if (@field == 1 && $i <= $#$aref && $aref->[$i] !~ m/^\/dev/)
        {
            push @field, split (' ',$aref->[$i++]);
        }
        my ($v,$mp) = @field[4,5];
        next unless defined $v && $v =~ m/^(\d+)%?$/;
        my $val = $1;
        $mp = $field[0] unless defined $mp;     # fall back to the device name
        if ($val > $opts{'d'})
        {
            $$rvRef = 2;
            if ($bad == 0)
            {
                $bad = 1;
                $$rsRef .= $host.' ';
            }
        }
        $$rdRef .= sprintf "%14s -> %u%% \n",$mp,$val;
    }
}
#---------------------------------------------------------------------------------------
sub CheckLostMsgs
{
    # Raises a critical (2) when the first line of the 'lostmsg' section is
    # exactly "yes"; in debug mode that first line is printed as well.
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;
    printf "Host %13s -> %s\n",$host,$aref->[0] if $opts{'D'} == 1;
    return unless $aref->[0] eq "yes";
    $$rvRef = 2;
    $$rsRef .= $host.' ';
    $$rdRef .= sprintf "%-13s - Lost messages in /usr/medway for GPRS station(s)\n",$host;
}
#---------------------------------------------------------------------------------------
sub CheckKernel
{
    # Informational only: never changes status, summary or detail.
    # Drops everything up to and including the first whitespace character
    # from the first line of the 'kernel' section (modifying it in place)
    # and prints the remainder when running in debug mode.
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;
    $aref->[0] =~ s/^.*?\s(.*)$/$1/;
    printf "Host %13s -> %s\n",$host,$aref->[0] if $opts{'D'} == 1;
}
#---------------------------------------------------------------------------------------
sub CheckUptime
{
    # Checks the host uptime against the -u option, given as 'min/max' with
    # min in hours and max in days. The first line of the 'uptime' section is
    # split on whitespace into uptime and idle time, both taken as seconds.
    #   uptime < min -> warning (1)
    #   uptime > max -> note in the detail text, status left untouched
    #   otherwise    -> "Ok" line in the detail text
    # A missing max means no upper limit.
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;
    my ($min,$max) = split('/',$opts{'u'});
    $min = 0 unless defined $min && length $min;
    $min *= 3600;                               # min is given in hours
    my $hasMax = (defined $max && length $max) ? 1 : 0;
    $max *= 3600 * 24 if $hasMax;               # max is given in days
    my ($uptime,$idletime) = split(' ',defined $aref->[0] ? $aref->[0] : '');
    $uptime = 0 unless defined $uptime;
    $idletime = 0 unless defined $idletime;
    if ($opts{'D'} == 1)
    {
        # Guard the division and scale to a real percentage
        my $busy = $uptime > 0 ? 100 * ($uptime-$idletime)/$uptime : 0;
        printf "Host %13s -> Up %10u Idle %10u (Up %u days, %0.2f%% busy)\n",
            $host,$uptime,$idletime,$uptime/(3600*24),$busy;
    }
    if ($uptime < $min)
    {
        $$rsRef .= $host.' ';
        # Only raise the status; never lower a critical set elsewhere
        $$rvRef = 1 if $$rvRef < 1;
        $$rdRef .= sprintf "Warning: Uptime %u hours\n",$uptime/3600;
    }
    elsif ($hasMax && $uptime > $max)
    {
        $$rsRef .= $host.' ';
        # Deliberately not an alarm. The status used to be forced to 0 here,
        # which wiped out failures recorded by earlier checks or hosts.
        $$rdRef .= sprintf "Warning. High uptimes (%u days in this case) make rebooting dangerous.\n",$uptime/(24*3600);
    }
    else
    {
        $$rdRef .= sprintf "Ok: Uptime %u days\n",$uptime/(24*3600);
    }
}
#---------------------------------------------------------------------------------------
sub CheckLoad
{
    # Compares the 5 minute load average with the -l threshold and raises a
    # critical (2) when it is exceeded. The value is always added to the
    # detail text.
    # NOTE(review): the first line of the 'load' section is assumed to be in
    # /proc/loadavg layout (1min 5min 15min running/total ...), which is what
    # CheckPCount's use of field 4 implies - confirm against the remote script.
    # Field 2 is therefore the 5 minute figure; field 1, which was read
    # before, is the 1 minute figure. If only one value is present it is used.
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;
    if ($opts{'D'} == 1)
    {
        printf "Host %13s -> %s\n",$host,$aref->[0];
    }
    my @field = split(' ',defined $aref->[0] ? $aref->[0] : '');
    my $load = defined $field[1] ? $field[1] : $field[0];
    $load = 0 unless defined $load;
    if ($load > $opts{'l'})
    {
        $$rvRef = 2;
        $$rsRef .= $host.' ';
    }
    $$rdRef .= sprintf "5 minute load average: %0.2f\n",$load;
}
#---------------------------------------------------------------------------------------
sub CheckPs
{
    # Placeholder for a check on the 'ps' section: the arguments are unpacked
    # but nothing is examined and nothing is reported.
    my ($statusRef,$summaryRef,$detailRef,$hostName,$section,$lines) = @_;
}
#---------------------------------------------------------------------------------------
sub CheckRoute
{
    # Raises a critical (2) when the first line of the 'route' section
    # contains "changed", and copies every line of the section into the
    # detail text (already separated with <br>).
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;
    return unless defined $aref->[0] && $aref->[0] =~ /changed/;
    $$rvRef = 2;
    $$rsRef .= $host.' ';
    $$rdRef .= sprintf "Errors in Routing Table %s<br> ",$host;
    $$rdRef .= join '', map { sprintf "%s<br> ", $_ } @{$aref};
}
#---------------------------------------------------------------------------------------
sub CheckPCount
{
    # Checks the process counts against the -p option ('running/total').
    # The counts are taken from the fourth whitespace-separated field of the
    # first line of the 'load' section, itself in 'running/total' form.
    # Exceeding either limit raises a warning (1). A limit that is left out
    # is not enforced.
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;
    my @field = split(' ',defined $aref->[0] ? $aref->[0] : '');
    my ($rcount,$pcount) = split('/',defined $field[3] ? $field[3] : '');
    $rcount = 0 unless defined $rcount && length $rcount;
    $pcount = 0 unless defined $pcount && length $pcount;
    my ($rlimit,$plimit) = split('/',$opts{'p'});
    if ($opts{'D'} == 1)
    {
        printf "Host %13s -> %u/%u\n",$host,$rcount,$pcount;
    }
    my $tooManyRunning = (defined $rlimit && length $rlimit && $rcount > $rlimit);
    my $tooManyTotal   = (defined $plimit && length $plimit && $pcount > $plimit);
    if ($tooManyRunning || $tooManyTotal)
    {
        # Only raise the status; never lower a critical set elsewhere
        $$rvRef = 1 if $$rvRef < 1;
        $$rsRef .= $host.' ';
        $$rdRef .= sprintf "%-13s -> Running/Total %u/%u \n",$host,$rcount,$pcount;
    }
}
#---------------------------------------------------------------------------------------
sub CheckRaid
{
    # Checks every line of the 'raid' section against the -r pattern.
    # Each line must start with "device_name" and consists of colon-separated
    # "name value" pairs. The values of active_disks/working_disks/
    # failed_disks/spare_disks, joined with slashes in that order, must equal
    # the -r option; any device that differs raises a warning (1).
    # A line that does not start with "device_name" is reported as corrupt
    # and ends the check for this host.
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;
    if ($opts{'D'} == 1)
    {
        printf "Host %13s -> %s\n",$host,$aref->[0];
    }
    my $bad = 0;
    foreach my $entry (@{$aref})
    {
        # Check each line is a valid raidinfo line
        if (!defined $entry || $entry !~ m/^device_name/)
        {
            # Only raise the status; never lower a critical set elsewhere
            $$rvRef = 1 if $$rvRef < 1;
            $$rsRef .= $host.' ';
            $$rdRef .= sprintf "%-13s - Corrupt raidinfo line\n",$host;
            return;
        }
        # Fresh hash per device so values from the previous line cannot leak
        # into a line that lacks one of the fields
        my %rHash;
        foreach my $p (split(':',$entry))
        {
            my ($pNam,$pVal) = split(' ',$p);
            next unless defined $pNam;
            $rHash{$pNam} = $pVal;
        }
        my $val = join '/',
            map { defined $rHash{$_} ? $rHash{$_} : '' }
            qw(active_disks working_disks failed_disks spare_disks);
        if ($opts{'r'} ne $val)
        {
            $$rvRef = 1 if $$rvRef < 1;
            if ($bad == 0)
            {
                $$rsRef .= $host.' ';
                $bad = 1;
            }
            $$rdRef .= sprintf "%-13s - Raidinfo for dev %s is %s\n",$host,
                (defined $rHash{'device_name'} ? $rHash{'device_name'} : ''),$val;
        }
    }
}
#---------------------------------------------------------------------------------------
sub CheckMegaraid
{
    # Checks the 'megaraid' section. The first line must start with "Logical"
    # and contain "optimal"; every following line must start with "Channel"
    # and contain "Online.". Any line that does not raises a warning (1) and
    # is quoted in the detail text; the host is added to the summary once.
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;
    my $first = defined $aref->[0] ? $aref->[0] : '';
    if ($opts{'D'} == 1)
    {
        printf "Host %13s -> %s\n",$host,$first;
    }
    my $bad = 0;
    if ($first !~ m/^Logical.*optimal/)
    {
        $$rsRef .= $host.' ';
        $bad = 1;
        # Only raise the status; never lower a critical set elsewhere
        $$rvRef = 1 if $$rvRef < 1;
        $$rdRef .= sprintf "%-11s - %-13s - Raid Failure\n",$host,$first;
    }
    foreach my $i (1 .. $#$aref)
    {
        my $entry = defined $aref->[$i] ? $aref->[$i] : '';
        # Check each line of raidinfo
        next if $entry =~ m/^Channel.*Online\./;
        if ($bad == 0)
        {
            $$rsRef .= $host.' ';
            $bad = 1;
        }
        $$rvRef = 1 if $$rvRef < 1;
        $$rdRef .= sprintf "%-11s - %-13s - Drive Failure\n",$host,$entry;
    }
}
#---------------------------------------------------------------------------------------
sub CheckDBUsage
{
    # Checks database space usage from the 'db_usage' section against the -b
    # option, which is 'threshold:dbspace:dbspace:...'.
    # Only lines between one starting with "Chunks" and one containing
    # "<n> active," are examined. A chunk line is expected to carry, as
    # whitespace-separated fields, an address, the chunk number, the dbspace
    # number, an offset, the allocated size and the free size, followed
    # somewhere by a /dev/ path. Size and free are summed per selected
    # dbspace; a dbspace whose used percentage exceeds the threshold raises a
    # warning (1).
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;
    my ($threshold,@spaces) = split(':',$opts{'b'});   # threshold first, then the dbspaces
    # Selected dbspace numbers, normalised so that "01" and "1" are the same
    my %wanted = map { ($_ + 0) => 1 } grep { /^\d+$/ } @spaces;
    if ($opts{'D'} == 1)
    {
        foreach my $i (0 .. $#$aref)
        {
            printf "Host %13s -> %s\n",$host,$aref->[$i];
        }
    }
    my $inChunks = 0;
    my %usage;      # dbspace number -> { space, free, dev }
    foreach my $entry (@{$aref})
    {
        next unless defined $entry;
        if ($entry =~ m/^Chunks/)           # after the word Chunks comes our useful data
        {
            $inChunks = 1;
            print "Changed State (DB Checker)\n" if $opts{'D'} == 1;
        }
        if ($entry =~ m/\d+\s+active,/)     # after the word active is not useful data
        {
            $inChunks = 0;
            print "Changed State (DB Checker)\n" if $opts{'D'} == 1;
        }
        next unless $inChunks;
        # One match picks up dbspace number, size and free together. The
        # dbspace number used to be captured as a single digit, so dbspace 12
        # was counted as dbspace 1, and the capture was overwritten by later
        # matches while the list of selected dbspaces was still being scanned.
        next unless $entry =~ m/\w+\s+\d+\s+(\d+)\s+\d+\s+(\d+)\s+(\d+).*\/dev\//;
        my ($dbs,$size,$free) = ($1 + 0,$2,$3);
        next unless $wanted{$dbs};
        my $u = $usage{$dbs} ||= { space => 0, free => 0, dev => '' };
        $u->{'space'} += $size;
        $u->{'free'} += $free;
        if ($entry =~ m/(\/dev\/\w+\/\w+)/)     # the device the chunk is allocated under
        {
            $u->{'dev'} .= "$1 ";
        }
    }
    my $bad = 0;
    foreach my $dbs (sort { $a <=> $b } keys %usage)
    {
        my $u = $usage{$dbs};
        next unless $u->{'space'} > 0;          # nothing allocated: avoid dividing by zero
        my $used = $u->{'space'} - $u->{'free'};
        my $percent = ($used / $u->{'space'}) * 100;    # percentage of the space in use
        next unless $percent > $threshold;
        if ($bad == 0)
        {
            $bad = 1;
            $$rsRef .= $host.' ';
        }
        # Only raise the status; never lower a critical set elsewhere
        $$rvRef = 1 if $$rvRef < 1;
        $$rdRef .= sprintf "%-13s - %-10s -> %u%% \n",$host,$u->{'dev'},$percent;
    }
}
#---------------------------------------------------------------------------------------
sub CheckVdsSize
{
    # Checks the backlog figures in the 'vdsCount' section against -g.
    # A line reporting that the VDS count script timed out, or a backlog
    # above the threshold, makes the status critical (2). Every backlog line
    # is listed in the detail text, alarming or not.
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;
    my $alarmed = 0;
    # Marks the host as failing; the summary entry and heading are written
    # only the first time.
    my $raise = sub
    {
        unless ($alarmed == 1)
        {
            $$rsRef .= $host.' ';
            $$rdRef .= sprintf "Size of messages in VDS for %s\n",$host;
        }
        $$rvRef = 2;
        $alarmed = 1;
    };
    foreach my $entry (@{$aref})
    {
        if ($entry =~ m/VDS Count script has timed out/)
        {
            $raise->();
            $$rdRef .= "VDS Count script has timed out, Please check";
        }
        if ($entry =~ m/(\S+)\s+\w+:\s+(\w+)\s+Backlog:\s+(\d+)/)
        {
            my ($label,$value,$backlog) = ($1,$2,$3);
            $raise->() if $backlog > $opts{'g'};
            $$rdRef .= sprintf "%-13s - %-10s -> %uK \n",$label,$value,$backlog;
        }
    }
}
#---------------------------------------------------------------------------------------
sub CheckVdsMsg
{
    # Checks the NumMsgs figures in the 'vdsCount' section against -v.
    # A line reporting that the VDS count script timed out, or a message
    # count above the threshold, makes the status critical (2). Every
    # NumMsgs line is listed in the detail text, alarming or not.
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;
    my $alarmed = 0;
    # Marks the host as failing; the summary entry and the given heading are
    # written only the first time.
    my $raise = sub
    {
        my ($heading) = @_;
        unless ($alarmed == 1)
        {
            $$rsRef .= $host.' ';
            $$rdRef .= sprintf $heading,$host;
        }
        $$rvRef = 2;
        $alarmed = 1;
    };
    foreach my $entry (@{$aref})
    {
        if ($entry =~ m/VDS Count script has timed out/)
        {
            $raise->("Size of messages in VDS for %s\n");
            $$rdRef .= "VDS Count script has timed out, Please check";
        }
        if ($entry =~ m/(\S+)\s+\w+:\s+(\w+)\s+\w+:\s+\d+\s+NumMsgs:\s+(\d+)/)
        {
            my ($label,$value,$count) = ($1,$2,$3);
            $raise->("Number of messages in VDS for %s\n") if $count > $opts{'v'};
            $$rdRef .= sprintf "%-13s - %-10s -> %u \n",$label,$value,$count;
        }
    }
}
#---------------------------------------------------------------------------------------
sub CheckG4SVdsSize
{
    # Checks the backlog figures in the 'vdsG4SCount' section against -G.
    # A backlog above the threshold raises a warning (1) and is listed in the
    # detail text; the host is added to the summary once.
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;
    my $bad = 0;
    foreach my $entry (@{$aref})
    {
        next unless defined $entry;
        next unless $entry =~ m/(\S+)\s+\w+:\s+(\w+)\s+Backlog:\s+(\d+)/;
        my ($label,$value,$backlog) = ($1,$2,$3);
        next unless $backlog > $opts{'G'};
        if ($bad != 1)
        {
            $$rsRef .= $host.' ';
            $$rdRef .= sprintf "Size of messages in VDS for %s\n",$host;
            $bad = 1;
        }
        # Only raise the status; never lower a critical set elsewhere
        $$rvRef = 1 if $$rvRef < 1;
        $$rdRef .= sprintf "%-13s - %-13s - %-10s -> %uK \n",$host,$label,$value,$backlog;
    }
}
#---------------------------------------------------------------------------------------
sub CheckG4SVdsMsg
{
    # Checks the NumMsgs figures in the 'vdsG4SCount' section against -V.
    # A message count above the threshold raises a warning (1) and is listed
    # in the detail text; the host is added to the summary once.
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;
    my $bad = 0;
    foreach my $entry (@{$aref})
    {
        next unless defined $entry;
        next unless $entry =~ m/(\S+)\s+\w+:\s+(\w+)\s+\w+:\s+\d+\s+NumMsgs:\s+(\d+)/;
        my ($label,$value,$count) = ($1,$2,$3);
        next unless $count > $opts{'V'};
        if ($bad != 1)
        {
            $$rsRef .= $host.' ';
            $$rdRef .= sprintf "Number of messages in VDS for %s\n",$host;
            $bad = 1;
        }
        # Only raise the status; never lower a critical set elsewhere
        $$rvRef = 1 if $$rvRef < 1;
        $$rdRef .= sprintf "%-13s - %-13s - %-10s -> %u \n",$host,$label,$value,$count;
    }
}
#---------------------------------------------------------------------------------------
sub CheckInode
{
    # Checks inode usage from the 'inode' section against the -I threshold.
    # Line 0 is skipped (header); every line starting with /dev is split on
    # whitespace and field 5 (use%) / field 6 (mount point) are used. File
    # systems without a numeric percentage (e.g. "-") are skipped. Each
    # remaining file system is listed in the detail text; one above the
    # threshold makes the status critical (2) and adds the host to the
    # summary once.
    # An entry wrapped onto two lines because of a long device name (a /dev
    # line with a single field) is joined with the line that follows it.
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;
    if ($opts{'D'} == 1)
    {
        foreach my $i (0 .. $#$aref)
        {
            printf "Host %13s -> %s\n",$host,$aref->[$i];
        }
    }
    my $bad = 0;
    my $i = 1;
    while ($i <= $#$aref)
    {
        my $entry = $aref->[$i++];
        next unless defined $entry && $entry =~ m/^\/dev/;
        my @field = split (' ',$entry);
        # Wrapped entry: the figures are on the next (non-/dev) line
        if (@field == 1 && $i <= $#$aref && $aref->[$i] !~ m/^\/dev/)
        {
            push @field, split (' ',$aref->[$i++]);
        }
        my ($v,$mp) = @field[4,5];
        next unless defined $v && $v =~ m/^(\d+)%?$/;
        my $val = $1;
        $mp = $field[0] unless defined $mp;     # fall back to the device name
        if ($val > $opts{'I'})
        {
            $$rvRef = 2;
            if ($bad == 0)
            {
                $bad = 1;
                $$rsRef .= $host.' ';
            }
        }
        $$rdRef .= sprintf "%14s -> %u%% \n",$mp,$val;
    }
}
#---------------------------------------------------------------------------------------
sub readOnly
{
    # Scans the 'readOnly' section for lines containing "FAIL" followed by a
    # whitespace character; the text after it is reported as a read-only
    # file system and the status becomes critical (2). In debug mode every
    # line of the section is echoed first.
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;
    my $sawFailure = 0;
    foreach my $entry (@{$aref})
    {
        print $entry, "\n" if $opts{'D'} == 1;
        next unless $entry =~ /FAIL\s(.*)/;
        $$rvRef = 2;
        $sawFailure = 1;
        $$rdRef .= sprintf "%-13s - READ-ONLY %s\n", $host, $1;
    }
    # The host goes into the summary once, however many failures it had
    $$rsRef .= $host.' ' if $sawFailure == 1;
}
#---------------------------------------------------------------------------------------
sub radfileSize
{
    # Every non-empty line in the 'radfileSize' section is reported as a
    # radius file that is too large and raises a warning (1). In debug mode
    # each line of the section is echoed first.
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;
    my $fail = 0;
    foreach my $entry (@{$aref})
    {
        next unless defined $entry;
        if ($opts{'D'} == 1)
        {
            print $entry, "\n";
        }
        next if $entry eq '';       # a blank line names no file
        # Only raise the status; never lower a critical set elsewhere
        $$rvRef = 1 if $$rvRef < 1;
        $fail = 1;
        $$rdRef .= sprintf "%-13s - RADIUS FILE TOO LARGE - %s\n", $host, $entry;
    }
    if ($fail == 1)
    {
        $$rsRef .= $host.' ';
    }
}
#---------------------------------------------------------------------------------------
sub rhCluster
{
    # Checks the 'cluster' section line by line:
    #   - a line containing "refused"                        -> critical
    #   - a "Local" line without "rgmanager"                 -> critical
    #   - a line that is not "Local" and not "Quorum Disk",
    #     contains "Online" but not "rgmanager"              -> critical
    #   - a "service:<name>" line without "started"          -> critical
    #   - no "Quorum Disk" line in the whole section         -> warning
    # With no findings a "Cluster seems ok" line is added to the detail text.
    # In debug mode the passing items are printed as well.
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;
    my $fail = 0;
    my $qdisk = 0;
    my $debug = ($opts{'D'} == 1) ? 1 : 0;
    foreach my $entry (@{$aref})
    {
        next unless defined $entry;
        if ($entry =~ /refused/)
        {
            $$rvRef = 2;
            $fail = 1;
            $$rdRef .= sprintf "Cluster services are not running on %-13s\n", $host;
        }
        my $isLocal = ($entry =~ /Local/) ? 1 : 0;
        my $isQdisk = ($entry =~ /Quorum Disk/) ? 1 : 0;
        my $hasRgmanager = ($entry =~ /rgmanager/) ? 1 : 0;
        if ($isLocal)
        {
            if (!$hasRgmanager)
            {
                $$rvRef = 2;
                $fail = 1;
                $$rdRef .= sprintf "rgmanager service is not running on %-13s\n", $host;
            }
            elsif ($debug)
            {
                printf "rgmanager service is running on %-13s\n", $host;
            }
        }
        elsif (!$isQdisk && $entry =~ /Online/)
        {
            # Another cluster member as seen from this host; the first word
            # of the line is used as its name
            my ($node) = $entry =~ /(\S+)/;
            if (!$hasRgmanager)
            {
                $$rvRef = 2;
                $fail = 1;
                $$rdRef .= sprintf "%s reports rgmanager service is not running on %s, please check the rgmanager service on both nodes\n", $host ,$node;
            }
            elsif ($debug)
            {
                printf "%s reports rgmanager service is running on %s\n", $host ,$node;
            }
        }
        if ($isQdisk)
        {
            $qdisk = 1;
        }
        if ($entry =~ /service\:(\w+)/)
        {
            my $serv = $1;
            if ($entry !~ /started/)
            {
                $$rvRef = 2;
                $fail = 1;
                $$rdRef .= sprintf "Clustered service \"%s\" is not shown as running on %-13s\n", $serv, $host;
            }
            elsif ($debug)
            {
                printf "Clustered service \"%s\" is shown as running on %-13s\n", $serv, $host;
            }
        }
    }
    if ($qdisk == 0)
    {
        # Only raise the status: a plain assignment here used to turn a
        # critical found above (or by an earlier check) into a warning
        $$rvRef = 1 if $$rvRef < 1;
        $fail = 1;
        $$rdRef .= sprintf "Quorum disk is not present on %-13s\n", $host;
    }
    elsif ($debug)
    {
        printf "Quorum disk is present on %-13s\n", $host;
    }
    if ($fail == 1)
    {
        $$rsRef .= $host.' ';
    }
    else
    {
        $$rdRef .= sprintf "Cluster seems ok on %-13s\n", $host;
    }
}
#---------------------------------------------------------------------------------------
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment