Created
April 29, 2015 15:04
-
-
Save danboid/995fbc789b19b41c08a4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl -w | |
use strict; | |
use Getopt::Std; | |
use Socket; | |
use Fcntl; | |
use Fcntl ':mode'; | |
use Data::Dumper; | |
# File-level state.
#
# The three threshold constants and most of the scratch variables below are
# not referenced by the main flow in this section; they are kept because
# other parts of the file may still rely on them.
my $FULLLEVEL = 90;     # DF full level
my $UPTIME    = 3600;
my $FIVEMIN   = 1.0;
my (%hosts, %badhosts);
my ($portname, $temp, $line);
my ($ref,$bad,$tmp,@raid);
my ($inftype);
my ($retVal,$retSummary,$retDetail,%opts,$hostListRef,$hostInfoRef);
#local (*SOCK);

# Overall result: exit status, list of failing hosts, human readable detail.
$retVal = 0;
$retSummary = $retDetail = '';

#---------------------------------------------------------------
#
# Options:-
#
# h - Help (usage)
# D - Debug run, used before installing or to check on servers
# P - Port number/name (medinf)
# T - Timeout (5 seconds); lower-case -t is accepted as an alias
# d - Disk usage threshold (85)
# I - Inode usage threshold
# r - RAID check (values for active/working/failed/spare, separated by slashes)
# m - MegaRAID check
# u - Uptime check (min in hours and max in days, separated by a slash / )
# l - 5 min load avg, enter float value, alarms if greater
# p - Max number of processes, both running and total separated by a slash (running/total)
# b - DB space threshold, followed by selected db spaces, all separated by colons (eg 80:1:2:3:4)
# R - Routing table check
# M - Reports if lost messages are present on a GPRS station
# v/g - VDS message count / backlog size thresholds
# V/G - G4S VDS message count / backlog size thresholds
# f - Check for read only file systems
# s - Swap space check
# z - Check size of radius files (Voda and O2 only)
# c - RedHat Clustering check
# e - EMS daemon process checking
# k - Kernel info (debug only)
$opts{'D'} = 0;

# The option spec previously listed neither 'P:' nor 'T:' (it had a lower-case
# 't:' and 'p:' twice), so the documented -P and -T switches were rejected and
# the defaults could never be overridden.
getopts('hDP:T:t:d:I:l:u:v:V:g:G:r:m:kp:b:fRMcezs',\%opts);

# Defaults are applied after parsing so that a legacy "-t <secs>" can stand in
# for -T when -T itself was not given.
if (!defined $opts{'T'})
{
    $opts{'T'} = defined $opts{'t'} ? $opts{'t'} : 5;
}
if (!defined $opts{'P'})
{
    $opts{'P'} = 'medinf';
}

if (defined $opts{'h'})
{
    ShowUsage();
}

# Built-in host list (all entries currently disabled); hosts named on the
# command line are appended by ReadHosts.
$hostListRef =
[
# 'z',
# 'zam',
# 'hinge','bracket','nmcpri','viper',
# 'emailgw15-1',
# 'ms2-1','ms2-5',
# 'ms15-1','ms15-2','ms15-3',
# 'as2-2','as2-3',
# 'as15-1','as15-2',
# 'gsms2-1','auth2-3','vpn2-2',
# 'gprs2-1','gprs2-2','gprs2-3',
# 'gprs15-1','gprs15-2','gprs15-3','gprs15-4',
# 'host2-1','host2-2','host2-3','host2-4','host2-5','host2-6',
# 'host2-7','host2-8','host2-9',
# 'host15-1','host15-2','host15-3','host15-4','host15-5',
# 'host15-6','host15-7','host15-8','host15-9','host15-10','host15-11',
# 'host15-12','host15-13'
];
ReadHosts($hostListRef);

# Connect to the servers, stream the data back and then decode into a structured hash
{
    # Make multiple connections to the servers, stream all the data back in
    # parallel and store it in a hash keyed by host name.
    my $rawHostInfoRef = ConnectAndStream($opts{'P'},$hostListRef,$opts{'T'});

    # ConnectAndStream returns undef when the port/service cannot be resolved;
    # dereferencing that would die under strict, so report it instead.
    if (!defined $rawHostInfoRef)
    {
        print "Unknown port or service '$opts{'P'}'\n";
        exit 3;
    }

    # Check for basic connection errors (keys starting with '-' are metadata,
    # not hosts).
    foreach my $h (grep { !/^\-/ } keys %{$rawHostInfoRef})
    {
        if ($rawHostInfoRef->{$h}->{error} == 1)
        {
            $retVal = 2;
            $retSummary .= $h.' ';
            $retDetail .= sprintf "%-13s - No Response (%s)\n",$h,$rawHostInfoRef->{$h}->{errstr};
            delete $rawHostInfoRef->{$h};
        }
    }
    $hostInfoRef = DecodeRawInfo($rawHostInfoRef,$opts{'D'});
}

# At this point the raw info has been structured into a hash, so run the
# specific check for every parameter each host reported.
foreach my $h (sort keys %{$hostInfoRef})
{
    foreach my $param (sort keys %{$hostInfoRef->{$h}})
    {
        CheckParam(\$retVal,\$retSummary,\$retDetail,$h,$param,$hostInfoRef->{$h}->{$param});
    }
}

# The detail text is emitted with <br> in place of newlines; the exit status
# carries the overall result.
$retDetail =~ s/\n/<br>/g;
if ($retVal == 0)
{
    print "Ok $retDetail";
    exit 0;
}
print $retDetail;
exit $retVal;
#------------------------------------------------------------------------------------------------------------------------------------------- | |
# ShowUsage()
#
# Prints the command line help to STDERR and terminates the script with
# exit status 3.  Never returns.
#
# The text now also covers the switches that the option parser and
# CheckParam act on but that the help previously omitted (-h, -T, -I, -m,
# -R, -M, -e, -v, -g, -V, -G) and mentions the host arguments.
sub ShowUsage
{
    print STDERR <<"END_USAGE";


$0 : Usage
$0 [-h] [-D] [-P <number|service>] [-T <timeout>] [-d <threshold>] [-I <threshold>] [-r <pattern>] [-m <arg>] [-u <pattern>] [-l <max>] [-p <pattern>] [-b <pattern>] [-v <max>] [-g <max>] [-V <max>] [-G <max>] [-k] [-f] [-R] [-M] [-c] [-e] [-z] [-s] host [host ...]

$0 is a script primarily for running under mon to determine server environmental issues, previously handled by df web page

-h - Show this help text
-D - Debug - Switching this option on runs the script in a debug mode, useful for finding out what is actually
 going on. Combine with other switches for more useful info. Default is off. Run from a shell.
-P - Port to query on the server, can be specified as number or service name, default medinf
-T - Timeout in seconds to wait for data from the servers, default 5
-d - Check disk usage, followed by threshold value of percent utilised
-I - Check inode usage, followed by threshold value of percent utilised
-r - Check for RAID faults, specified as pattern 'A/W/F/S', usual value is '2/3/0/1'
-m - Check MegaRAID logical drive and channel status (takes an argument, its value is not used)
-u - Check for notifiable uptime, either too low or too high, specified as 'min/max' where min in hours, max in days
-l - Check for loadavg, specified as float value for 5 min load, alerts if exceeded
-p - Check for abnormal numbers of process, specified as 'running/total'
-s - Check for swap memory being used
-k - No alarm raised, only useful in debug mode, specifies the kernel running on server
-b - Check database space usage for nodes running informix 9+. Input is the threshold percentage followed by the database spaces needed to be checked, separated by colons (eg 80:1:2:3:4)
-f - Check for read only file systems.
-R - Check for changes in the routing table.
-M - Check for lost messages on GPRS stations.
-c - Check status of RedHat Cluster services.
-e - Check memory used by EMS daemon processes.
-z - Check Radius Login file size.
-v - Check number of messages in VDS, followed by the maximum allowed
-g - Check VDS backlog size, followed by the maximum allowed
-V - As -v, for G4S VDS
-G - As -g, for G4S VDS



END_USAGE
    exit 3;
}
#------------------------------------------------------------------------------------------------------------------------------------------- | |
# ConnectAndStream($port, $hostRef, $timeout)
#
# Connects to every host in the array ref $hostRef on TCP port $port (a
# number or a service name) in parallel, using non-blocking sockets, and
# collects whatever each host sends until it closes the connection.
# The loop ends when every connection has been closed or when nothing at all
# has been received for $timeout seconds (may be fractional).
#
# Returns undef if the service name cannot be resolved, otherwise a hash ref:
#
#   href-> {'-retval'}             - Overall retcode (0 ok, 1 setup error,
#                                    2 connect/read/timeout error)
#          {hostname} -> {error}   - 0 - OK
#                                  - 1 - Error occurred
#                        {errstr}  - String describing error
#                        {retstr}  - String returned from remote port
#
# A host that is still connected when the timeout expires and has not sent a
# single byte is reported with error = 1 ("No data received ...") rather than
# being passed on as an apparently healthy host with no data.
#
# Internal bookkeeping during the query:
#
#   ihref-> {hostname} -> {addr}   - Packed address of host
#                      -> {sock}   - Filehandle of the socket
#                      -> {file}   - File descriptor number used for select,
#                                    -1 once the host is finished/unusable
sub ConnectAndStream
{
    my ($port,$hostRef,$timeout) = @_;
    my $href   = {};
    my $ihref  = {};
    my $retVal = 0;

    # Get the protocol value
    my $proto = getprotobyname('tcp');

    # Get the port number assuming it has been passed by name
    if ($port !~ m/^\d+$/)
    {
        $port = getservbyname($port,'tcp');
    }
    if (!defined $port)
    {
        return undef;
    }

    # Lookup all the hostnames and store in the internal hash ready for connecting.
    # A failed lookup only disables that one host; the others still get queried.
    foreach my $host (@{$hostRef})
    {
        my $r  = $href->{$host}  = { error => 0 };   # Assume things start well
        my $ir = $ihref->{$host} = { file => -1 };   # No descriptor yet
        $ir->{addr} = gethostbyname($host);
        if (!defined $ir->{addr})
        {
            $retVal = 1;
            $r->{error} = 1;
            $r->{errstr} = "Host lookup error";
        }
    }

    # Go through all hosts and start a non-blocking connect
    foreach my $host (@{$hostRef})
    {
        my $r = $href->{$host};
        next if $r->{error} == 1;
        my $ir = $ihref->{$host};

        my $paddr = sockaddr_in($port,$ir->{addr});
        my $sock;
        if (!socket($sock,PF_INET,SOCK_STREAM,$proto))
        {
            $retVal = 1;
            $r->{error} = 1;
            $r->{errstr} = "Can't create socket: $!";
            next;
        }
        $ir->{sock} = $sock;

        # Make socket NON_BLOCK before connect.  F_GETFL hands the current
        # flags back as the return value ("0 but true" for zero), so they are
        # taken from there and O_NONBLOCK is added to them.
        my $flags = fcntl($sock,F_GETFL,0);
        if ( (!defined $flags) || (!fcntl($sock,F_SETFL,($flags + 0) | O_NONBLOCK)) )
        {
            $retVal = 1;
            $r->{error} = 1;
            $r->{errstr} = "Can't fcntl on socket: $!";
            close($sock);
            next;
        }

        # Connecting a non-blocking socket returns immediately with EINPROGRESS;
        # the connection continues in the background and the real outcome shows
        # up via select/sysread.  The errno is tested symbolically because the
        # text of $! depends on the locale.
        if ( (!connect($sock,$paddr)) && (!$!{EINPROGRESS}) )
        {
            $retVal = 2;
            $r->{error} = 1;
            $r->{errstr} = "Can't connect: $!";
            close($sock);
            next;
        }

        # Beware this is not a 4-arg select, this merely makes the socket
        # handle unbuffered (it used to be applied to an unrelated variable).
        my $oldfh = select($sock); $| = 1; select($oldfh);

        $ir->{file} = fileno($sock);
    }

    # Read from whichever sockets are ready until all are closed or we time out
    while (1)
    {
        # Create the read bit vector for the select statement
        my $rbits   = '';
        my $pending = 0;
        foreach my $host (@{$hostRef})
        {
            my $fd = $ihref->{$host}->{file};
            if ($fd >= 0)
            {
                vec($rbits,$fd,1) = 1;
                $pending++;
            }
        }
        last if $pending == 0;      # Every connection is finished

        my $nfound = select($rbits,undef,undef,$timeout);
        if ($nfound < 0)
        {
            next if $!{EINTR};      # Interrupted by a signal, just retry
            last;                   # Bit vector is not usable after an error
        }
        last if $nfound == 0;       # Nothing ready after timeout

        # Read all fds that are ready
        foreach my $host (@{$hostRef})
        {
            my $ir = $ihref->{$host};
            my $r  = $href->{$host};
            next if $ir->{file} < 0;
            next if !vec($rbits,$ir->{file},1);

            my $chunk;
            my $num = sysread($ir->{sock},$chunk,4096);
            if (!defined $num)
            {
                $retVal = 2;
                $r->{error} = 1;
                $r->{errstr} = "Read error: $!";
                close($ir->{sock});     # Do not leak the descriptor
                $ir->{file} = -1;
            }
            elsif ($num == 0)
            {
                # Remote end closed the connection
                close($ir->{sock});
                $ir->{file} = -1;
            }
            else
            {
                $r->{retstr} .= $chunk;
            }
        }
    }

    # Anything still open here has timed out, so shut it down.  A host that
    # never delivered any data is flagged so the caller can report it.
    foreach my $host (@{$hostRef})
    {
        my $ir = $ihref->{$host};
        next if $ir->{file} < 0;
        close($ir->{sock});
        $ir->{file} = -1;
        my $r = $href->{$host};
        if (!defined $r->{retstr})
        {
            $retVal = 2;
            $r->{error} = 1;
            $r->{errstr} = "No data received within $timeout seconds";
        }
    }

    $href->{'-retval'} = $retVal;
    return $href;
}
#--------------------------------------------------------------------------------------- | |
# ReadHosts($ref)
#
# Moves every remaining command line argument from @ARGV onto the array
# referenced by $ref; after option parsing these are the host names to query.
#
# The previous loop tested the truth of $ARGV[0], so an argument of "0" or ""
# stopped processing and left the remaining hosts behind.  All arguments are
# consumed now; empty strings are dropped because they cannot name a host.
sub ReadHosts
{
    my ($ref) = @_;
    push @{$ref}, grep { defined $_ && length $_ } splice(@ARGV);
}
#--------------------------------------------------------------------------------------- | |
# ExtractLine($iref)
#
# Removes the first line from the string referenced by $iref and returns it
# without its line ending.  Up to two consecutive newlines are consumed, so a
# single blank line following a line is swallowed together with it.
#
# A final line that has no terminating newline is returned too (and the
# buffer emptied); previously it was never returned and stayed in the buffer.
# The carriage return of a CRLF ending is stripped as well.
#
# Returns '' when the buffer is empty/undefined or when the line is blank.
sub ExtractLine
{
    my ($iref) = @_;
    my $line = '';

    return $line if !defined $$iref;

    if ($$iref =~ s/\A([^\n]*)\n{1,2}//)
    {
        $line = $1;
    }
    elsif (length $$iref)
    {
        # Unterminated tail: hand it over and leave nothing behind
        $line  = $$iref;
        $$iref = '';
    }
    $line =~ s/\r\z//;
    return $line;
}
#--------------------------------------------------------------------------------------- | |
# | |
# DecodeRawInfo($rawRef, $debug)
#
# Decodes the raw text received from each host into a structured hash:
#
#   ref -> {hostname} -> {paramname} -> []
#
# ref is returned by this function.
# hostname is every key of $rawRef that does not start with '-'.
# paramname is taken from the "!!!start <paramname> info!!!" marker lines in
# the text; each following line up to a line starting with "!!!end" becomes
# one element of that parameter's array.
#
# When $debug is 1 the host and the parameter names found are printed.
#
# Parsing used to stop at the first line that was false in boolean context,
# so a data line consisting of "0" (or a blank line) silently discarded the
# rest of that host's data.  The loop now runs until the buffer is used up
# and merely skips blank lines.
#
sub DecodeRawInfo
{
    my ($rawRef,$debug) = @_;
    my $hr = {};

    foreach my $h (sort grep { !/^\-/ } keys %{$rawRef})
    {
        my $ir    = $hr->{$h} = {};
        my $state = 'end';      # 'end' = between sections, 'data' = inside one
        my $param;

        if ($debug == 1)
        {
            printf "Host %13s -> ",$h;
        }

        my $raw = $rawRef->{$h}->{retstr};
        $raw = '' if !defined $raw;

        while (length $raw)
        {
            my $before = length $raw;
            my $line   = ExtractLine(\$raw);

            if ($line eq '')
            {
                # Nothing was consumed: only an unterminated remainder is
                # left, so stop instead of looping forever.
                last if length($raw) >= $before;
                next;           # Blank line, ignore
            }

            if ($state eq 'data')
            {
                if ($line !~ m/^!!!/)
                {
                    push @{$ir->{$param}},$line;
                }
                elsif ($line =~ m/^!!!end/)
                {
                    $state = 'end';
                }
                # Any other "!!!" line inside a section is ignored
            }
            elsif ($line =~ m/^!!!start\s+(.*?)\s+info!!!$/)
            {
                $param = $1;
                if ($debug == 1)
                {
                    print "$param ";
                }
                $ir->{$param} = [];
                $state = 'data';
            }
            else
            {
                print "SW Error on host $h: Failed to match a start line $line \n";
            }
        }

        if ($debug == 1)
        {
            print "\n";
        }
    }
    return $hr;
}
#--------------------------------------------------------------------------------------- | |
# CheckParam($rvRef, $rsRef, $rdRef, $host, $param, $aref)
#
# Routes one decoded parameter section of one host to its check routine(s).
# A routine runs only when $param equals the section name in the table AND
# the command line option in the second column was given.  Rows are tried in
# order, so a section with two rows (e.g. 'load') may run two checks.
#
#   $rvRef/$rsRef/$rdRef - refs to the overall status, summary and detail
#   $host                - host the data came from
#   $param               - section name
#   $aref                - array ref with the section's data lines
sub CheckParam
{
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;

    #     section        option  routine
    my @dispatch = (
        [ 'df',          'd',    \&CheckDf         ],
        [ 'inode',       'I',    \&CheckInode      ],
        [ 'kernel',      'k',    \&CheckKernel     ],
        [ 'uptime',      'u',    \&CheckUptime     ],
        [ 'load',        'l',    \&CheckLoad       ],
        [ 'load',        'p',    \&CheckPCount     ],
        [ 'ps',          'p',    \&CheckPs         ],
        [ 'raid',        'r',    \&CheckRaid       ],
        [ 'megaraid',    'm',    \&CheckMegaraid   ],
        [ 'db_usage',    'b',    \&CheckDBUsage    ],
        [ 'route',       'R',    \&CheckRoute      ],
        [ 'lostmsg',     'M',    \&CheckLostMsgs   ],
        [ 'vdsCount',    'v',    \&CheckVdsMsg     ],
        [ 'vdsCount',    'g',    \&CheckVdsSize    ],
        # TEMPORARY CHECK FOR G4S
        [ 'vdsG4SCount', 'V',    \&CheckG4SVdsMsg  ],
        [ 'vdsG4SCount', 'G',    \&CheckG4SVdsSize ],
        [ 'readOnly',    'f',    \&readOnly        ],
        [ 'cluster',     'c',    \&rhCluster       ],
        [ 'radfileSize', 'z',    \&radfileSize     ],
        [ 'memfree',     's',    \&CheckSwap       ],
        [ 'tibemsd',     'e',    \&CheckEMS        ],
    );

    foreach my $rule (@dispatch)
    {
        my ($section,$flag,$routine) = @{$rule};
        next if $param ne $section;
        next if !defined $opts{$flag};
        $routine->($rvRef,$rsRef,$rdRef,$host,$param,$aref);
    }
}
#--------------------------------------------------------------------------------------- | |
# CheckEMS($rvRef, $rsRef, $rdRef, $host, $param, $aref)
#
# Checks the memory use of the processes listed in $aref (lines 1 onwards;
# line 0 is only shown in debug mode).  Each line is split on whitespace into
# process id, memory in K and port.
#
#   memory >= 1048576 K          -> critical (status 2)
#   768000 K < memory < 1048576  -> warning  (status 1)
#
# The critical test used to require memory to be above 1048576 AND below
# 768000 at the same time, so it could never fire.  The warning no longer
# lowers a status of 2 that an earlier check has already set.
#
# An "Ok!" line is added only while the overall status is still neither 1
# nor 2, as before.
sub CheckEMS
{
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;
    my $warnK = 768000;
    my $critK = 1048576;

    if ($opts{'D'} == 1)
    {
        printf "Host %13s -> %s\n",$host,$aref->[0];
    }
    foreach my $i (1 .. $#$aref)
    {
        my ($proc,$memory,$port) = split(' ',$aref->[$i]);

        # Skip lines that do not carry a numeric memory figure
        next if !defined $memory || $memory !~ m/^\d+$/;

        if ($memory >= $critK)
        {
            $$rvRef = 2;
            $$rdRef .= sprintf "Critical! Process id %s (listening at %s) is using %u K (more than 1gig) of memory\n",$proc,$port,$memory;
            $$rsRef .= $host.' ';
        }
        elsif ($memory > $warnK)
        {
            $$rvRef = 1 if $$rvRef < 1;     # Never downgrade a critical
            $$rdRef .= sprintf "Warning! Process id %s (listening at %s) is using %u K (more than 750meg) of memory\n",$proc,$port,$memory;
            $$rsRef .= $host.' ';
        }
        if (($$rvRef != 1) && ($$rvRef != 2))
        {
            $$rdRef .= sprintf "Ok! Process %s is using %u K of memory\n",$proc,$memory;
            $$rsRef .= $host.' ';
        }
    }
}
# CheckSwap($rvRef, $rsRef, $rdRef, $host, $param, $aref)
#
# Looks for lines starting with "Swap:" in $aref (lines 1 onwards; line 0 is
# only shown in debug mode) and reads total and used from the 2nd and 3rd
# whitespace separated fields.
#
#   no swap at all (total 0) -> critical (status 2)
#   more than 75% used       -> critical (status 2)
#   more than 60% used       -> warning  (status 1)
#
# The warning no longer lowers a status of 2 set by an earlier check, and
# lines without numeric total/used fields are skipped.  An "Ok!" line is
# added only while the overall status is still neither 1 nor 2, as before.
sub CheckSwap
{
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;

    if ($opts{'D'} == 1)
    {
        printf "Host %13s -> %s\n",$host,$aref->[0];
    }
    foreach my $i (1 .. $#$aref)
    {
        next if $aref->[$i] !~ m/^Swap:/;

        my (undef,$total,$used) = split(' ',$aref->[$i]);
        next if !defined $total || $total !~ m/^\d+(?:\.\d+)?$/;
        next if !defined $used  || $used  !~ m/^\d+(?:\.\d+)?$/;

        if ($total == 0)
        {
            $$rvRef = 2;
            $$rdRef .= sprintf "Critical! %s has no swap available\n",$host;
            $total = 0.001;     # Keeps the percentage below computable
        }
        my $percused = ($used / $total)*100;

        if ($percused > 75)
        {
            $$rvRef = 2;
            $$rdRef .= sprintf "Critical! %s is using %.2f percent (>75 percent) of swap\n",$host,$percused;
            $$rsRef .= $host.' ';
            next;
        }
        if ($percused > 60)
        {
            $$rvRef = 1 if $$rvRef < 1;     # Never downgrade a critical
            $$rdRef .= sprintf "Warning! %s is using %.2f percent (>60 percent) of swap\n",$host,$percused;
            $$rsRef .= $host.' ';
        }
        if (($$rvRef != 1) && ($$rvRef != 2))
        {
            $$rdRef .= sprintf "Ok! %s is using %.2f percent of swap\n",$host,$percused;
            $$rsRef .= $host.' ';
        }
    }
}
# CheckDf($rvRef, $rsRef, $rdRef, $host, $param, $aref)
#
# Checks disk usage.  Every line of $aref (from line 1) that starts with
# "/dev" is split on whitespace; field 5 is the use percentage and field 6
# the mount point.  A percentage above the -d threshold sets status 2 and
# adds the host to the summary once.  Every parsed filesystem is listed in
# the detail text.
#
# History (26/27-04-11, SteveB): logger15-1 had an unusually long device path
# for two partitions, which made df print each of them on two lines.  That
# left the fields undefined here and produced error messages in mon.  Such
# wrapped entries are now re-joined with their continuation line; entries
# that still have no numeric percentage are skipped.  Running df with -P on
# the remote side avoids the wrapping altogether.
sub CheckDf
{
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;

    if ($opts{'D'} == 1)
    {
        foreach my $i (0 .. $#$aref)
        {
            printf "Host %13s -> %s\n",$host,$aref->[$i];
        }
    }

    my $bad = 0;
    my $i   = 1;
    while ($i <= $#$aref)
    {
        my $entry = $aref->[$i++];
        next if $entry !~ m/^\/dev/;

        my @field = split(' ',$entry);

        # Wrapped entry: the remaining columns are on the next line
        if ((@field < 6) && ($i <= $#$aref) && ($aref->[$i] !~ m/^\/dev/))
        {
            push @field, split(' ',$aref->[$i++]);
        }

        my ($val,$mp) = @field[4,5];
        next if !defined $val || !defined $mp;
        $val =~ s/^(\d*)%/$1/;
        next if $val !~ m/^\d+$/;

        if ($val > $opts{'d'})
        {
            $$rvRef = 2;
            if ($bad == 0)
            {
                $bad = 1;
                $$rsRef .= $host.' ';
            }
        }
        $$rdRef .= sprintf "%14s -> %u%% \n",$mp,$val;
    }
}
#--------------------------------------------------------------------------------------- | |
# CheckLostMsgs($rvRef, $rsRef, $rdRef, $host, $param, $aref)
#
# Raises status 2 for $host when the first data line is exactly "yes".
# In debug mode that first line is printed.
sub CheckLostMsgs
{
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;
    my $answer = $aref->[0];

    printf("Host %13s -> %s\n",$host,$answer) if $opts{'D'} == 1;

    return if $answer ne "yes";

    $$rvRef  = 2;
    $$rsRef .= "$host ";
    $$rdRef .= sprintf("%-13s - Lost messages in /usr/medway for GPRS station(s)\n",$host);
}
#--------------------------------------------------------------------------------------- | |
# CheckKernel($rvRef, $rsRef, $rdRef, $host, $param, $aref)
#
# Informational only: strips everything up to and including the first
# whitespace character from the first data line (modifying $aref in place)
# and prints the remainder in debug mode.  Never changes the status.
sub CheckKernel
{
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;

    $aref->[0] =~ s/^.*?\s(.*)$/$1/;

    printf("Host %13s -> %s\n",$host,$aref->[0]) if $opts{'D'} == 1;
}
#--------------------------------------------------------------------------------------- | |
# CheckUptime($rvRef, $rsRef, $rdRef, $host, $param, $aref)
#
# Compares the uptime (first field of the first data line, in seconds; the
# second field is the idle time) with the limits given as -u 'min/max', where
# min is in hours and max in days.
#
#   uptime below min -> warning (status 1), host added to summary
#   uptime above max -> advisory text only, host added to summary
#   otherwise        -> "Ok" line in the detail text
#
# The high-uptime branch used to set the overall status to 0, wiping out any
# alarm raised by an earlier check; it now leaves the status alone.  The low
# uptime warning no longer lowers a status of 2.  The debug "busy" figure is
# a real percentage now and is not computed for an uptime of 0.
sub CheckUptime
{
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;

    my ($min,$max) = split('/',$opts{'u'});
    $min *= 3600;       # min is given in hours
    $max *= 3600 * 24;  # max is given in days

    my ($uptime,$idletime) = split(' ',$aref->[0]);

    if ($opts{'D'} == 1)
    {
        my $busy = ($uptime > 0) ? (($uptime - $idletime) / $uptime) * 100 : 0;
        printf "Host %13s -> Up %10u Idle %10u (Up %u days, %0.2f%% busy)\n",
            $host,$uptime,$idletime,$uptime/(3600*24),$busy;
    }

    if ($uptime < $min)
    {
        $$rsRef .= $host.' ';
        $$rvRef = 1 if $$rvRef < 1;     # Never downgrade a critical
        $$rdRef .= sprintf "Warning: Uptime %u hours\n",$uptime/3600;
    }
    elsif ($uptime > $max)
    {
        # Advisory only: must not touch the overall status
        $$rsRef .= $host.' ';
        $$rdRef .= sprintf "Warning. High uptimes (%u days in this case) make rebooting dangerous.\n",$uptime/(24*3600);
    }
    else
    {
        $$rdRef .= sprintf "Ok: Uptime %u days\n",$uptime/(24*3600);
    }
}
#--------------------------------------------------------------------------------------- | |
# CheckLoad($rvRef, $rsRef, $rdRef, $host, $param, $aref)
#
# Raises status 2 when the 5 minute load average exceeds the -l value and
# always adds the figure to the detail text.
#
# The first data line is split on whitespace.  CheckPCount reads a
# "running/total" pair from the 4th field of this same line, which matches
# the /proc/loadavg layout (1 min, 5 min, 15 min, running/total, last pid).
# In that layout the 5 minute average is the 2nd field; the 1st field, which
# was used before, is the 1 minute average.
# NOTE(review): confirm the remote side really sends /proc/loadavg verbatim.
# If the line has only one field, that field is used as before.
sub CheckLoad
{
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;

    if ($opts{'D'} == 1)
    {
        printf "Host %13s -> %s\n",$host,$aref->[0];
    }

    my @field = split(' ',$aref->[0]);
    my $load  = defined $field[1] ? $field[1] : $field[0];

    if ($load > $opts{'l'})
    {
        $$rvRef = 2;
        $$rsRef .= $host.' ';
    }
    $$rdRef .= sprintf "5 minute load average: %0.2f\n",$load;
}
#--------------------------------------------------------------------------------------- | |
# CheckPs($rvRef, $rsRef, $rdRef, $host, $param, $aref)
#
# Placeholder for a check of the 'ps' section: accepts the standard check
# arguments and does nothing with them.
sub CheckPs
{
    my (
        $rvRef, $rsRef, $rdRef,     # overall status, summary, detail (refs)
        $host,  $param, $aref,      # host name, section name, data lines
    ) = @_;
}
#--------------------------------------------------------------------------------------- | |
# CheckRoute($rvRef, $rsRef, $rdRef, $host, $param, $aref)
#
# Raises status 2 when the first data line contains the word "changed".
# The detail text then gets a heading naming the host followed by every data
# line, each terminated with "<br> ".
sub CheckRoute
{
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;

    my $first = $aref->[0];
    return if !defined $first;
    return if $first !~ /changed/;

    $$rvRef  = 2;
    $$rsRef .= "$host ";
    $$rdRef .= join('',
        "Errors in Routing Table $host<br> ",
        map { "$_<br> " } @{$aref}[0 .. $#$aref]);
}
#--------------------------------------------------------------------------------------- | |
# CheckPCount($rvRef, $rsRef, $rdRef, $host, $param, $aref)
#
# Checks the number of processes.  The 4th whitespace separated field of the
# first data line is expected to be "running/total"; the limits come from
# -p 'running/total'.  Exceeding either limit raises a warning (status 1).
#
# The warning no longer lowers a status of 2 set by an earlier check, and a
# line without a usable 4th field is ignored.
sub CheckPCount
{
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;

    my @field = split(' ',defined $aref->[0] ? $aref->[0] : '');
    return if !defined $field[3];

    my ($rcount,$pcount) = split('/',$field[3]);
    return if !defined $rcount || !defined $pcount;

    my ($rlimit,$plimit) = split('/',$opts{'p'});

    if ($opts{'D'} == 1)
    {
        printf "Host %13s -> %u/%u\n",$host,$rcount,$pcount;
    }
    if (($rcount > $rlimit) || ($pcount > $plimit))
    {
        $$rvRef = 1 if $$rvRef < 1;     # Never downgrade a critical
        $$rsRef .= $host.' ';
        $$rdRef .= sprintf "%-13s -> Running/Total %u/%u \n",$host,$rcount,$pcount;
    }
}
#--------------------------------------------------------------------------------------- | |
# CheckRaid($rvRef, $rsRef, $rdRef, $host, $param, $aref)
#
# Checks software RAID status.  Every data line must start with
# "device_name" and consists of colon separated "name value" pairs.  The
# values of active_disks/working_disks/failed_disks/spare_disks are joined
# with slashes and compared with the -r pattern; any difference raises a
# warning (status 1) naming the device.
#
# A line that does not start with "device_name" raises a warning and stops
# the check for this host.
#
# The name/value table is now rebuilt for every line, so a field missing on
# one device can no longer pick up the value of the previous device, and the
# warning no longer lowers a status of 2 set by an earlier check.
sub CheckRaid
{
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;
    my @wanted = ('active_disks','working_disks','failed_disks','spare_disks');

    if ($opts{'D'} == 1)
    {
        printf "Host %13s -> %s\n",$host,$aref->[0];
    }

    my $bad = 0;
    foreach my $i (0 .. $#$aref)
    {
        # Check each line is a valid raidinfo line
        if ($aref->[$i] !~ m/^device_name/)
        {
            $$rvRef = 1 if $$rvRef < 1;     # Never downgrade a critical
            $$rsRef .= $host.' ';
            $$rdRef .= sprintf "%-13s - Corrupt raidinfo line\n",$host;
            return;
        }

        # Split the raidinfo line into "name value" pairs and index them
        my %rHash;
        foreach my $p (split(':',$aref->[$i]))
        {
            my ($pNam,$pVal) = split(' ',$p);
            next if !defined $pNam;
            $rHash{$pNam} = $pVal;
        }

        my $val = join('/',map { defined $rHash{$_} ? $rHash{$_} : '' } @wanted);
        if ($opts{'r'} ne $val)
        {
            $$rvRef = 1 if $$rvRef < 1;     # Never downgrade a critical
            if ($bad == 0)
            {
                $$rsRef .= $host.' ';
                $bad = 1;
            }
            $$rdRef .= sprintf "%-13s - Raidinfo for dev %s is %s\n",$host,$rHash{'device_name'},$val;
        }
    }
}
#--------------------------------------------------------------------------------------- | |
# CheckMegaraid($rvRef, $rsRef, $rdRef, $host, $param, $aref)
#
# Checks MegaRAID status.  The first data line must match
# /^Logical.*optimal/, every following line must match /^Channel.*Online\./.
# Each line that does not match raises a warning (status 1) and is quoted in
# the detail text; the host is added to the summary once.
#
# The warning no longer lowers a status of 2 set by an earlier check, and a
# missing first line is reported as a failure instead of being used
# undefined.
sub CheckMegaraid
{
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;
    my $logical = defined $aref->[0] ? $aref->[0] : '';

    if ($opts{'D'} == 1)
    {
        printf "Host %13s -> %s\n",$host,$logical;
    }

    my $bad = 0;
    if ($logical !~ m/^Logical.*optimal/)
    {
        $$rsRef .= $host.' ';
        $bad = 1;
        $$rvRef = 1 if $$rvRef < 1;     # Never downgrade a critical
        $$rdRef .= sprintf "%-11s - %-13s - Raid Failure\n",$host,$logical;
    }

    # Check each physical drive line
    foreach my $i (1 .. $#$aref)
    {
        next if $aref->[$i] =~ m/^Channel.*Online\./;

        if ($bad == 0)
        {
            $$rsRef .= $host.' ';
            $bad = 1;
        }
        $$rvRef = 1 if $$rvRef < 1;     # Never downgrade a critical
        $$rdRef .= sprintf "%-11s - %-13s - Drive Failure\n",$host,$aref->[$i];
    }
}
#--------------------------------------------------------------------------------------- | |
# CheckDBUsage($rvRef, $rsRef, $rdRef, $host, $param, $aref)
#
# Checks database space usage.  -b is 'threshold:dbs:dbs:...': a percentage
# followed by the dbspace numbers to look at.
#
# Only the lines between a line starting with "Chunks" and a line containing
# "<n> active," are examined.  A chunk line must consist of a word followed
# by five numeric columns and a /dev/ path; the 3rd column is taken as the
# dbspace number, the 5th as the allocated size and the 6th as the free
# size.  Sizes are summed per requested dbspace and a usage percentage above
# the threshold raises a warning (status 1).
#
# Fixes over the previous version:
#  - the dbspace number was captured as a single digit, so dbspace 12 was
#    counted as dbspace 1;
#  - $1 was overwritten inside the loop that was still comparing against it;
#  - a dbspace with zero allocated size caused a division by zero;
#  - a dbspace listed twice on the command line was counted twice;
#  - the warning could lower a status of 2 set by an earlier check.
sub CheckDBUsage
{
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;

    my @args      = split(':',$opts{'b'});     # split up the arguments
    my $threshold = shift(@args);              # strip the threshold off the arguments
    my %wanted    = map { ($_ + 0, 1) } grep { m/^\d+$/ } @args;
    my %usage;                                  # dbspace number -> {space, free, dev}

    if ($opts{'D'} == 1)
    {
        foreach my $i (0 .. $#$aref)
        {
            printf "Host %13s -> %s\n",$host,$aref->[$i];
        }
    }

    my $state = 0;      # 1 while inside the chunk list
    foreach my $entry (@{$aref})
    {
        if ($entry =~ m/^Chunks/)          # after the word Chunks comes our useful data
        {
            $state = 1;
            print "Changed State (DB Checker)\n" if $opts{'D'} == 1;
        }
        if ($entry =~ m/\d+\s+active,/)    # after the word active is not useful data
        {
            $state = 0;
            print "Changed State (DB Checker)\n" if $opts{'D'} == 1;
        }
        next if $state != 1;

        # word, chunk, dbspace, offset, size, free ... /dev/
        next if $entry !~ m{\w+\s+\d+\s+(\d+)\s+\d+\s+(\d+)\s+(\d+).*/dev/};
        my ($dbs,$space,$free) = ($1 + 0,$2,$3);
        next if !$wanted{$dbs};

        $usage{$dbs}->{'space'} += $space;
        $usage{$dbs}->{'free'}  += $free;
        if ($entry =~ m{(/dev/\w+/\w+)})   # the device the chunk is allocated under
        {
            $usage{$dbs}->{'dev'} .= "$1 ";
        }
    }

    my $bad = 0;
    foreach my $dbs (sort { $a <=> $b } keys %usage)
    {
        my $space = $usage{$dbs}->{'space'};
        next if $space <= 0;                # nothing allocated, nothing to compute

        my $used    = $space - $usage{$dbs}->{'free'};
        my $percent = ($used / $space) * 100;      # percentage of space used
        next if $percent <= $threshold;

        if ($bad == 0)
        {
            $bad = 1;
            $$rsRef .= $host.' ';
        }
        $$rvRef = 1 if $$rvRef < 1;         # Never downgrade a critical
        my $dev = defined $usage{$dbs}->{'dev'} ? $usage{$dbs}->{'dev'} : '';
        $$rdRef .= sprintf "%-13s - %-10s -> %u%% \n",$host,$dev,$percent;
    }
}
#--------------------------------------------------------------------------------------- | |
# CheckVdsSize($rvRef, $rsRef, $rdRef, $host, $param, $aref)
#
# Checks the VDS backlog size.  A data line containing
# "VDS Count script has timed out" raises status 2.  Lines of the form
# "<word> <label>: <word> Backlog: <number>" are listed in the detail text
# and raise status 2 when the number exceeds the -g value.  The heading and
# the summary entry for the host are added once, on the first problem.
#
# The timeout message is now terminated with a newline; it used to run
# straight into the next detail line.
sub CheckVdsSize
{
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;
    my $bad = 0;

    foreach my $entry (@{$aref})
    {
        if ($entry =~ m/VDS Count script has timed out/)
        {
            if ($bad != 1)
            {
                $$rsRef .= $host.' ';
                $$rdRef .= sprintf "Size of messages in VDS for %s\n",$host;
            }
            $$rvRef = 2;
            $bad = 1;
            $$rdRef .= "VDS Count script has timed out, Please check\n";
        }
        if ($entry =~ m/(\S+)\s+\w+:\s+(\w+)\s+Backlog:\s+(\d+)/)
        {
            my ($col1,$col2,$backlog) = ($1,$2,$3);
            if ($backlog > $opts{'g'})
            {
                if ($bad != 1)
                {
                    $$rsRef .= $host.' ';
                    $$rdRef .= sprintf "Size of messages in VDS for %s\n",$host;
                }
                $$rvRef = 2;
                $bad = 1;
            }
            $$rdRef .= sprintf "%-13s - %-10s -> %uK \n",$col1,$col2,$backlog;
        }
    }
}
#--------------------------------------------------------------------------------------- | |
# CheckVdsMsg($rvRef, $rsRef, $rdRef, $host, $param, $aref)
#
# Checks the number of messages in VDS.  A data line containing
# "VDS Count script has timed out" raises status 2.  Lines of the form
# "<word> <label>: <word> <label>: <number> NumMsgs: <number>" are listed in
# the detail text and raise status 2 when the NumMsgs value exceeds the -v
# value.  The heading and the summary entry for the host are added once, on
# the first problem.
#
# The timeout message is now terminated with a newline, and the heading in
# the timeout case says "Number of messages" like the rest of this check
# (it was a copy of the size check's heading).
sub CheckVdsMsg
{
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;
    my $bad = 0;

    foreach my $entry (@{$aref})
    {
        if ($entry =~ m/VDS Count script has timed out/)
        {
            if ($bad != 1)
            {
                $$rsRef .= $host.' ';
                $$rdRef .= sprintf "Number of messages in VDS for %s\n",$host;
            }
            $$rvRef = 2;
            $bad = 1;
            $$rdRef .= "VDS Count script has timed out, Please check\n";
        }
        if ($entry =~ m/(\S+)\s+\w+:\s+(\w+)\s+\w+:\s+\d+\s+NumMsgs:\s+(\d+)/)
        {
            my ($col1,$col2,$count) = ($1,$2,$3);
            if ($count > $opts{'v'})
            {
                if ($bad != 1)
                {
                    $$rsRef .= $host.' ';
                    $$rdRef .= sprintf "Number of messages in VDS for %s\n",$host;
                }
                $$rvRef = 2;
                $bad = 1;
            }
            $$rdRef .= sprintf "%-13s - %-10s -> %u \n",$col1,$col2,$count;
        }
    }
}
#--------------------------------------------------------------------------------------- | |
# CheckG4SVdsSize($rvRef, $rsRef, $rdRef, $host, $param, $aref)
#
# G4S variant of the VDS backlog check.  Lines of the form
# "<word> <label>: <word> Backlog: <number>" raise a warning (status 1) and
# are listed in the detail text when the number exceeds the -G value.  The
# heading and the summary entry for the host are added once.
#
# The warning no longer lowers a status of 2 set by an earlier check.
sub CheckG4SVdsSize
{
    my ($rvRef,$rsRef,$rdRef,$host,$param,$aref) = @_;
    my $bad = 0;

    foreach my $entry (@{$aref})
    {
        next if $entry !~ m/(\S+)\s+\w+:\s+(\w+)\s+Backlog:\s+(\d+)/;
        my ($col1,$col2,$backlog) = ($1,$2,$3);
        next if $backlog <= $opts{'G'};

        if ($bad != 1)
        {
            $$rsRef .= $host.' ';
            $$rdRef .= sprintf "Size of messages in VDS for %s\n",$host;
            $bad = 1;
        }
        $$rvRef = 1 if $$rvRef < 1;     # Never downgrade a critical
        $$rdRef .= sprintf "%-13s - %-13s - %-10s -> %uK \n",$host,$col1,$col2,$backlog;
    }
}
#--------------------------------------------------------------------------------------- | |
# CheckG4SVdsMsg - report reply lines whose "NumMsgs:" figure is greater
# than the -V threshold ($opts{'V'}).
#
# Arguments:
#   $rvRef - ref to the return code; set to 1 when a line breaches
#   $rsRef - ref to the summary string; "$host " is appended once per call
#   $rdRef - ref to the detail string; header plus one line per breach
#   $host  - host name used in the report text
#   $param - not used by this check
#   $aref  - ref to the array of reply lines to scan
#
# Only breaching lines are written to the detail text.
sub CheckG4SVdsMsg
{
    my ($rvRef, $rsRef, $rdRef, $host, $param, $aref) = @_;
    my $header_done = 0;

    for my $idx (0 .. $#{$aref}) {
        next unless $aref->[$idx] =~ m/(\S+)\s+\w+:\s+(\w+)\s+\w+:\s+\d+\s+NumMsgs:\s+(\d+)/;
        my ($item, $field, $count) = ($1, $2, $3);
        next unless $count > $opts{'V'};

        # First breach for this host: add it to the summary and open the section.
        unless ($header_done) {
            $$rsRef .= $host.' ';
            $$rdRef .= sprintf "Number of messages in VDS for %s\n", $host;
            $header_done = 1;
        }
        $$rvRef = 1;
        $$rdRef .= sprintf "%-13s - %-13s - %-10s -> %u \n", $host, $item, $field, $count;
    }
}
#--------------------------------------------------------------------------------------- | |
# CheckInode - flag /dev file systems whose usage percentage (5th column) is
# greater than the -I threshold ($opts{'I'}).
# NOTE(review): the column layout looks like `df -i` output - confirm against
# the remote agent.
#
# Arguments:
#   $rvRef - ref to the return code; set to 2 when a file system breaches
#   $rsRef - ref to the summary string; "$host " is appended once per call
#   $rdRef - ref to the detail string; one "mount -> NN%" line per record
#   $host  - host name used in debug output and the summary
#   $param - not used by this check
#   $aref  - ref to the array of reply lines; element 0 is not examined
#
# With -D every reply line is echoed to STDOUT first.
sub CheckInode
{
    my ($rvRef, $rsRef, $rdRef, $host, $param, $aref) = @_;

    if ($opts{'D'} == 1) {
        foreach my $line (@{$aref}) {
            printf "Host %13s -> %s\n", $host, $line;
        }
    }

    my $flagged = 0;
    foreach my $i (1 .. $#{$aref}) {
        next unless $aref->[$i] =~ m{^/dev};

        my @cols = split ' ', $aref->[$i];

        # A record may be wrapped: the device name sits alone on this line and
        # the remaining columns follow on an indented continuation line.
        if (@cols == 1 && $i < $#{$aref} && $aref->[$i + 1] =~ m/^\s+\S/) {
            push @cols, split ' ', $aref->[$i + 1];
        }

        my ($usage, $mp) = @cols[4, 5];
        # Incomplete record: nothing reliable to report.
        next unless defined $usage && defined $mp;

        $usage =~ s/^(\d*)%/$1/;
        # Skips the '-' placeholder as well as anything else non-numeric.
        next unless $usage =~ m/^\d+$/;

        if ($usage > $opts{'I'}) {
            $$rvRef = 2;
            if (!$flagged) {
                $flagged = 1;
                $$rsRef .= $host.' ';
            }
        }
        $$rdRef .= sprintf "%14s -> %u%% \n", $mp, $usage;
    }
}
#--------------------------------------------------------------------------------------- | |
# readOnly - report every reply line containing "FAIL <text>" as a read-only
# file system.
#
# Arguments:
#   $rvRef - ref to the return code; set to 2 when a FAIL line is seen
#   $rsRef - ref to the summary string; "$host " is appended once if any
#            FAIL line was seen
#   $rdRef - ref to the detail string; one READ-ONLY line per FAIL line
#   $host  - host name used in the report text
#   $param - not used by this check
#   $aref  - ref to the array of reply lines to scan
#
# With -D each reply line is echoed to STDOUT as it is examined.
sub readOnly
{
    my ($rvRef, $rsRef, $rdRef, $host, $param, $aref) = @_;
    my $seen_failure = 0;

    for my $idx (0 .. $#{$aref}) {
        print $aref->[$idx], "\n" if $opts{'D'} == 1;

        next unless $aref->[$idx] =~ /FAIL\s(.*)/;
        $$rvRef = 2;
        $seen_failure = 1;
        $$rdRef .= sprintf "%-13s - READ-ONLY %s\n", $host, $1;
    }

    $$rsRef .= $host.' ' if $seen_failure == 1;
}
#--------------------------------------------------------------------------------------- | |
# radfileSize - treat every reply line as an oversized radius file report.
#
# Arguments:
#   $rvRef - ref to the return code; set to 1 if there is any reply line
#   $rsRef - ref to the summary string; "$host " is appended once if there
#            was any reply line
#   $rdRef - ref to the detail string; one line appended per reply line
#   $host  - host name used in the report text
#   $param - not used by this check
#   $aref  - ref to the array of reply lines
#
# The lines are not filtered: the mere presence of a line counts as a hit.
# With -D each reply line is echoed to STDOUT as well.
sub radfileSize
{
    my ($rvRef, $rsRef, $rdRef, $host, $param, $aref) = @_;
    my $reported = 0;

    for my $idx (0 .. $#{$aref}) {
        my $entry = $aref->[$idx];
        if ($opts{'D'} == 1) {
            print $entry, "\n";
        }
        $$rvRef   = 1;
        $reported = 1;
        $$rdRef .= sprintf "%-13s - RADIUS FILE TOO LARGE - %s\n", $host, $entry;
    }

    $$rsRef .= $host.' ' if $reported == 1;
}
#--------------------------------------------------------------------------------------- | |
# rhCluster - inspect cluster status lines for one host.
#
# Checks performed on the reply lines:
#   * a line containing "refused"               -> cluster services not running
#   * the "Local" line lacking "rgmanager"       -> rgmanager down locally
#   * any other "Online" line (not the quorum
#     disk) lacking "rgmanager"                  -> rgmanager down on that node
#   * a "service:<name>" line lacking "started"  -> clustered service not running
#   * no "Quorum Disk" line at all               -> quorum disk missing
#
# Arguments:
#   $rvRef - ref to the return code; 2 for the first four conditions, 1 when
#            only the quorum disk is missing
#   $rsRef - ref to the summary string; "$host " is appended on any failure
#   $rdRef - ref to the detail string; one line per finding, or a
#            "Cluster seems ok" line when nothing failed
#   $host  - host name used in the report text
#   $param - not used by this check
#   $aref  - ref to the array of reply lines to scan
#
# With -D the healthy findings are printed to STDOUT as well.
sub rhCluster
{
    my ($rvRef, $rsRef, $rdRef, $host, $param, $aref) = @_;
    my $debug      = ($opts{'D'} == 1);
    my $fail       = 0;
    my $have_qdisk = 0;

    foreach my $line (@{$aref}) {
        if ($line =~ /refused/) {
            $$rvRef = 2;
            $fail   = 1;
            $$rdRef .= sprintf "Cluster services are not running on %-13s\n", $host;
        }

        my $is_local  = ($line =~ /Local/);
        my $is_qdisk  = ($line =~ /Quorum Disk/);
        my $has_rgmgr = ($line =~ /rgmanager/);

        if ($is_local) {
            if (!$has_rgmgr) {
                $$rvRef = 2;
                $fail   = 1;
                $$rdRef .= sprintf "rgmanager service is not running on %-13s\n", $host;
            }
            elsif ($debug) {
                printf "rgmanager service is running on %-13s\n", $host;
            }
        }
        elsif (!$is_qdisk && $line =~ /Online/) {
            # Another member as seen from this host; the first token of the
            # line is used to identify it in the report.
            my ($node) = $line =~ /(\S+)/;
            if (!$has_rgmgr) {
                $$rvRef = 2;
                $fail   = 1;
                $$rdRef .= sprintf "%s reports rgmanager service is not running on %s, please check the rgmanager service on both nodes\n", $host, $node;
            }
            elsif ($debug) {
                printf "%s reports rgmanager service is running on %s\n", $host, $node;
            }
        }

        $have_qdisk = 1 if $is_qdisk;

        if ($line =~ /service\:(\w+)/) {
            my $serv = $1;
            if ($line !~ /started/) {
                $$rvRef = 2;
                $fail   = 1;
                $$rdRef .= sprintf "Clustered service \"%s\" is not shown as running on %-13s\n", $serv, $host;
            }
            elsif ($debug) {
                printf "Clustered service \"%s\" is shown as running on %-13s\n", $serv, $host;
            }
        }
    }

    if (!$have_qdisk) {
        # A missing quorum disk rates 1. Only apply it when nothing above has
        # failed, so a 2 raised in this call is not overwritten by the lower
        # code (presumably 2 is the more severe level - confirm with caller).
        $$rvRef = 1 unless $fail;
        $fail   = 1;
        $$rdRef .= sprintf "Quorum disk is not present on %-13s\n", $host;
    }
    elsif ($debug) {
        printf "Quorum disk is present on %-13s\n", $host;
    }

    if ($fail) {
        $$rsRef .= $host.' ';
    }
    else {
        $$rdRef .= sprintf "Cluster seems ok on %-13s\n", $host;
    }
}
#--------------------------------------------------------------------------------------- |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment