Created
August 4, 2017 18:04
-
-
Save anonymous/5ca7804d3b43aef1c17decbf2448b0c5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl
# Nagios-style check: verifies that the configured OpenSRF routers are
# reachable and that the active services registered with them answer an echo.
# Exit codes used by this script: 0 = OK, 2 = CRITICAL, 3 = UNKNOWN.
use strict;
use warnings;

use Getopt::Long;
use OpenSRF::System;
use OpenSRF::AppSession;
use OpenSRF::EX qw(:try);

# Sane-ish default location of the bootstrap config; override with --osrf-config
my $opt_osrf_config = '/openils/conf/opensrf_core.xml';

# Names of the supposedly active services (filled in by prep_service_list)
my @services;

# One hashref per router to check (filled in by prep_routers_list)
my @routers;

# GetOptions returns false (after warning on STDERR) when the command line is
# invalid. Report that as UNKNOWN instead of silently running with defaults.
GetOptions(
    'osrf-config=s' => \$opt_osrf_config,
) or do {
    print "Usage: $0 [--osrf-config <path>]\n";
    exit 3;
};

# If we can't bootstrap then something is horribly wrong!
# Probably "ejabberd isn't running"
try {
    OpenSRF::System->bootstrap_client(config_file => $opt_osrf_config);
} otherwise {
    print "Bootstrap failed\n";
    exit 2;
};
# Fills the file-level @services array with the de-duplicated names of every
# application listed under an activeapps node in the settings server.
# Takes no arguments and returns nothing useful; on any failure it prints a
# one-line message and exits with status 2 (CRITICAL).
sub prep_service_list {
    # Using settings directly, as I don't know how to ask with pre-existing classes
    my $session = OpenSRF::AppSession->create('opensrf.settings');
    try {
        $session->connect;
    } otherwise {
        print "Settings Connect Failed\n";
        exit 2;
    };

    # This xpath is "Find every instance of an appname node under an activeapps node, anywhere"
    # It should grab every app configured to run on any drone
    # If your config contains apps that are not run on real drones you will get errors ;)
    my $req  = $session->request('opensrf.settings.xpath.get', '//activeapps/appname');
    my $list = $req->recv;

    # recv may hand back nothing at all (presumably on a timeout) as well as
    # an Error object. Either way there is no usable list, so fail with the
    # plugin's own message instead of dying on an undefined method call.
    my $apps;
    if (defined $list && !UNIVERSAL::isa($list, "Error")) {
        $apps = $list->content;
    }
    if (ref($apps) ne 'ARRAY') {
        print "Active Apps List Failed\n";
        exit 2;
    }
    $req->finish;

    # Quick and dirty de-dupe
    my %u_list = map { ($_ => 1) } @{$apps};

    # And save for later
    @services = keys(%u_list);

    $session->finish;
    $session->disconnect;
}
# Fills the file-level @routers array with one hashref per router found in the
# cached bootstrap config. Each entry carries:
#   name     - the router's name
#   domain   - the router's domain
#   services - arrayref of service names this router is expected to carry
# This relies on the bootstrap being accurate in that regard.
# Takes no arguments and returns nothing useful.
sub prep_routers_list {
    # First, we grab our (hopefully) cached config
    my $config = OpenSRF::Utils::Config->current;

    foreach my $entry (@{$config->bootstrap->routers}) {
        my $router = {
            name   => $entry->{name},
            domain => $entry->{domain},
        };

        if (!$entry->{services}) {
            # No explicit services list: assume all active ones (aka, private router)
            $router->{services} = \@services;
        } else {
            # Explicit list (aka, public router). A lone <service> element may
            # be parsed as a plain scalar rather than a list (assumption about
            # the config parser), and check_routers dereferences this as an
            # array, so always store an arrayref.
            my $listed = $entry->{services}->{service};
            if (ref($listed) eq 'ARRAY') {
                $router->{services} = $listed;
            } elsif (defined $listed) {
                $router->{services} = [$listed];
            } else {
                $router->{services} = [];
            }
        }

        # And tack it onto the list
        push @routers, $router;
    }
}
# Private helper: returns 1 if $service accepts a connection and echoes the
# string 'Test' back via opensrf.system.echo.atomic, 0 otherwise.
# The request is routed through whichever router the cached bootstrap config
# currently points at (check_routers sets that before calling).
sub _service_responds {
    my ($service) = @_;

    my $session = OpenSRF::AppSession->create($service);

    # A try block with no handler re-throws (Error.pm semantics), which would
    # abort the whole plugin. Swallow the failure here; the state check below
    # decides the outcome.
    try {
        $session->connect;
    } otherwise {
        # connect failure is reported via the state check
    };
    return 0 unless $session->state == $session->CONNECTED;

    # To my knowledge, EVERY service should have atomic echo available
    my $req    = $session->request('opensrf.system.echo.atomic', 'Test');
    my $result = $req->recv;

    my $passed = 0;
    # recv may return nothing at all; only look at a real, non-Error response
    if (defined $result && !UNIVERSAL::isa($result, "Error")) {
        my $content = $result->content;
        my $echoed  = ref($content) eq 'ARRAY' ? $content->[0] : undef;
        # If we got back what we passed in the service is working! Ish. Not a flawless test.
        $passed = 1 if defined $echoed && $echoed eq 'Test';
    }

    $req->finish;
    $session->finish;
    $session->disconnect;

    return $passed;
}

# Checks every router in the file-level @routers array and records the result
# on each router hashref:
#   online  - 1 if the router answered the class-list request, else 0
#   checked - number of services examined (only set when online)
#   pass    - number of services that answered the echo (only set when online)
#   failed  - arrayref of service names that did not (only set when online)
# Takes no arguments and returns nothing useful.
sub check_routers {
    # Shortcut
    my $conf = OpenSRF::Utils::Config->current;

    # Lookup table of active services, built once instead of scanning
    # @services for every service on every router
    my %is_active = map { ($_ => 1) } @services;

    foreach my $router (@routers) {
        # HACK WARNING - This changes the router we will be querying
        # This basically edits the cached bootstrap file. This is not guaranteed to keep working.
        # This does NOT change what domain we are querying from
        $conf->bootstrap->router_name($router->{name});
        $conf->bootstrap->domain($router->{domain});

        # Assume things failed unless they didn't.
        my $failed = 1;

        # First, check the router to see what it claims to have active services-wise
        my $session = OpenSRF::AppSession->create('router');
        try {
            $failed = 0 if $session->connect;
        } otherwise {
            $failed = 1;
        };
        if ($failed || $session->state != $session->CONNECTED) {
            $router->{online} = 0;
            next;
        }

        # Yay router commands! This should give us all services with at least one listener
        my $req        = $session->request('opensrf.router.info.class.list');
        my $class_list = $req->recv;
        $req->finish;

        # No response, an Error object, or content that is not a list all
        # mean the router did not give us a usable answer
        my $classes;
        if (defined $class_list && !UNIVERSAL::isa($class_list, "Error")) {
            $classes = $class_list->content;
        }
        if (ref($classes) ne 'ARRAY') {
            $session->finish;
            $session->disconnect;
            $router->{online} = 0;
            next;
        }

        # If we got an answer then this router is online!
        $router->{online} = 1;

        # Counters and storage for services checks
        $router->{checked} = 0;
        $router->{pass}    = 0;
        $router->{failed}  = [];

        # Quick reference of what the router told us it has
        my %online_services = map { ($_ => 1) } @{$classes};

        foreach my $service (@{$router->{services}}) {
            # This skips services not in the active list. Mainly for routers with
            # explicit lists (aka, public routers) that not all may be configured to run.
            next unless $is_active{$service};

            $router->{checked} += 1;

            # Check the service, even if a listener is registered it may be dead
            if ($online_services{$service} && _service_responds($service)) {
                # Looks like it works, make note!
                $router->{pass} += 1;
            } else {
                # Doesn't work! Save for later reporting.
                push @{$router->{failed}}, $service;
            }
        }

        $session->finish;
        $session->disconnect;
    }
}
# Prints the Nagios status line (plus one "domain:service" line per failed
# service) based on the results stored in the file-level @routers array, then
# exits. Never returns.
# Exit status: 0 when everything checked passed, 2 when a router is offline or
# a service did not respond, 3 when no service was checked at all.
sub output_result {
    # Counters/storage
    my $checked_services = 0;
    my $up_services      = 0;
    my @down_services;
    my @down_routers;

    # Assume all is good until proven otherwise
    my $retcode = 0;

    foreach my $router (@routers) {
        # If the router isn't online then we don't need to look at services - We didn't check any!
        if (!$router->{online}) {
            push @down_routers, $router->{domain};
            next;
        }

        # Otherwise increment our counters as needed
        $checked_services += $router->{checked};
        $up_services      += $router->{pass};

        # Keep track of any down services for reporting in a minute
        push @down_services, map { $router->{domain} . ':' . $_ } @{$router->{failed}};
    }

    if (@down_routers) {
        # Down routers are really bad. Chances are there will only ever be one here (public), but join with commas anyway.
        print "Router(s) Offline: " . join(', ', @down_routers) . "\n";
        $retcode = 2;
    } elsif ($checked_services != $up_services) {
        # Non-responsive services are also really bad
        print "Service(s) not responding\n";
        $retcode = 2;
    } elsif ($checked_services == 0) {
        # No routers configured, or no active services matched: nothing was
        # actually verified, so claiming OK would be a false all-clear.
        print "No routers/services were checked\n";
        $retcode = 3;
    } else {
        # But if we have nothing then things are good!
        print "Routers/Services OK\n";
    }

    # If there are down services then spit them out as additional information.
    print "$_\n" foreach (@down_services);

    # And return our response code
    exit $retcode;
}
# CHEAT - Ask the settings client for a throwaway value so that it has its
# data cached before the checks below run. Failure here is CRITICAL.
try {
    OpenSRF::Utils::SettingsClient->new()->config_value('none');
} otherwise {
    print "Settings Fetch Failed\n";
    exit 2;
};

# Run the stages in order: gather services, gather routers, probe, report.
my @stages = (
    \&prep_service_list,
    \&prep_routers_list,
    \&check_routers,
    \&output_result,
);
foreach my $stage (@stages) {
    $stage->();
}

# This code should NEVER run, as the only way out of output_result is an exit statement
print "What? I shouldn't have reached here.";
exit 3;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment