Skip to content

Instantly share code, notes, and snippets.

Created August 4, 2017 18:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/5ca7804d3b43aef1c17decbf2448b0c5 to your computer and use it in GitHub Desktop.
Save anonymous/5ca7804d3b43aef1c17decbf2448b0c5 to your computer and use it in GitHub Desktop.
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Long;
use OpenSRF::System;
use OpenSRF::AppSession;
use OpenSRF::EX qw(:try);
# Path of the OpenSRF core config used to bootstrap; override with --osrf-config
my $opt_osrf_config = '/openils/conf/opensrf_core.xml';
# For storing the list of supposedly active services (filled by prep_service_list)
my @services;
# For storing our list of routers to check (filled by prep_routers_list)
my @routers;
# Parse the command line. GetOptions returns false on an unknown or malformed
# option; treat that as a usage error (exit 3, the same code this script uses
# for "should never happen") instead of silently checking the default config.
GetOptions(
    'osrf-config=s' => \$opt_osrf_config,
) or do {
    print "Usage: $0 [--osrf-config /path/to/opensrf_core.xml]\n";
    exit 3;
};
# If we can't bootstrap then something is horribly wrong!
# Probably "ejabberd isn't running"
try {
    OpenSRF::System->bootstrap_client(config_file => $opt_osrf_config);
} otherwise {
    print "Bootstrap failed\n";
    exit 2;
};
# This gets the list of supposedly active services.
# Asks opensrf.settings for every configured app name, de-duplicates the
# answer and stores it in the file-level @services array.
# Takes no arguments and returns nothing useful; on any failure it prints a
# one-line message and exits with status 2.
sub prep_service_list {
    # Using settings directly, as I don't know how to ask with pre-existing classes
    my $session = OpenSRF::AppSession->create('opensrf.settings');
    try {
        $session->connect;
    } otherwise {
        print "Settings Connect Failed\n";
        exit 2;
    };
    # This xpath is "Find every instance of an appname node under an activeapps node, anywhere"
    # It should grab every app configured to run on any drone
    # If your config contains apps that are not run on real drones you will get errors ;)
    my $req = $session->request('opensrf.settings.xpath.get', '//activeapps/appname');
    my $list = $req->recv;
    # Only trust the response if we actually got an object back and it is not
    # an Error; calling ->content on an undefined response would kill the
    # script without printing any status line.
    my $apps;
    $apps = $list->content if ref($list) && !UNIVERSAL::isa($list, "Error");
    # The de-dupe below needs a list of names; anything else counts as a failure.
    unless (ref($apps) eq 'ARRAY') {
        print "Active Apps List Failed\n";
        exit 2;
    }
    $req->finish;
    # Quick and dirty de-dupe
    my %u_list = map { ($_ => 1) } @{$apps};
    # And save for later
    @services = keys(%u_list);
    $session->finish;
    $session->disconnect;
}
# This gets the list of supposedly active routers
# This relies on the bootstrap being accurate in that regard
# Appends one hash ref per configured router to the file-level @routers array,
# with the keys: name, domain and services (an array ref of service names).
# Takes no arguments.
sub prep_routers_list {
    # First, we grab our (hopefully) cached config
    my $config = OpenSRF::Utils::Config->current;
    # Loop over it quick
    foreach my $entry (@{$config->bootstrap->routers}) {
        # And make entries for each router
        my $router = {
            name   => $entry->{name},
            domain => $entry->{domain},
        };
        if (!$entry->{services}) {
            # If we don't have a services list assume all active ones (aka, private router)
            $router->{services} = \@services;
        } else {
            # Otherwise, make note of what we are supposed to be running (aka, public router)
            my $listed = $entry->{services}->{service};
            # check_routers dereferences this as an array, so always store an
            # array ref. NOTE(review): presumably a router configured with a
            # single service yields a plain string here rather than a
            # one-element list - confirm against the config parser.
            if (ref($listed) eq 'ARRAY') {
                $router->{services} = $listed;
            } elsif (defined $listed) {
                $router->{services} = [$listed];
            } else {
                $router->{services} = [];
            }
        }
        # And tack it onto the list
        push @routers, $router;
    }
}
# This does the actual checking of routers/services
# For every entry in the file-level @routers array this records:
#   online  - 1 if the router answered the class list request, 0 otherwise
#   checked - number of services that were tested on this router
#   pass    - number of tested services that answered the echo correctly
#   failed  - array ref of the names of the tested services that did not
# checked/pass/failed are only set for routers that are online.
# Takes no arguments.
sub check_routers {
    # Shortcut
    my $conf = OpenSRF::Utils::Config->current;
    # Lookup table of the active services, built once instead of scanning
    # @services again for every service of every router
    my %is_active = map { ($_ => 1) } @services;
    foreach my $router (@routers) {
        # HACK WARNING - This changes the router we will be querying
        # This basically edits the cached bootstrap file. This is not guaranteed to keep working.
        # This does NOT change what domain we are querying from
        $conf->bootstrap->router_name($router->{name});
        $conf->bootstrap->domain($router->{domain});
        # Assume things failed unless they didn't.
        my $failed = 1;
        # First, check the router to see what it claims to have active services-wise
        my $session = OpenSRF::AppSession->create('router');
        try {
            $failed = 0 if $session->connect;
        } otherwise {
            $failed = 1;
        };
        if ($session->state != $session->CONNECTED || $failed) {
            $router->{online} = 0;
            next;
        }
        # Yay router commands! This should give us all services with at least one listener
        my $req = $session->request('opensrf.router.info.class.list');
        my $class_list = $req->recv;
        $req->finish;
        # Only use the answer if we got a non-Error object back whose content
        # is a list; a missing response must not crash the whole check.
        my $classes;
        $classes = $class_list->content
            if ref($class_list) && !UNIVERSAL::isa($class_list, "Error");
        unless (ref($classes) eq 'ARRAY') {
            $session->finish;
            $session->disconnect;
            $router->{online} = 0;
            next;
        }
        # If we got an answer then this router is online!
        $router->{online} = 1;
        # Counters and storage for services checks
        $router->{checked} = 0;
        $router->{pass} = 0;
        $router->{failed} = [];
        # Quick reference of what the router told us it has
        my %online_services = map { ($_ => 1) } @{$classes};
        # Accept either an array ref or a single service name for this router
        my $wanted = $router->{services};
        my @to_check = ref($wanted) eq 'ARRAY' ? @{$wanted}
                     : defined $wanted         ? ($wanted)
                     :                           ();
        foreach my $service (@to_check) {
            # This skips services not in the active list. Mainly for routers with explicit lists (aka, public routers) that not all may be configured to run.
            next unless $is_active{$service};
            $router->{checked} += 1;
            # Even if a listener is registered it may be dead, so ask the service itself
            if ($online_services{$service} && _service_responds($service)) {
                # Looks like it works, make note!
                $router->{pass} += 1;
            } else {
                # Doesn't work! Save for later reporting.
                push @{$router->{failed}}, $service;
            }
        }
        $session->finish;
        $session->disconnect;
    }
}

# Connects to $service and sends it an echo request.
# Returns 1 if the service echoed the test value back, 0 in every other case
# (connect failure, no response, Error response, unexpected content).
sub _service_responds {
    my ($service) = @_;
    my $passed = 0;
    my $session = OpenSRF::AppSession->create($service);
    try {
        $session->connect;
    } otherwise {
        # An exception while connecting simply means the service is not
        # usable; the state check below then reports it as failed.
    };
    if ($session->state == $session->CONNECTED) {
        # To my knowledge, EVERY service should have atomic echo available
        my $req = $session->request('opensrf.system.echo.atomic', 'Test');
        my $result = $req->recv;
        if (ref($result) && !UNIVERSAL::isa($result, "Error")) {
            # If we got back what we passed in the service is working! Ish. Not a flawless test.
            my $echoed = $result->content;
            $passed = 1
                if ref($echoed) eq 'ARRAY'
                && defined $echoed->[0]
                && $echoed->[0] eq 'Test';
        }
        $req->finish;
        $session->finish;
        $session->disconnect;
    }
    return $passed;
}
# This outputs the result for Nagios
# Reads the per-router results stored in the file-level @routers array, prints
# a one-line summary followed by one "domain:service" line per failed service,
# then exits: 0 when everything passed, 2 when a router is offline or a
# checked service did not respond. Never returns.
sub output_result {
    # Routers that never came online; their services were not checked at all
    my @offline_domains = map { $_->{domain} } grep { !$_->{online} } @routers;

    # Totals and failures across the routers that did answer
    my ($total, $ok) = (0, 0);
    my @broken;
    for my $entry (grep { $_->{online} } @routers) {
        $total += $entry->{checked};
        $ok    += $entry->{pass};
        for my $name (@{$entry->{failed}}) {
            push @broken, $entry->{domain} . ':' . $name;
        }
    }

    # Pick the summary line and exit status; offline routers take priority
    my $status = 2;
    my $summary;
    if (@offline_domains) {
        # Chances are there will only ever be one here (public), but join with commas anyway.
        $summary = "Router(s) Offline: " . join(', ', @offline_domains) . "\n";
    } elsif ($total != $ok) {
        $summary = "Service(s) not responding\n";
    } else {
        $summary = "Routers/Services OK\n";
        $status  = 0;
    }

    print $summary;
    # Failed services follow the summary as additional information
    for my $line (@broken) {
        print "$line\n";
    }
    exit $status;
}
# CHEAT - We need SettingsClient to have cached stuff, so ask it for a
# throwaway value before doing anything else
try {
    my $settings = OpenSRF::Utils::SettingsClient->new();
    $settings->config_value('none');
} otherwise {
    print "Settings Fetch Failed\n";
    exit 2;
};
# Run the stages in order: collect services, collect routers, probe, report
my @stages = (
    \&prep_service_list,
    \&prep_routers_list,
    \&check_routers,
    \&output_result,
);
for my $stage (@stages) {
    $stage->();
}
# This code should NEVER run, as the only way out of output_result is an exit statement
print "What? I shouldn't have reached here.";
exit 3;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment