Skip to content

Instantly share code, notes, and snippets.

@dexterbt1
Created June 5, 2010 10:24
Show Gist options
  • Save dexterbt1/426515 to your computer and use it in GitHub Desktop.
Save dexterbt1/426515 to your computer and use it in GitHub Desktop.
#!/usr/bin/perl -w
use strict;
use warnings;
use File::Spec;
use Getopt::Long;
use Data::Dumper;
use YAML qw/Dump LoadFile/;
use POSIX qw(setsid :sys_wait_h);
use Term::ANSIColor qw/:constants/;
$Term::ANSIColor::AUTORESET = 1;
# Usage banner, printed whenever the command line or the config file is unusable.
my $usage = "Usage: $0 <config_file> [--verbose] [--pscmd=...] [--stdionewline=1]\n";

# Option defaults; GetOptions overwrites them in place.
#   pscmd        - command whose output app_is_alive() scans for running apps
#   verbose      - when true, chat() messages are printed to STDERR
#   stdionewline - when true, a restarted app gets /dev/null as STDIN and its
#                  output is piped through tr to turn newlines into spaces
my %opts = (
    pscmd        => "ps wwwwwxo pid,cmd",
    verbose      => 0,
    stdionewline => 1,
);

# GetOptions returns false on unknown or malformed options; stop right there
# instead of silently carrying on with the defaults.
GetOptions(
    "pscmd=s"        => \$opts{pscmd},
    "verbose"        => \$opts{verbose},
    "stdionewline=i" => \$opts{stdionewline},
) or die $usage;

chat("Wyrls-Safetynet Process Checker\n");

# The first remaining argument is the YAML config file.
my $conf_file = shift(@ARGV) || '';
if (not -e $conf_file) {
    chat_error("ERROR: config file not found: $conf_file\n");
    die $usage;
}

# Parse the config; YAML errors are reported together with the usage banner.
my $conf;
eval {
    $conf = LoadFile($conf_file);
};
if ($@) {
    die "$@\n$usage";
}
# validate config
#
# Expected shape: a YAML sequence whose items are single-key mappings,
#   <app name> => { ps_grep_word => <pattern>, run => <command line> }
# Every accepted entry is pushed onto @apps as [ $app_name, $app ].
# Any violation is reported on STDERR and ends the program with status 255.
my @apps = ();
{
    # An empty file or a top-level mapping would otherwise crash on @$conf.
    if (ref($conf) ne 'ARRAY') {
        chat_error("ERROR: config must be a list of app sections:\n");
        chat_dump(Dump($conf));
        exit(255);
    }
    foreach my $c (@$conf) {
        if (ref($c) ne 'HASH') {
            chat_error("ERROR: each section must map one app name to its settings:\n");
            chat_dump(Dump($c));
            exit(255);
        }
        if (scalar(keys %$c) != 1) {
            chat_error("ERROR: multiple keys detected for the following section:\n");
            chat_dump(Dump($c));
            exit(255);
        }
        my ($app_name) = keys %$c;
        my $app = $c->{$app_name};
        if (ref($app) ne 'HASH') {
            chat_error("ERROR: settings for app [$app_name] must be a mapping:\n");
            chat_dump(Dump($c));
            exit(255);
        }
        # check required values for app; a key that is present but undefined,
        # empty or non-scalar is as unusable as a missing one
        foreach my $appkey (qw/ps_grep_word run/) {
            my $value = $app->{$appkey};
            if (not defined $value or ref $value or $value eq '') {
                chat_error("ERROR: missing '$appkey' value for app [$app_name]:\n");
                chat_dump(Dump($c));
                exit(255);
            }
        }
        # the grep word is used as a regular expression; compile it once here
        # so that a broken pattern is reported together with the app it belongs to
        my $gw = $app->{ps_grep_word};
        my $gw_re = eval { qr/$gw/ };
        if (not defined $gw_re) {
            chat_error("ERROR: invalid ps_grep_word pattern for app [$app_name]: $@");
            chat_dump(Dump($c));
            exit(255);
        }
        # check that grep word can be grep'd from the run value,
        # note that this is just a shallow test,
        # there are no guarantees that the grep word can be grep'd: e.g SOMEVAR=asdf /usr/bin/mycommand
        if ($app->{run} !~ $gw_re) {
            chat_error("ERROR: cannot match grepword from the run command:\n");
            chat_dump(Dump($c));
            exit(255);
        }
        push @apps, [ $app_name, $app ];
    }
    # check duplicate app names and grep words
    # both tests are symmetric, so it is enough to compare every app with the
    # apps listed before it; each pair is visited exactly once
    foreach my $o (1 .. $#apps) {
        my ($oapp_name, $oapp) = @{ $apps[$o] };
        foreach my $i (0 .. $o - 1) {
            my ($iapp_name, $iapp) = @{ $apps[$i] };
            if ($oapp_name eq $iapp_name) {
                chat_error( "ERROR: potential duplicate apps with name '$oapp_name':\n" );
                chat_dump( Dump({ $oapp_name => $oapp },{ $iapp_name => $iapp }) );
                exit(255);
            }
            # two grep words clash when either one matches inside the other,
            # because a ps line of one app could then be taken for the other
            my $ogw = $oapp->{ps_grep_word};
            my $igw = $iapp->{ps_grep_word};
            if ( ($iapp->{ps_grep_word} =~ /$ogw/) or ($oapp->{ps_grep_word} =~ /$igw/) ) {
                chat_error( "ERROR: potential duplicate grep word detected between apps '$oapp_name' and '$iapp_name':\n" );
                chat_dump( Dump({ $oapp_name => $oapp },{ $iapp_name => $iapp }) );
                exit(255);
            }
        }
    }
}
# Main pass: print one status line per configured app and try to bring back
# every app that is not found in the process listing.
for my $entry (@apps) {
    my ($app_name, $app) = @{$entry};
    out(sprintf("- %-20s\t", $app_name));

    # Healthy app: report it and move on to the next one.
    if (app_is_alive($app)) {
        out(GREEN."Running!".RESET."\n");
        next;
    }

    # Not running: announce the recovery attempt, then report how it went.
    out(YELLOW."Dead. Trying to recover ... ".RESET);
    my $verdict = app_restart($app)
        ? GREEN."Recovered!".RESET."\n"
        : RED."Failure! [$app->{run}]".RESET."\n";
    out($verdict);
}

sleep 1;
chat("Done.\n");
exit(0);
# ================================
# Tell whether an app shows up in the process listing.
#
# Runs the command in $opts{pscmd} and scans its output line by line for the
# app's ps_grep_word, which is applied as a regular expression.
#
#   $app - hash ref of app settings; only {ps_grep_word} is read here
#
# Returns 1 as soon as a matching line is seen, 0 when none matches.
# Exits the program with status 255 when the ps command cannot be started.
sub app_is_alive {
    my ($app) = @_;
    my $gw  = $app->{ps_grep_word};
    my $cmd = $opts{pscmd};

    # Three-argument pipe open: the mode is explicit, so nothing inside
    # pscmd can be mistaken for an open mode.
    open(my $fh, '-|', $cmd)
        or do {
            chat_error("FATAL: unable to run pscmd! $!\n");
            exit(255);
        };

    my $found = 0;
    while (my $grepline = <$fh>) {
        chomp $grepline;
        # lines mentioning "sh -c" are ignored -- presumably shell wrapper
        # processes that merely echo the command line (TODO confirm)
        next if ($grepline =~ /sh -c/);
        if ($grepline =~ /$gw/) {
            $found = 1;
            last;
        }
    }

    # Release the pipe now; the result is ignored on purpose because we may
    # have stopped reading before the ps command finished writing.
    close $fh;
    return $found;
}
# Expand $NAME and ${NAME} references in a command string using %ENV.
# Returns the expanded string; text that matches no variable is left alone.
sub _interpolate_env {
    my ($cmd) = @_;
    # Longest names first: regex alternation takes the first alternative that
    # matches, so $HOSTNAME can never be half-replaced by a shorter $HOST.
    my @names = sort { length($b) <=> length($a) or $a cmp $b }
                grep { length } keys %ENV;
    return $cmd unless @names;
    # quotemeta keeps odd characters in variable names from being read as regex syntax
    my $alternation = join '|', map { quotemeta } @names;
    # single pass, so text coming from a variable's value is never expanded again
    $cmd =~ s/\$(?:\{($alternation)\}|($alternation))/$ENV{ defined $1 ? $1 : $2 }/ge;
    return $cmd;
}

# Start an app in the background and check that it stays up.
#
#   $app - hash ref of app settings; {run} is the command line to start
#          (environment variable references in it are expanded first)
#
# The command is run in a forked child that becomes its own session leader.
# With $opts{stdionewline} set, the child reads from /dev/null and both its
# STDOUT and STDERR go through `tr` to replace newlines with spaces.
#
# Returns 1 when, two seconds after the fork, the child still exists, has not
# been reaped and the app is visible to app_is_alive(); returns 0 otherwise.
# Dies when fork fails.
sub app_restart {
    my ($app) = @_;
    # interpolate env
    my $cmd = _interpolate_env($app->{run} || '');
    # trim cmds
    $cmd =~ s/^\s+//;
    $cmd =~ s/\s+$//;
    # nothing to execute, nothing to recover
    return 0 if $cmd eq '';
    # attempt run
    $SIG{CHLD} = \&REAPER;
    my $pid = fork;
    (defined $pid)
        or die "unable to fork: $!";
    if ($pid == 0) {
        # start a new session, needed in order to create a new process group
        setsid() || die "Can't start a new session: $!";
        umask 0002;
        # one argv entry per word, so the app receives separate arguments
        my ($exec, @exec_args) = split /\s+/, $cmd;
        if ($opts{stdionewline}) {
            # TODO: this is hardcoded behavior for now, since this is needed by servicedesk for the nagios plugin
            open(STDIN, '<', '/dev/null')
                or die "Can't read /dev/null: $!";
            # list form: tr is started directly and gets the two characters \n as its first set
            open(STDOUT, '|-', 'tr', '\n', ' ')
                or die "ERROR: unable to run tr command: $!";
            open(STDERR, '>&', \*STDOUT)
                or die "ERROR: unable to redirect STDERR: $!";
        }
        # block form never goes through a shell, even without arguments
        exec { $exec } $exec, @exec_args;
        # only reached when exec itself failed
        exit(255);
    }
    # i am the parent
    sleep 2;
    # with WNOHANG, 0 means the child is still running; anything else means
    # it already exited (or was collected by the SIGCHLD handler)
    my $wait = waitpid($pid,WNOHANG);
    # && (not `and`) so the whole condition is evaluated before the assignment
    my $pass = kill(0 => $pid) && ($wait == 0) && app_is_alive($app);
    return $pass ? 1 : 0;
}
# SIGCHLD handler: collect every child process that has already exited so
# none of them lingers as a zombie. Exit statuses are deliberately discarded.
sub REAPER {
    # waitpid overwrites $? (and may touch $!); keep the values seen by the
    # code this handler interrupted
    local ($!, $?);
    while (waitpid(-1,WNOHANG) > 0) {
        # nothing to do, reaping is the whole point
    }
    $SIG{CHLD} = \&REAPER; # still loathe sysV
}
# Print an error message to STDERR: all arguments concatenated, wrapped
# between the RED and RESET colour sequences.
sub chat_error {
    my @pieces = @_;
    my $message = join('', RED, @pieces, RESET);
    print STDERR $message;
}
# Print a data dump to STDERR: all arguments concatenated, wrapped between
# the GREEN and RESET colour sequences.
sub chat_dump {
    my @pieces = @_;
    my $dump = join('', GREEN, @pieces, RESET);
    print STDERR $dump;
}
# Progress chatter: the concatenated arguments go to STDERR, but only when
# the verbose option is switched on.
sub chat {
    print STDERR join('', @_) if $opts{verbose};
}
# Regular program output: concatenate all arguments and print them to STDOUT.
sub out {
    my @pieces = @_;
    my $text = join('', @pieces);
    print STDOUT $text;
}
__END__
- myapp1:
    ps_grep_word: SAVETESTCONF
    run: $HOME/pl/testdaemon.pl SAVETESTCONF $HOME/etc/$HOSTNAME/myapp.config
- myapp2:
    ps_grep_word: TESTSAVED
    run: $HOME/pl/testdaemon.pl TESTSAVED $HOME/etc/$HOSTNAME/myapp.config
- myapp3:
    ps_grep_word: TEST2SAVED
    run: $HOME/pl/testdaemon.pl TEST2SAVED
- myapp4:
    ps_grep_word: TEST3SAVE
    run: /tmp$HOME/pl/testdaemon.pl TEST3SAVE
- myapp5:
    ps_grep_word: TEST4SAVED
    run: $HOME/pl/testdaemon.pl TEST4SAVED
- myapp6:
    ps_grep_word: TEST6SAVED
    run: $HOME/pl/testdaemon.pl TEST6SAVED
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment