Created
February 22, 2012 17:26
-
-
Save hcoyote/1886187 to your computer and use it in GitHub Desktop.
Determine if the Hadoop fsimage file is older than the edits file by some threshold. If the fsimage is older than the edits by some significant value, this is a sign that the secondary namenode may not be properly working because it's not able to produce the merged edits+fsimage file that gets sent back to the namenode.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl
#
# This script is managed by puppet.
#
# This check determines if the Hadoop Namenode fsimage file is older
# than the edits file by some threshold.  If the fsimage is older than
# the edits by some significant value, this is a sign that the secondary
# namenode may not be properly working because it's not able to produce
# the merged edits+fsimage file that gets sent back to the namenode.
#
# Exit codes:
#   0  "OK: ..."        every directory passed the check
#   2  "CRITICAL: ..."  at least one directory failed the check
#   3                   usage problem (--help, bad options, no --dirs)
#
use strict;
use warnings;

use File::stat;
use Getopt::Long;

# When not already running under sudo, and not as the hdfs user, re-exec
# this script as hdfs.  This is a work around to being called from nrpe.
if (not exists $ENV{SUDO_USER}) {
    my $current_user = getpwuid($>);    # scalar context: the login name

    # An unresolvable uid is treated like any other non-hdfs user.
    if (!defined $current_user or $current_user ne 'hdfs') {
        # List-form exec never goes through a shell, so arguments that
        # contain spaces or shell metacharacters reach the new process
        # intact.  If exec itself fails, perl emits a warning and the
        # check carries on as the current user.
        exec('/usr/bin/sudo', '-u', 'hdfs', $0, @ARGV);
    }
}

# check_metadata_dir($dir, $max_lag)
#
# Inspect one namenode metadata directory.  $dir must contain
# name/current/edits and name/current/fsimage; the fsimage mtime may
# trail the edits mtime by at most $max_lag seconds.
#
# Returns a human-readable error string when something is wrong, or
# nothing (undef in scalar context) when the directory is healthy.
sub check_metadata_dir {
    my ($dir, $max_lag) = @_;

    return "$dir doesn't exist" if !-d $dir;

    my $current_dir = "$dir/name/current";
    return "$dir is not an HDFS metadata dir." if !-d $current_dir;

    # edits is examined before fsimage so that the first problem
    # reported for a directory is the same one as before.
    my %mtime_of;
    for my $name (qw(edits fsimage)) {
        my $path = "$current_dir/$name";
        return "$path is missing!" if !-f $path;

        # The file can disappear or become unreadable between the -f
        # test and the stat; report that rather than dying on undef.
        my $info = stat($path)
            or return "$path cannot be examined: $!";
        $mtime_of{$name} = $info->mtime;
    }

    my $merge_lag = $mtime_of{edits} - $mtime_of{fsimage};
    if ($merge_lag > $max_lag) {
        return "$dir fsimage is > $max_lag seconds older than edits file";
    }

    return;
}

my $default_lag_seconds = 3600 * 4;
my $lag_seconds         = $default_lag_seconds;
my @dirs;
my $help;

# --lag-seconds is declared as an integer so that a non-numeric value is
# rejected here instead of surfacing later in the numeric comparison.
my $options_ok = GetOptions(
    "lag-seconds|l=i" => \$lag_seconds,
    "dirs|d=s{,}"     => \@dirs,
    "help|h"          => \$help,
);

if (!$options_ok) {
    # GetOptions has already described the specific problem on STDERR.
    warn "Invalid options; try $0 --help\n";
    exit 3;
}

if (defined $help) {
    print <<EOF;
$0 [options]
    --lag-seconds
    -l      number of seconds edits file can be ahead of fsimage.
            default is: $default_lag_seconds;
    --dirs
    -d      List of dirs that we check the namenode metadata on.
    --help
    -h      This.
EOF
    exit 3;
}

if (scalar @dirs < 1) {
    warn "Please specify some directories to check\n";
    exit 3;
}

# Collect every problem so a single run reports on all directories.
my @errors;
for my $dir (@dirs) {
    my $problem = check_metadata_dir($dir, $lag_seconds);
    push @errors, $problem if defined $problem;
}

if (scalar @errors > 0) {
    print "CRITICAL: ", join("; ", @errors), "\n";
    exit 2;
}
else {
    print "OK: all dirs are happy and updating\n";
    exit 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment