Skip to content

Instantly share code, notes, and snippets.

@smitmartijn
Created November 6, 2015 20:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save smitmartijn/6aac405e8a7eff8be532 to your computer and use it in GitHub Desktop.
Save smitmartijn/6aac405e8a7eff8be532 to your computer and use it in GitHub Desktop.
#!/usr/bin/perl
#
# This script connects to a MySQL or MariaDB database server and
# gets information about the Galera Cluster (http://galeracluster.com/)
# to use that information to determine the health of the cluster and then
# return the health status in a format that nagios understands.
#
# This script needs a database user, preferably one with only USAGE rights. Here's an example:
# GRANT USAGE ON *.* TO 'myuser'@'myhost' IDENTIFIED BY 'mypassword';
#
# EXAMPLE on how to use it in nagios:
#
# define service {
# host_name db1.local
# service_description MySQL - Check Galera Cluster Status
# check_command check_galera_status.pl!-h $HOSTADDRESS$ -u myuser -p mypassword --nodes-warn=2 --nodes-crit=1
# normal_check_interval 5
# retry_check_interval 1
# check_period 24x7
# max_check_attempts 3
# flap_detection_enabled 1
# notifications_enabled 1
# notification_period 24x7
# notification_interval 60
# notification_options c,f,r,u,w
# contact_groups hostgroup4_servicegroup49
# notes Database - Galera:Checks a Galera Cluster for functionality
# use service-global
# }
#
# Martijn Smit <martijn@lostdomain.org>
#
# 06-11-2015: First commit
use strict;
use warnings "all";
use DBI;
use Getopt::Long;
use File::Basename;
# Nagios exit codes, indexed by the state name used throughout this script.
my %returnCodes = (
    OK       => 0,
    Warning  => 1,
    Critical => 2,
    Unknown  => 3,
);

# Built-in defaults; each one is overwritten in place when the matching
# command-line option is supplied.
my $SCRIPTNAME = basename($0);    # file name of this script, shown in the help text
my $showHelp   = 0;
my ($connectionHost, $connectionPort)       = ('localhost', 3306);
my ($connectionUser, $connectionPassword)   = ('', '');
my ($amountOfNodesWarn, $amountOfNodesCrit) = (2, 1);
# Print the plugin's help page (description, option list and an example
# invocation) to the currently selected output handle. The default values and
# the script name shown in the text are interpolated from the file-level
# option variables.
sub usage
{
    my $helpText = <<"END_OF_HELP";
DESCRIPTION
Nagios script to check if a MySQL/MariaDB Galera Cluster is fully operational.
This script needs a database user, preferbly one with only USAGE rights. Here's an example:
GRANT USAGE ON *.* TO 'myuser'\@'myhost' IDENTIFIED BY 'mypassword';
OPTIONS
--help, -? Display this help.
--host=name, -h Hostname or IP of database server (default $connectionHost).
--password=name, -p Password of user to use when connecting to database server.
--port=# Port number where database listens to (default $connectionPort).
--user=name, -u Check user for connecting to the database server.
--nodes-warn=# Turn warning when number of nodes hits this (default $amountOfNodesWarn).
--nodes-crit=# Turn critical when number of nodes hits this (default $amountOfNodesCrit).
EXAMPLE
When you have 3 nodes and want to get a warning when 1 fails and a critical when 2 fail:
$SCRIPTNAME --user=myuser --password=mypassword --host=db01.local --port=3306 --nodes-warn=2 --nodes-crit=1
END_OF_HELP
    print $helpText;
}
# Parse the command line with Getopt::Long. Every option writes directly into
# the file-level variable holding its default; $params is false when
# GetOptions() rejected something (unknown option, malformed value).
my $params = GetOptions(
    'help|?'       => \$showHelp,
    'user|u=s'     => \$connectionUser,
    'password|p=s' => \$connectionPassword,
    'host|h=s'     => \$connectionHost,
    'port=i'       => \$connectionPort,
    'nodes-warn=i' => \$amountOfNodesWarn,
    'nodes-crit=i' => \$amountOfNodesCrit,
);

# A request for the help page wins over everything else.
if ($showHelp) {
    usage();
    exit($returnCodes{'OK'});
}

# Report a parse failure before validating individual values; otherwise a
# mistyped option is hidden behind an unrelated "not provided" or range error.
# The password is deliberately never echoed.
if (!$params) {
    print "Error in parameters. User = $connectionUser, Password=hidden, Host = $connectionHost, Port = $connectionPort\n";
    exit($returnCodes{'Critical'});
}

# Credentials are mandatory.
if ($connectionUser eq '') {
    print "Error: Username not provided!\n";
    exit($returnCodes{'Critical'});
}
if ($connectionPassword eq '') {
    print "Error: Password not provided!\n";
    exit($returnCodes{'Critical'});
}

# Both node thresholds must lie within 1..128 (the default critical threshold
# is 1, so 1 is a legal value). The message names the offending option.
for my $thresholdCheck ([ 'nodes-warn', $amountOfNodesWarn ], [ 'nodes-crit', $amountOfNodesCrit ]) {
    my ($optionName, $threshold) = @{$thresholdCheck};
    next if $threshold >= 1 && $threshold <= 128;
    print "Number of Galera Cluster nodes given to --$optionName is out of expected range (1...128): $threshold\n";
    exit($returnCodes{'Critical'});
}
# Connect to the database server. RaiseError and PrintError are switched off,
# so a failed connect is signalled only by DBI->connect() returning undef; the
# reason is then available in $DBI::errstr. (The bareword forms DBI::err /
# DBI::errstr are not DBI functions and must not be used.)
my $dsn = "DBI:mysql::host=$connectionHost:port=$connectionPort;mysql_connect_timeout=10";
my $dbh = DBI->connect($dsn, $connectionUser, $connectionPassword, { RaiseError => 0, PrintError => 0, AutoCommit => 0 });
if (!defined $dbh)
{
    my $connectError = defined $DBI::errstr ? $DBI::errstr : 'unknown error';

    # Give authentication failures their own wording; every other failure
    # (including "Can't connect to ...") is reported as a connection error.
    if ($connectError =~ m/Access denied for user/) {
        print("User does not have access privileges to database: " . $connectError . "\n");
    }
    else {
        print("Error during connection: " . $connectError . "\n");
    }
    exit($returnCodes{'Critical'});
}
# Fetch the 'wsrep' status variables that the health checks rely on.
my $query = "SHOW GLOBAL STATUS WHERE variable_name in ('wsrep_cluster_size', 'wsrep_cluster_status', 'wsrep_ready', 'wsrep_connected', 'wsrep_local_state_comment', 'wsrep_local_recv_queue_avg')";

# prepare() yields undef on failure; the error text is then on the database
# handle, since no statement handle exists yet.
my $sth = $dbh->prepare($query);
if (!defined $sth)
{
    print("Error in preparing query: " . $dbh->errstr . "\n");
    $dbh->disconnect;
    exit($returnCodes{'Critical'});
}

# execute() returns a false value on error, so test its result directly.
if (!$sth->execute())
{
    print("Error in executing query: " . $sth->errstr . "\n");
    $dbh->disconnect;
    exit($returnCodes{'Critical'});
}

# Each row is a (variable name, value) pair; collect them into %results.
my %results;
while (my $row = $sth->fetchrow_arrayref())
{
    my ($variableName, $variableValue) = @{$row};
    $results{$variableName} = $variableValue;
}

# fetchrow_arrayref() also returns undef when a fetch fails, so the loop ending
# does not by itself mean success; check the handle's error state.
if ($sth->err)
{
    print("Error in fetching data: " . $sth->errstr . "\n");
    $sth->finish;
    $dbh->disconnect;
    exit($returnCodes{'Critical'});
}

# All data is in %results; release the statement and close the connection.
$sth->finish;
$dbh->disconnect;
# Evaluate the collected status variables. The first check that fails prints a
# one-line message and exits with the matching Nagios code.

# Without wsrep_cluster_status the server is most likely not a Galera node.
if (!defined($results{'wsrep_cluster_status'}))
{
    print "Warning: It looks like this is not a MySQL/MariaDB Galera cluster. The variable wsrep_cluster_status is not defined.\n";
    exit($returnCodes{'Warning'});
}

# Placeholders for any variable the server did not return, so that the
# comparisons and messages below never operate on undef. Values present in
# %results override the placeholders.
my %galeraStatus = (
    'wsrep_ready'                => 'undefined',
    'wsrep_connected'            => 'undefined',
    'wsrep_local_state_comment'  => 'undefined',
    'wsrep_cluster_size'         => 0,
    'wsrep_local_recv_queue_avg' => 0,
    %results,
);

# The cluster component this node belongs to must be Primary to be functional.
if ($galeraStatus{'wsrep_cluster_status'} ne "Primary")
{
    print "Critical: Node is not Primary: ".$galeraStatus{'wsrep_cluster_status'}."\n";
    exit($returnCodes{'Critical'});
}

# The node must be ready; report the value of wsrep_ready itself.
if ($galeraStatus{'wsrep_ready'} ne "ON")
{
    print "Critical: Node is not ready: ".$galeraStatus{'wsrep_ready'}."\n";
    exit($returnCodes{'Critical'});
}

# The node must be connected to the other nodes; report wsrep_connected itself.
if ($galeraStatus{'wsrep_connected'} ne "ON")
{
    print "Critical: Node is not connected: ".$galeraStatus{'wsrep_connected'}."\n";
    exit($returnCodes{'Critical'});
}

# The node's data must be in sync with the cluster.
if ($galeraStatus{'wsrep_local_state_comment'} ne "Synced")
{
    print "Critical: Node is not synced: ".$galeraStatus{'wsrep_cluster_status'}." - ".$galeraStatus{'wsrep_local_state_comment'}."\n";
    exit($returnCodes{'Critical'});
}

# Node count at or below the critical threshold. This has to be tested before
# the warning threshold: any size that is critical also satisfies the warning
# condition, so testing the warning first would make this branch unreachable.
if ($galeraStatus{'wsrep_cluster_size'} <= $amountOfNodesCrit)
{
    print "Critical: cluster_size: ".$galeraStatus{'wsrep_cluster_size'}.", cluster_status: ".$galeraStatus{'wsrep_cluster_status'}."\n";
    exit($returnCodes{'Critical'});
}

# Node count at or below the warning threshold.
if ($galeraStatus{'wsrep_cluster_size'} <= $amountOfNodesWarn)
{
    print "Warning: cluster_size: ".$galeraStatus{'wsrep_cluster_size'}.", cluster_status: ".$galeraStatus{'wsrep_cluster_status'}."\n";
    exit($returnCodes{'Warning'});
}

# A receive-queue average above 0.5 is treated as a sign that the node is
# overloaded and pausing replication traffic. The comparison must be numeric
# ('>'); a string comparison would, for example, rate "0.500000" above "0.5".
if ($galeraStatus{'wsrep_local_recv_queue_avg'} > 0.5)
{
    print "Warning: recv_queue_avg is higher than 0.5: ".$galeraStatus{'wsrep_local_recv_queue_avg'}.", cluster_status: ".$galeraStatus{'wsrep_cluster_status'}."\n";
    exit($returnCodes{'Warning'});
}

# Every check passed.
print "OK cluster_size: ".$galeraStatus{'wsrep_cluster_size'}.", cluster_status: ".$galeraStatus{'wsrep_cluster_status'}.", local_state: ".$galeraStatus{'wsrep_local_state_comment'}."\n";
exit($returnCodes{'OK'});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment