Last active
December 19, 2015 12:19
-
-
Save arsenio/5953753 to your computer and use it in GitHub Desktop.
A better NRPE plugin for checking Redis, including optional slave monitoring. Based on the work of Samuel Stauffer (see https://gist.github.com/samuel)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# Originally written by Samuel Stauffer <https://gist.github.com/samuel> | |
# Modified by Arsenio Santos <arsenio@gmail.com> | |
from __future__ import division | |
import socket | |
import sys | |
from optparse import OptionParser | |
# Process exit statuses handed to sys.exit() to report the check result
# (this script is an NRPE plugin): 0 = OK, 1 = warning, 2 = critical.
EXIT_OK, EXIT_WARN, EXIT_CRITICAL = 0, 1, 2
def _read_reply(sock):
    """Read one complete RESP reply from *sock* and return its raw bytes.

    A bulk-string reply starts with ``$<length>\\r\\n``; once that header has
    arrived the exact reply size is known, so reading continues until every
    byte is in. Any other reply type is treated as a single line. Reading
    also stops if the peer closes the connection.
    """
    data = b""
    expected = None  # total reply size in bytes; unknown until the header line arrives
    while expected is None or len(data) < expected:
        chunk = sock.recv(4096)
        if not chunk:
            # Peer closed the connection: hand back whatever was received.
            break
        data += chunk
        if expected is None:
            header_end = data.find(b"\r\n")
            if header_end < 0:
                continue  # header line still incomplete
            expected = header_end + 2
            if data[:1] == b"$":
                try:
                    body_len = int(data[1:header_end])
                except ValueError:
                    body_len = -1
                if body_len >= 0:
                    # Payload plus its trailing CRLF.
                    expected += body_len + 2
    return data


def get_info(host, port, timeout):
    """Send INFO to a Redis server and return the reply as a dict.

    Args:
        host: Hostname or IP address of the Redis server.
        port: TCP port of the Redis server.
        timeout: Socket timeout in seconds; a falsy value means "block
            indefinitely".

    Returns:
        A dict mapping each ``name:value`` line of the INFO reply to its
        value, both as text. Lines without a colon are skipped.

    Raises:
        socket.error: On connection failure, timeout, or when Redis answers
            with an error reply instead of the INFO payload.
    """
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # Per-socket timeout: does not alter the process-wide default.
        s.settimeout(timeout or None)
        s.connect((host, port))
        # sendall() retries partial writes; the bytes literal works on 2 and 3.
        s.sendall(b"*1\r\n$4\r\ninfo\r\n")
        reply = _read_reply(s)
    finally:
        # Always release the socket, even when connect/send/recv raised.
        s.close()

    if reply[:1] == b"-":
        # RESP error reply (e.g. authentication required): surface it through
        # the exception type callers already handle instead of returning junk.
        raise socket.error(
            "Redis replied with an error: %s"
            % reply[1:].decode("utf-8", "replace").strip()
        )

    text = reply.decode("utf-8", "replace")
    return dict(line.split(':', 1) for line in text.split('\r\n') if ':' in line)
def build_parser():
    """Build the command-line option parser for the Redis check.

    Returns:
        An ``OptionParser`` with server/port, memory thresholds, timeout and
        the optional slave-monitoring options registered. Every numeric
        option declares its type, so a malformed value is rejected by the
        parser with a usage error rather than failing later in the check.
    """
    parser = OptionParser()
    parser.add_option("-s", "--server", dest="server",
                      help="Redis server to connect to.",
                      default="127.0.0.1")
    parser.add_option("-p", "--port", dest="port",
                      help="Redis port to connect to.",
                      type="int", default=6379)
    parser.add_option("-w", "--warn", dest="warn_memory",
                      help="Memory utilization (in MB) that triggers a warning status.",
                      type="int")
    parser.add_option("-c", "--critical", dest="crit_memory",
                      help="Memory utilization (in MB) that triggers a critical status.",
                      type="int")
    parser.add_option("-t", "--timeout", dest="timeout",
                      help="Number of milliseconds to wait before timing out and considering redis down",
                      type="int", default=2000)
    parser.add_option("-S", "--slaveof", dest="slaveof",
                      help="Redis server to slave from, if any.",
                      default=None)
    # Typed so "-L abc" / "-D abc" produce a parser error instead of a
    # ValueError traceback when the value is converted later.
    parser.add_option("-L", "--slavelag", dest="slavelag",
                      help="Age, in seconds, of sync last IO with master that triggers a warning.",
                      type="int", default=15)
    parser.add_option("-D", "--slavedelta", dest="slavedelta",
                      help="Largest discrepancy, in percent, between master and slave key counts before warning.",
                      type="float", default=0.0001)
    return parser
def _report(status, message):
    """Print the one-line check result and exit with *status*."""
    print(message)
    sys.exit(status)


def _db0_key_count(info):
    """Return the key count INFO reports for db0, or 0 when db0 is absent.

    The ``db0`` value looks like ``keys=10,expires=0``. A missing entry is
    treated as zero keys (INFO's keyspace section lists only non-empty
    databases).
    """
    fields = dict(
        item.split('=', 1)
        for item in (info.get("db0") or "").split(',')
        if '=' in item
    )
    return int(fields.get('keys', 0))


def main():
    """Run the Redis check and exit with the matching status code.

    Checks, in order: that INFO can be fetched, that memory usage is below
    the critical and warning thresholds, and -- when ``--slaveof`` is given --
    that the instance is a slave of the expected master, the replication
    link is up, the last sync IO is recent enough, and the db0 key counts of
    master and slave are within the allowed percentage of each other.

    Prints a single status line and terminates via ``sys.exit`` with
    ``EXIT_OK``, ``EXIT_WARN`` or ``EXIT_CRITICAL``.
    """
    parser = build_parser()
    options, _args = parser.parse_args()
    # "is None" rather than truthiness, so an explicit threshold of 0 is valid.
    if options.warn_memory is None:
        parser.error("Warning level required")
    if options.crit_memory is None:
        parser.error("Critical level required")

    port = int(options.port)
    timeout = options.timeout / 1000.0  # milliseconds -> seconds

    try:
        info = get_info(options.server, port, timeout=timeout)
    except socket.error as exc:
        _report(EXIT_CRITICAL,
                "CRITICAL: Error connecting or getting INFO from redis %s:%s: %s"
                % (options.server, options.port, exc))

    raw_memory = info.get("used_memory_rss") or info.get("used_memory")
    if raw_memory is None:
        _report(EXIT_CRITICAL,
                "CRITICAL: Redis INFO from %s:%s did not report memory usage"
                % (options.server, options.port))
    # Float divisor keeps true division regardless of the interpreter version.
    memory = int(raw_memory) / (1024.0 * 1024)

    if memory > options.crit_memory:
        _report(EXIT_CRITICAL,
                "CRITICAL: Redis memory usage is %dMB (threshold %dMB)"
                % (memory, options.crit_memory))
    elif memory > options.warn_memory:
        _report(EXIT_WARN,
                "WARN: Redis memory usage is %dMB (threshold %dMB)"
                % (memory, options.warn_memory))

    ok_message = "OK: Redis memory usage is %dMB" % memory

    # Slave tests; make sure the role is right, the master IP matches, the
    # link is up, the last sync IO isn't too lagged, and the delta between
    # keys is small.
    if options.slaveof:
        role = info.get("role", "master")
        if role != "slave":
            _report(EXIT_CRITICAL, "CRITICAL: Redis instance is not a slave")

        master_host = info.get("master_host")
        if options.slaveof != master_host:
            _report(EXIT_CRITICAL,
                    "CRITICAL: Redis instance is not a slave of %s (but is a slave of %s instead)"
                    % (options.slaveof, master_host))

        master_link_status = info.get("master_link_status")
        if master_link_status != "up":
            _report(EXIT_CRITICAL,
                    "CRITICAL: Redis slave link status is %s" % master_link_status)

        raw_last_io = info.get("master_last_io_seconds_ago")
        if raw_last_io is None:
            _report(EXIT_CRITICAL,
                    "CRITICAL: Redis slave did not report master_last_io_seconds_ago")
        master_last_io_seconds_ago = int(raw_last_io)
        if master_last_io_seconds_ago > int(options.slavelag):
            _report(EXIT_WARN,
                    "WARN: Redis slave last IO was %d seconds ago"
                    % master_last_io_seconds_ago)
        ok_message = "%s; last sync IO was %d seconds ago" % (
            ok_message, master_last_io_seconds_ago)

        # The master is queried on the same port as the slave.
        try:
            remote_info = get_info(options.slaveof, port, timeout=timeout)
        except socket.error as exc:
            _report(EXIT_CRITICAL,
                    "CRITICAL: Error connecting or getting INFO from redis master %s:%s: %s"
                    % (options.slaveof, options.port, exc))

        slave_key_count = _db0_key_count(info)
        remote_key_count = _db0_key_count(remote_info)
        if remote_key_count:
            delta = 100.0 * abs(remote_key_count - slave_key_count) / remote_key_count
        else:
            # Empty master: avoid dividing by zero. Any key on the slave
            # counts as a full (100%) discrepancy.
            delta = 100.0 if slave_key_count else 0.0
        if delta > float(options.slavedelta):
            _report(EXIT_WARN,
                    "WARN: Key count of Redis master and slave differ by %0.006f%%" % delta)
        ok_message = "%s; key count of master and slave differ by %0.006f%%" % (
            ok_message, delta)

    _report(EXIT_OK, ok_message)
# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
hi,
thanks for slave changes, could you add a license to it too so it is easier to redistribute? afaict the original is MIT-licensed