Skip to content

Instantly share code, notes, and snippets.

@arsenio
Last active December 19, 2015 12:19
Show Gist options
  • Save arsenio/5953753 to your computer and use it in GitHub Desktop.
Save arsenio/5953753 to your computer and use it in GitHub Desktop.
A better NRPE plugin for checking Redis, including optional slave monitoring. Based on the work of Samuel Stauffer (see https://gist.github.com/samuel)
#!/usr/bin/python
# Originally written by Samuel Stauffer <https://gist.github.com/samuel>
# Modified by Arsenio Santos <arsenio@gmail.com>
from __future__ import division
import socket
import sys
from optparse import OptionParser
# Process exit statuses handed back to the monitoring system. They follow the
# Nagios/NRPE plugin convention: 0 = OK, 1 = WARNING, 2 = CRITICAL.
EXIT_OK = 0
EXIT_WARN = 1
EXIT_CRITICAL = 2
def get_info(host, port, timeout):
    """Send ``INFO`` to a Redis server and return the reply as a dict.

    The reply is a single RESP bulk string (``$<length>\\r\\n<payload>\\r\\n``).
    The length header is honoured so that the whole payload is read even when
    it arrives split across several TCP segments.

    Args:
        host: Hostname or IPv4 address of the Redis server.
        port: TCP port of the Redis server.
        timeout: Socket timeout in seconds; a falsy value (``0``/``None``)
            disables the timeout (blocking socket).

    Returns:
        A dict mapping each ``name:value`` line of the INFO payload to its
        value (both strings). Section headers such as ``# Server`` and blank
        lines are skipped.

    Raises:
        socket.error: On connection failure or timeout, if the server closes
            the connection early, or if it answers with anything other than a
            bulk string (for example an ``-ERR``/``-NOAUTH`` error reply).
    """
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # Configure only this socket; the process-wide default is left alone.
        sock.settimeout(timeout or None)
        sock.connect((host, port))
        # sendall() retries partial writes; the request must be bytes.
        sock.sendall(b"*1\r\n$4\r\ninfo\r\n")

        reply = bytearray()
        body_start = None
        expected = None  # total reply size, known once the header is parsed
        while expected is None or len(reply) < expected:
            chunk = sock.recv(4096)
            if not chunk:
                raise socket.error(
                    "connection closed before the full INFO reply was received")
            reply.extend(chunk)
            if expected is not None:
                continue
            eol = reply.find(b"\r\n")
            if eol == -1:
                continue  # header line not complete yet
            header = bytes(reply[:eol])
            try:
                if header[:1] != b"$":
                    raise ValueError("not a bulk string")
                size = int(header[1:])
                if size < 0:
                    raise ValueError("nil bulk string")
            except ValueError:
                raise socket.error(
                    "unexpected reply to INFO: %s"
                    % header.decode("utf-8", "replace"))
            body_start = eol + 2
            expected = body_start + size + 2  # payload plus trailing CRLF
    finally:
        # Always release the socket, including on timeouts and errors.
        sock.close()

    payload = bytes(reply[body_start:expected - 2])
    if not isinstance(payload, str):
        # Python 3: turn the received bytes into text before parsing.
        payload = payload.decode("utf-8", "replace")
    return dict(
        line.split(":", 1) for line in payload.split("\r\n") if ":" in line
    )
def build_parser():
    """Build the command-line option parser for the Redis check.

    Returns:
        An ``optparse.OptionParser`` with the connection options
        (``-s``/``-p``/``-t``), the memory thresholds (``-w``/``-c``) and the
        optional slave-monitoring options (``-S``/``-L``/``-D``). Numeric
        options declare a ``type`` so that invalid input is rejected by the
        parser with a usage error and the parsed values are already numbers.
    """
    parser = OptionParser()
    parser.add_option(
        "-s", "--server", dest="server", default="127.0.0.1",
        help="Redis server to connect to.")
    parser.add_option(
        "-p", "--port", dest="port", type="int", default=6379,
        help="Redis port to connect to.")
    parser.add_option(
        "-w", "--warn", dest="warn_memory", type="int",
        help="Memory utilization (in MB) that triggers a warning status.")
    parser.add_option(
        "-c", "--critical", dest="crit_memory", type="int",
        help="Memory utilization (in MB) that triggers a critical status.")
    parser.add_option(
        "-t", "--timeout", dest="timeout", type="int", default=2000,
        help="Number of milliseconds to wait before timing out and "
             "considering redis down")
    parser.add_option(
        "-S", "--slaveof", dest="slaveof", default=None,
        help="Redis server to slave from, if any.")
    parser.add_option(
        "-L", "--slavelag", dest="slavelag", type="int", default=15,
        help="Age, in seconds, of sync last IO with master that triggers "
             "a warning.")
    parser.add_option(
        "-D", "--slavedelta", dest="slavedelta", type="float", default=0.0001,
        help="Largest discrepancy, in percent, between master and slave key "
             "counts before warning.")
    return parser
def _exit_with(status, message):
    """Print the one-line plugin result and terminate with *status*."""
    print(message)
    sys.exit(status)


def _db_key_count(info, db="db0"):
    """Return the key count INFO reports for *db*, or 0 if it is not listed.

    Redis leaves databases that hold no keys out of the INFO keyspace section,
    so a missing entry means "empty", not "error".
    """
    stats = info.get(db)
    if not stats:
        return 0
    fields = dict(
        item.split("=", 1) for item in stats.split(",") if "=" in item
    )
    return int(fields.get("keys", 0))


def _key_delta_percent(master_keys, slave_keys):
    """Return the master/slave key-count difference as a percent of master."""
    if master_keys == 0:
        # Avoid dividing by zero: identical (both empty) is 0%, else 100%.
        return 0.0 if slave_keys == 0 else 100.0
    return 100.0 * abs(master_keys - slave_keys) / master_keys


def main():
    """Run the Redis check and exit with a Nagios-style status.

    Steps, in order (the first failing step prints its message and exits):

    1. Parse options; both memory thresholds are mandatory.
    2. Fetch INFO from the target server (CRITICAL on any socket error).
    3. Compare memory usage (``used_memory_rss``, else ``used_memory``) in MB
       against the critical and warning thresholds.
    4. Only when ``--slaveof`` is given: verify the role is ``slave``, the
       master host matches, the master link is up, the last master IO is not
       older than ``--slavelag`` seconds, and the db0 key counts of master and
       slave differ by no more than ``--slavedelta`` percent.

    Exits with ``EXIT_OK``, ``EXIT_WARN`` or ``EXIT_CRITICAL``; never returns.
    """
    parser = build_parser()
    options, _args = parser.parse_args()
    # Compare against None so that an explicit threshold of 0 is accepted.
    if options.warn_memory is None:
        parser.error("Warning level required")
    if options.crit_memory is None:
        parser.error("Critical level required")

    timeout = options.timeout / 1000.0  # option is in milliseconds

    try:
        info = get_info(options.server, int(options.port), timeout=timeout)
    except socket.error as exc:
        _exit_with(
            EXIT_CRITICAL,
            "CRITICAL: Error connecting or getting INFO from redis %s:%s: %s"
            % (options.server, options.port, exc))

    used_bytes = info.get("used_memory_rss") or info.get("used_memory")
    if used_bytes is None:
        _exit_with(
            EXIT_CRITICAL,
            "CRITICAL: Redis %s:%s did not report its memory usage"
            % (options.server, options.port))
    memory = int(used_bytes) / (1024.0 * 1024.0)

    if memory > options.crit_memory:
        _exit_with(
            EXIT_CRITICAL,
            "CRITICAL: Redis memory usage is %dMB (threshold %dMB)"
            % (memory, options.crit_memory))
    if memory > options.warn_memory:
        _exit_with(
            EXIT_WARN,
            "WARN: Redis memory usage is %dMB (threshold %dMB)"
            % (memory, options.warn_memory))

    ok_message = "OK: Redis memory usage is %dMB" % memory

    # Slave tests; make sure the role is right, the master IP matches, the
    # link is up, the last sync IO isn't too lagged, and the delta between
    # keys is small.
    if options.slaveof:
        if info.get("role", "master") != "slave":
            _exit_with(EXIT_CRITICAL, "CRITICAL: Redis instance is not a slave")

        master_host = info.get("master_host")
        if options.slaveof != master_host:
            _exit_with(
                EXIT_CRITICAL,
                "CRITICAL: Redis instance is not a slave of %s "
                "(but is a slave of %s instead)"
                % (options.slaveof, master_host))

        master_link_status = info.get("master_link_status")
        if master_link_status != "up":
            _exit_with(
                EXIT_CRITICAL,
                "CRITICAL: Redis slave link status is %s" % master_link_status)

        master_last_io_seconds_ago = int(info.get("master_last_io_seconds_ago"))
        if master_last_io_seconds_ago > int(options.slavelag):
            _exit_with(
                EXIT_WARN,
                "WARN: Redis slave last IO was %d seconds ago"
                % master_last_io_seconds_ago)
        ok_message = "%s; last sync IO was %d seconds ago" % (
            ok_message, master_last_io_seconds_ago)

        # Ask the master on the port the slave itself replicates from; fall
        # back to --port when the slave's INFO does not carry master_port.
        master_port = int(info.get("master_port") or options.port)
        try:
            remote_info = get_info(options.slaveof, master_port, timeout=timeout)
        except socket.error as exc:
            _exit_with(
                EXIT_CRITICAL,
                "CRITICAL: Error connecting or getting INFO from redis "
                "master %s:%s: %s" % (options.slaveof, master_port, exc))

        delta = _key_delta_percent(
            _db_key_count(remote_info), _db_key_count(info))
        if delta > float(options.slavedelta):
            _exit_with(
                EXIT_WARN,
                "WARN: Key count of Redis master and slave differ by %0.006f%%"
                % delta)
        ok_message = "%s; key count of master and slave differ by %0.006f%%" % (
            ok_message, delta)

    _exit_with(EXIT_OK, ok_message)
# Run the check only when executed as a script (e.g. by NRPE), not on import.
if __name__ == "__main__":
    main()
@filippog
Copy link

filippog commented Dec 3, 2015

hi,
thanks for slave changes, could you add a license to it too so it is easier to redistribute? afaict the original is MIT-licensed

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment