Skip to content

Instantly share code, notes, and snippets.

@cocoy
Created January 30, 2013 09:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cocoy/4671876 to your computer and use it in GitHub Desktop.
Save cocoy/4671876 to your computer and use it in GitHub Desktop.
Modified a bit.
#!/usr/bin/env python
"""
check_redis.py: Nagios plugin for checking a redis server.
Author: Steffen Zieger <me@saz.sh>
License: GPL
Version: 1.0
"""
from optparse import OptionParser
import sys
import socket
import redis
# Process exit codes reported back to Nagios, in increasing order of severity.
EXIT_OK, EXIT_WARN, EXIT_CRIT, EXIT_UNKNOWN = 0, 1, 2, 3
class RedisCheck(object):
    """Fetch INFO from a redis server and evaluate it as a Nagios check.

    Constructing an instance immediately connects to the server (via the
    ``redis`` client library) and stores the INFO dictionary in ``self.info``.
    If the connection fails, a CRITICAL message is printed and the process
    exits.  Call :meth:`check` to run every check, print the combined message
    plus perf data and exit with the worst status seen.

    Every ``*Warn`` / ``*Crit`` threshold may be ``None``, in which case that
    threshold is not evaluated.  Clients and memory alert when the value is
    *above* the threshold; uptime and slaves alert when it is *below*.
    """

    def __init__(self, host, port=6379, password=None, clientsWarn=None,
                 clientsCrit=None, memWarn=None, memCrit=None, upWarn=None,
                 upCrit=None, slavesWarn=None, slavesCrit=None):
        self.status = EXIT_OK
        # Empty string (falsy) so _setMessage starts a fresh message and
        # _exit can still concatenate when nothing was recorded.
        self.message = ""
        self.host = host
        self.port = port
        self.password = password
        self.clientsWarn = clientsWarn
        self.clientsCrit = clientsCrit
        self.slavesWarn = slavesWarn
        self.slavesCrit = slavesCrit
        # Memory thresholds are compared against a float number of MB; only
        # convert when a threshold was actually supplied.
        self.memWarn = None if memWarn is None else float(memWarn)
        self.memCrit = None if memCrit is None else float(memCrit)
        self.upWarn = upWarn
        self.upCrit = upCrit
        self._fetchInfo()

    @staticmethod
    def _level(value, warn, crit, below=False):
        """Return the exit code for ``value`` given warn/crit thresholds.

        With ``below=False`` a threshold is breached when ``value`` is
        greater than it; with ``below=True`` when ``value`` is less than it.
        A threshold of ``None`` is never breached.
        """
        def breached(limit):
            if limit is None:
                return False
            if below:
                return value < limit
            return value > limit

        if breached(crit):
            return EXIT_CRIT
        if breached(warn):
            return EXIT_WARN
        return EXIT_OK

    def _setStatus(self, status):
        """Raise the overall status; never lower it.

        The checks run one after another, so a later OK/WARN result must not
        hide an earlier WARN/CRIT one.
        """
        if status > self.status:
            self.status = status

    def _setMessage(self, message):
        """Append ``message`` to the output line, comma-separated."""
        if self.message:
            self.message += ", "
            self.message += message
        else:
            self.message = message

    def _exit(self):
        """Print the message (plus perf data, if available) and exit."""
        # Perf data is built from self.info, which does not exist when the
        # connection attempt failed.
        if getattr(self, "info", None) is not None:
            perfData = self._getPerfData()
        else:
            perfData = ""
        print(self.message + perfData)
        sys.exit(self.status)

    def _fetchInfo(self):
        """Load the server's INFO into ``self.info``; exit CRITICAL on failure."""
        self.info = None
        try:
            self.info = redis.Redis(host=self.host, port=self.port,
                                    password=self.password).info()
        except redis.ConnectionError:
            self._setStatus(EXIT_CRIT)
            self._setMessage("Can't connect to %s:%s" % (self.host, self.port))
            self._exit()

    def getStatus(self):
        """Return the worst exit code recorded so far."""
        return self.status

    def getRole(self):
        """Return the ``role`` field of INFO."""
        return self.info['role']

    def _getPerfData(self):
        """Return the perf data string appended to the plugin output."""
        # NOTE(review): entries are comma-separated and include a non-numeric
        # role; the Nagios plugin guidelines describe space-separated numeric
        # perf data. Left unchanged so existing consumers keep working.
        return "|role=%s,uptime=%ss;%s;%s,connectedClients=%s;%s;%s," \
               "connectedSlaves=%s;%s;%s,usedMemory=%sMB;%s;%s" % \
               (self.getRole(),
                self.getUptime()['s'], self.upWarn, self.upCrit,
                self.getConnectedClients(), self.clientsWarn,
                self.clientsCrit, self.getConnectedSlaves(),
                self.slavesWarn, self.slavesCrit,
                self.getUsedMem(), self.memWarn, self.memCrit)

    def getUptime(self):
        """Return uptime as a dict.

        ``'s'`` is the total uptime in seconds; ``'d'``, ``'h'`` and ``'m'``
        are the whole days, the hours within the day and the minutes within
        the hour.
        """
        uptime = int(self.info['uptime_in_seconds'])
        # Floor division keeps the parts integral on Python 2 and 3 alike.
        return {
            'd': uptime // 86400,
            'h': (uptime % 86400) // 3600,
            'm': (uptime % 3600) // 60,
            's': uptime,
        }

    def getConnectedClients(self):
        """Return the ``connected_clients`` field of INFO."""
        return self.info['connected_clients']

    def getConnectedSlaves(self):
        """Return the ``connected_slaves`` field of INFO."""
        return self.info['connected_slaves']

    def getUsedMem(self):
        """Return ``used_memory`` converted to MB, as a 2-decimal string."""
        return "%.2f" % float(self.info['used_memory'] / 1024.0 / 1024.0)

    def getLastSave(self):
        """Return the ``last_save_time`` field of INFO."""
        return self.info['last_save_time']

    def checkUptime(self):
        """Alert when the server has been up for less than the thresholds."""
        uptime = self.getUptime()
        level = self._level(uptime['s'], self.upWarn, self.upCrit, below=True)
        if level == EXIT_CRIT:
            self._setMessage("Uptime is %s seconds" % (uptime['s']))
        elif level == EXIT_WARN:
            self._setMessage("Uptime is %s minutes" % (uptime['m']))
        else:
            days = 'day' if uptime['d'] == 1 else 'days'
            self._setMessage("Uptime is %s %s, %s:%s h" % (uptime['d'], days,
                                                           uptime['h'],
                                                           uptime['m']))
        self._setStatus(level)

    def checkMemory(self):
        """Alert when used memory (MB) exceeds the memory thresholds."""
        mem = float(self.getUsedMem())
        self._setStatus(self._level(mem, self.memWarn, self.memCrit))
        self._setMessage("Used Memory: %s MB" % (mem))

    def checkConnectedClients(self):
        """Alert when the client count exceeds the client thresholds."""
        clients = self.getConnectedClients()
        self._setStatus(self._level(clients, self.clientsWarn,
                                    self.clientsCrit))
        self._setMessage("Connected Clients: %s" % (clients))

    def checkConnectedSlaves(self):
        """Alert when fewer slaves are connected than the slave thresholds."""
        slaves = self.getConnectedSlaves()
        self._setStatus(self._level(slaves, self.slavesWarn, self.slavesCrit,
                                    below=True))
        self._setMessage("Connected Slaves: %s" % (slaves))

    def checkRole(self):
        """Report the role and, for a slave, its master link details."""
        role = self.getRole()
        self._setMessage("Role: %s" % role)
        if role == 'slave':
            self._setMessage("MasterHost: %s" % self.info['master_host'])
            self._setMessage("MasterLinkStatus: %s" % self.info['master_link_status'])
            self._setMessage("MasterLastIOSec: %s" % self.info['master_last_io_seconds_ago'])
            self._setMessage("MasterSyncInProgress: %s" % self.info['master_sync_in_progress'])

    def runChecks(self):
        """Run every check, accumulating message and status."""
        self.checkRole()
        self.checkUptime()
        self.checkMemory()
        self.checkConnectedClients()
        self.checkConnectedSlaves()

    def check(self):
        """Run all checks, print the result and exit with the final status."""
        self.runChecks()
        self._exit()
def get_info(host, port, timeout):
    """Send a raw INFO command to ``host:port`` and parse the reply.

    ``timeout`` is in seconds; a falsy value disables the timeout.  Note that
    it is installed with ``socket.setdefaulttimeout``, so it also applies to
    sockets created later in this process.

    Reading stops at the first blank line (``\\r\\n\\r\\n``) or when the peer
    closes the connection, whichever comes first.  Returns a dict built from
    every received ``key:value`` line; lines without a colon are ignored.

    Raises ``socket.error`` (including ``socket.timeout``) on connection or
    I/O failure.
    """
    socket.setdefaulttimeout(timeout or None)
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        s.connect((host, port))
        # sendall: a plain send() may transmit only part of the request.
        s.sendall(b"*1\r\n$4\r\ninfo\r\n")
        buf = b""
        while b'\r\n\r\n' not in buf:
            chunk = s.recv(1024)
            if not chunk:
                # Peer closed the connection; without this the loop would
                # spin forever on empty reads.
                break
            buf += chunk
    finally:
        # Always release the socket, even when connect/recv raised.
        s.close()
    # On Python 3 the payload is bytes and must be decoded before splitting;
    # on Python 2 it is already a str and is left untouched.
    if not isinstance(buf, str):
        buf = buf.decode('utf-8', 'replace')
    return dict(x.split(':', 1) for x in buf.split('\r\n') if ':' in x)
def build_parser():
    """Build the command-line parser for the plugin.

    Returns an ``OptionParser`` with the server address, port, memory /
    client / slave thresholds and connection timeout options, each with a
    default value.
    """
    parser = OptionParser()
    parser.add_option("-H", "--server", dest="server", help="Redis server to connect to.", default="127.0.0.1")
    parser.add_option("-p", "--port", dest="port", help="Redis port to connect to.", type="int", default=6379)
    parser.add_option("-M", "--critical_memory", dest="crit_memory", help="Memory utilization (in MB) that triggers a critical status.", type="float", default=256)
    parser.add_option("-m", "--warn_memory", dest="warn_memory", help="Memory utilization (in MB) that triggers a warning status.", type="float", default=128)
    parser.add_option("-C", "--critical_client", dest="crit_client", help="Connected Clients that triggers a critical status.", type="int", default=80)
    parser.add_option("-c", "--warn_client", dest="warn_client", help="Connected Clients that triggers a warning status.", type="int", default=60)
    # Slave thresholds are minimums: the check alerts when fewer are connected.
    parser.add_option("-S", "--critical_slave", dest="crit_slave", help="Minimum Connected Slaves; fewer triggers a critical status.", type="int", default=0)
    parser.add_option("-s", "--warn_slave", dest="warn_slave", help="Minimum Connected Slaves; fewer triggers a warning status.", type="int", default=0)
    parser.add_option("-t", "--timeout", dest="timeout", help="Number of milliseconds to wait before timing out and considering redis down", type="int", default=2000)
    return parser
def main():
    """Parse the command line, run the redis check and exit.

    A raw-socket INFO request is made first so that an unreachable or
    unresponsive server is reported as CRITICAL within the configured
    timeout.  The full check is then run by ``RedisCheck``, which prints the
    result and exits with the matching status code.
    """
    parser = build_parser()
    options, _args = parser.parse_args()
    try:
        # Only used as a reachability probe; the parsed reply is not needed.
        # The timeout option is in milliseconds, get_info expects seconds.
        get_info(options.server, int(options.port), timeout=options.timeout / 1000.0)
    except socket.error as exc:
        print("CRITICAL: Error connecting or getting INFO from redis %s:%s: %s" % (options.server, options.port, exc))
        sys.exit(EXIT_CRIT)
    # Pass the port through so the check targets the same server that was
    # just probed instead of always falling back to the default 6379.
    redisCheck = RedisCheck(options.server, port=options.port,
                            memWarn=options.warn_memory, memCrit=options.crit_memory,
                            upWarn=900, upCrit=60,
                            clientsWarn=options.warn_client, clientsCrit=options.crit_client,
                            slavesWarn=options.warn_slave, slavesCrit=options.crit_slave)
    redisCheck.check()
# Run the check only when executed as a script, not when imported.
if '__main__' == __name__:
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment