Skip to content

Instantly share code, notes, and snippets.

@kchristensen
Last active January 3, 2016 07:28
Show Gist options
  • Save kchristensen/913a23dcb64d35c6783e to your computer and use it in GitHub Desktop.
Save kchristensen/913a23dcb64d35c6783e to your computer and use it in GitHub Desktop.
Nagios DNS resolution health check
#!/usr/bin/env python
"""
Nagios check to test DNS resolution data pulled from Graphite
"""
import optparse
import logging
import requests
import sys
# Nagios plugin exit codes.
OK = 0
WARNING = 1
CRITICAL = 2
# Used when the check itself cannot be evaluated (bad arguments or an
# unparseable Graphite response).
UNKNOWN = 3

# Seconds to wait for Graphite before giving up, so that a hung server
# cannot stall the check indefinitely.
HTTP_TIMEOUT = 10

LOGGER = logging.getLogger(__name__)
LOGGER.addHandler(logging.StreamHandler(sys.stdout))
LOGGER.setLevel(logging.INFO)


def _build_parser():
    """Return the option parser describing this check's command line."""
    parse = optparse.OptionParser()
    parse.add_option('-s', '--server', type='string', dest='server',
                     help='hostname of the server being checked')
    parse.add_option('-g', '--graphite', type='string', dest='graphite',
                     help='hostname of the graphite server to poll')
    parse.add_option('-w', '--warning', type='int', dest='warning_threshold',
                     help='threshold at which to warn')
    parse.add_option('-c', '--critical', type='int',
                     dest='critical_threshold',
                     help='threshold at which to go critical')
    parse.add_option('-u', '--url', type='string', dest='url',
                     help='url of the site being resolved')
    parse.add_option('-p', '--points', type='int', dest='points', default=10,
                     help='number of historical data points to average')
    return parse


def _build_graphite_url(graphite, server, url):
    """Return the Graphite render URL for the DNS metric of url on server.

    Dots in url are replaced with underscores when building the metric path.
    """
    return ('http://{0}/render/?from=-1days&target=servers.{1}.dns.{2}'
            '&rawData'.format(graphite, server, url.replace('.', '_')))


def _fetch_raw_data(graphite_url):
    """Fetch graphite_url and return the response body as text.

    Raises requests.RequestException on connection failure, timeout or an
    HTTP error status.
    """
    response = requests.get(graphite_url, timeout=HTTP_TIMEOUT)
    response.raise_for_status()
    return response.text


def _parse_datapoints(raw, points):
    """Return the last `points` values of a Graphite raw response as floats.

    Everything after the last '|' in raw is treated as a comma-separated
    list of values. 'None' entries are counted as 0.0, so a series that
    contains only 'None' averages to zero.

    Raises ValueError if any value is not numeric (for example an empty
    body or an HTML error page).
    """
    series = raw.split('|')[-1].rstrip('\n')
    values = [float(item.replace('None', '0.0'))
              for item in series.split(',')]
    return values[-points:]


def _evaluate(average, warning_threshold, critical_threshold):
    """Return the (exit code, message) pair for an average response time."""
    if not average:
        # An average of exactly zero means every sampled point was missing
        # or zero.
        return CRITICAL, 'CRITICAL - DNS Resolution is failing!'
    if average > critical_threshold:
        return CRITICAL, 'CRITICAL - DNS Resolution took %s ms' % average
    if average > warning_threshold:
        return WARNING, 'WARNING - DNS Resolution took %s ms' % average
    return OK, 'OK - DNS Resolution took %s ms' % average


def main(argv=None, fetch=_fetch_raw_data):
    """Run the check and return the Nagios exit code.

    argv  -- list of command-line arguments; defaults to sys.argv[1:].
    fetch -- callable taking the Graphite URL and returning the response
             body as text; defaults to an HTTP GET via requests.

    Returns OK, WARNING, CRITICAL or UNKNOWN. UNKNOWN is returned when a
    required option is missing, --points is below 1, or the Graphite
    response cannot be parsed.
    """
    parse = _build_parser()
    opts, _ = parse.parse_args(argv)

    # Every option except --points is required; thresholds are compared
    # against None explicitly because 0 is a legitimate threshold.
    arguments_ok = (opts.server and opts.graphite and opts.url
                    and opts.warning_threshold is not None
                    and opts.critical_threshold is not None
                    and opts.points >= 1)
    if not arguments_ok:
        parse.print_help()
        return UNKNOWN

    graphite_url = _build_graphite_url(opts.graphite, opts.server, opts.url)

    try:
        raw = fetch(graphite_url)
    except requests.RequestException:
        LOGGER.critical('CRITICAL - There was an error connecting to graphite')
        return CRITICAL

    try:
        values = _parse_datapoints(raw, opts.points)
    except ValueError:
        LOGGER.info('UNKNOWN - Could not parse data returned by graphite')
        return UNKNOWN

    # Divide by the number of points actually returned: Graphite may hold
    # fewer than the requested number.
    average = sum(values) / len(values)
    status, message = _evaluate(average, opts.warning_threshold,
                                opts.critical_threshold)
    LOGGER.info(message)
    return status


if __name__ == '__main__':
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment