Skip to content

Instantly share code, notes, and snippets.

@wido
Created March 22, 2016 09:34
Show Gist options
  • Save wido/70ee1e3bd81d57ba1681 to your computer and use it in GitHub Desktop.
Save wido/70ee1e3bd81d57ba1681 to your computer and use it in GitHub Desktop.
Zabbix Ceph Monitor Cron
#!/usr/bin/env python
'''
Script to send Zabbix Agent active data to the Zabbix Server.
It runs the ceph command locally to get information from the running
OSDs and sends the aggregated data back to Zabbix.
'''
import sys
import subprocess
import socket
import tempfile
import os
import json
# Path of the zabbix_sender binary; it is started once per Zabbix server.
zbxsender = '/usr/bin/zabbix_sender'

# Deployment-specific settings. Both are left unset (None) here and have to
# be filled in before the script can deliver anything:
#   hostname  - host name handed to zabbix_sender via -s and written as the
#               first field of every data line
#   zbxactive - iterable of Zabbix server addresses; the complete data set
#               is sent to each entry in turn
hostname = None
zbxactive = None
def spawn(command, shell=True):
    '''
    Run a command and return what it wrote to standard output.

    command -- the command to execute; passed to subprocess.Popen as-is
    shell   -- forwarded to subprocess.Popen (defaults to True)

    Only standard output is captured; standard error is not redirected and
    the exit status of the command is not inspected. Leading and trailing
    whitespace is stripped from the captured output before it is returned.
    '''
    process = subprocess.Popen(command, shell=shell, stdout=subprocess.PIPE)
    captured = process.communicate()[0]
    return captured.strip()
# Every metric that is reported to Zabbix. Each one starts at 0, so a value
# is sent for every key even when the cluster status never mentions it.
stats = dict.fromkeys(
    (
        'num_osds',
        'num_up_osds',
        'num_in_osds',
        'num_pgs',
        'num_pg_backfill',
        'num_pg_backfilling',
        'num_pg_incomplete',
        'num_pg_degraded',
        'num_pg_unclean',
        'num_pg_inactive',
        'num_pg_recovering',
        'overall_status',
        'slow_requests',
        'blocked_requests',
    ),
    0
)
# Fetch the cluster status once, as JSON, and pick the monitored values out
# of it.
# NOTE(review): the key layout read below (osdmap.osdmap, health.overall_status,
# health.summary) is taken as-is from this script; confirm it matches the
# `ceph -s --format=json` output of the Ceph release in use.
perf_dump = json.loads(spawn(['ceph', '-s', '--format=json'], False))

# OSD and placement-group totals are copied over unchanged.
osd_counters = perf_dump['osdmap']['osdmap']
for counter in ('num_osds', 'num_up_osds', 'num_in_osds'):
    stats[counter] = osd_counters[counter]
stats['num_pgs'] = perf_dump['pgmap']['num_pgs']

# Report the health string as a number. An unrecognised status leaves the
# value that is already in stats (0 unless something else set it).
status = perf_dump['health']['overall_status']
health_codes = {'HEALTH_OK': 1, 'HEALTH_WARN': 2, 'HEALTH_ERR': 3}
if status in health_codes:
    stats['overall_status'] = health_codes[status]

# Word that may appear anywhere in a summary line -> stat fed by that line.
request_markers = (
    ('slow', 'slow_requests'),
    ('blocked', 'blocked_requests'),
)
# Word a summary line has to end with -> stat fed by that line.
pg_state_suffixes = (
    ('backfill', 'num_pg_backfill'),
    ('backfilling', 'num_pg_backfilling'),
    ('incomplete', 'num_pg_incomplete'),
    ('unclean', 'num_pg_unclean'),
    ('recovering', 'num_pg_recovering'),
    ('inactive', 'num_pg_inactive'),
    ('degraded', 'num_pg_degraded'),
)

for entry in perf_dump['health']['summary']:
    text = entry['summary']
    # The checks are independent: one line may feed several stats.
    targets = [name for marker, name in request_markers if marker in text]
    targets += [name for suffix, name in pg_state_suffixes
                if text.endswith(suffix)]
    if not targets:
        continue
    # The count is the first word of the line. A line that matches a pattern
    # but does not start with a number (for instance one reading
    # "mds cluster is degraded") carries no count; it is skipped rather than
    # storing text that cannot be reported as an integer later on.
    try:
        count = int(text.split()[0])
    except ValueError:
        continue
    for name in targets:
        stats[name] = count
# Deliver the collected statistics to every configured Zabbix server by
# piping "<host> ceph.mon.<key> <value>" lines into zabbix_sender.
if hostname is None or zbxactive is None:
    # Both settings are None placeholders by default. Without this check the
    # loop would die with an opaque TypeError, or report data for a host
    # literally called "None".
    sys.exit("Zabbix is not configured: set 'hostname' and 'zbxactive' "
             "at the top of this script")

# A single server given as a plain string would otherwise be iterated
# character by character.
zbxservers = [zbxactive] if isinstance(zbxactive, str) else zbxactive

for zbxserver in zbxservers:
    # An argument list without a shell: the configured values reach
    # zabbix_sender verbatim and are never interpreted by a shell.
    command = [zbxsender, '-z', str(zbxserver), '-s', str(hostname),
               '-vv', '-i', '-']
    try:
        # universal_newlines=True gives text-mode pipes, so the same str
        # lines can be written on Python 2 and Python 3.
        zbxproc = subprocess.Popen(command, stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE,
                                   universal_newlines=True)
    except OSError as e:
        # Typically the zabbix_sender binary is missing or not executable;
        # report it and still try the remaining servers.
        print("Failed to start %s: %s" % (zbxsender, str(e)))
        continue
    try:
        for key in stats:
            line = "%s ceph.mon.%s %d" % (hostname, key, int(stats[key]))
            zbxproc.stdin.write(line + "\n")
            # Echo what was sent so it shows up in the cron output.
            print(line)
    except Exception as e:
        print("Failed to fetch Ceph statistics: %s" % str(e))
    # communicate() closes stdin, which lets zabbix_sender finish, and
    # returns its (verbose) output.
    output = zbxproc.communicate()[0]
    print(output)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment