Skip to content

Instantly share code, notes, and snippets.

@mainframe
Forked from wido/zabbix-ceph-mon.py
Created December 6, 2016 13:27
Show Gist options
  • Save mainframe/a7e018728e429128600786e0bd0fadda to your computer and use it in GitHub Desktop.
Save mainframe/a7e018728e429128600786e0bd0fadda to your computer and use it in GitHub Desktop.
Zabbix Ceph Monitor Cron
#!/usr/bin/env python
'''
Script to send Zabbix Agent active data to the Zabbix Server.
It runs the ceph command locally to get information from the running
OSDs and sends the aggregated data back to Zabbix.
'''
import sys
import subprocess
import socket
import tempfile
import os
import json
# Path of the zabbix_sender binary that is executed once per Zabbix server.
zbxsender = '/usr/bin/zabbix_sender'

# Host name handed to zabbix_sender via -s and prefixed to every data line.
# Placeholder: None would be formatted literally as "None", so set it
# before running the script.
hostname = None

# Iterable of Zabbix server addresses; the send loop at the bottom of the
# script iterates over it. Placeholder: None is not iterable, so replace it
# (for example with a list of server names) before running the script.
zbxactive = None
def spawn(command, shell=True):
    '''
    Run *command* and return its standard output with surrounding
    whitespace stripped.

    command -- a shell string (with shell=True) or an argument list
               (with shell=False).
    shell   -- passed straight to subprocess.Popen.

    Only stdout is captured; stderr is inherited from this process and the
    exit status is not inspected.

    NOTE: on POSIX, combining shell=True with a list runs only the first
    element as the command string; pass shell=False when giving a list.
    '''
    p = subprocess.Popen(command, stdout=subprocess.PIPE, shell=shell)
    # communicate() drains stdout and waits for the child to exit; the
    # second tuple element is always None because stderr is not piped.
    result = p.communicate()[0]
    return result.strip()
# Every counter reported to Zabbix, pre-seeded with 0 so that each key is
# always sent even when the cluster status does not mention it.
stats = {
    # OSD map counters
    'num_osds': 0,
    'num_up_osds': 0,
    'num_in_osds': 0,
    # Placement-group counters
    'num_pgs': 0,
    'num_pg_backfill': 0,
    'num_pg_backfilling': 0,
    'num_pg_incomplete': 0,
    'num_pg_degraded': 0,
    'num_pg_unclean': 0,
    'num_pg_inactive': 0,
    'num_pg_recovering': 0,
    # Health: 0 = unknown, 1 = HEALTH_OK, 2 = HEALTH_WARN, 3 = HEALTH_ERR
    'overall_status': 0,
    # Request counters taken from the health summary lines
    'slow_requests': 0,
    'blocked_requests': 0,
}
# Ask the cluster for its status once and parse the JSON document that
# `ceph -s --format=json` prints.
perf_dump = json.loads(spawn(['ceph', '-s', '--format=json'], False))

# OSD and placement-group totals are copied straight from the status document.
osdmap = perf_dump['osdmap']['osdmap']
stats['num_osds'] = osdmap['num_osds']
stats['num_up_osds'] = osdmap['num_up_osds']
stats['num_in_osds'] = osdmap['num_in_osds']
stats['num_pgs'] = perf_dump['pgmap']['num_pgs']

# Map the textual health state to the numeric code sent to Zabbix; an
# unrecognised state leaves the preset value (0) untouched.
status = perf_dump['health']['overall_status']
health_codes = {"HEALTH_OK": 1, "HEALTH_WARN": 2, "HEALTH_ERR": 3}
stats['overall_status'] = health_codes.get(status, stats['overall_status'])

# Summary lines containing one of these words feed the paired counter.
substring_counters = (
    ('slow', 'slow_requests'),
    ('blocked', 'blocked_requests'),
)
# Summary lines ending with one of these words feed the paired counter.
suffix_counters = (
    ('backfill', 'num_pg_backfill'),
    ('backfilling', 'num_pg_backfilling'),
    ('incomplete', 'num_pg_incomplete'),
    ('unclean', 'num_pg_unclean'),
    ('recovering', 'num_pg_recovering'),
    ('inactive', 'num_pg_inactive'),
    ('degraded', 'num_pg_degraded'),
)

for entry in perf_dump['health']['summary']:
    text = entry['summary']
    tokens = text.split()
    # The counters are read from the first word of the line. Lines that do
    # not start with a number (e.g. "mds cluster is degraded") carry no count;
    # storing their first word would break the int() conversion when the
    # values are sent, so such lines are skipped.
    if not tokens or not tokens[0].isdigit():
        continue
    count = int(tokens[0])
    for word, key in substring_counters:
        if word in text:
            stats[key] = count
    for suffix, key in suffix_counters:
        if text.endswith(suffix):
            stats[key] = count
# Push every collected counter to each configured Zabbix server.

# hostname and zbxactive ship as None placeholders; stop with a clear message
# instead of failing with "NoneType is not iterable" or sending data for a
# host literally named "None".
if not hostname or not zbxactive:
    sys.stderr.write("hostname and zbxactive must be configured before "
                     "sending data to Zabbix\n")
    sys.exit(1)

# Render the zabbix_sender input lines ("<host> <key> <value>") once. A value
# that cannot be converted to an integer is reported and skipped so that the
# remaining counters are still delivered.
payload_lines = []
for key in stats:
    try:
        value = int(stats[key])
    except (TypeError, ValueError) as e:
        print("Skipping ceph.mon.%s, value is not numeric: %s" % (key, str(e)))
        continue
    payload_lines.append("%s ceph.mon.%s %d" % (hostname, key, value))
payload = "".join(line + "\n" for line in payload_lines)

for zbxserver in zbxactive:
    # An argument list (no shell) keeps host and server names from being
    # interpreted by a shell. "-i -" makes zabbix_sender read from stdin.
    command = [zbxsender, '-z', str(zbxserver), '-s', str(hostname),
               '-vv', '-i', '-']
    try:
        # universal_newlines=True lets the pipes carry text on Python 3 while
        # remaining valid on Python 2.
        zbxproc = subprocess.Popen(command, stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE,
                                   universal_newlines=True)
    except OSError as e:
        print("Failed to run %s for server %s: %s" % (zbxsender, zbxserver, str(e)))
        continue
    for line in payload_lines:
        print(line)
    # communicate() writes the payload, closes stdin and drains stdout in one
    # step, so the child always sees end-of-input.
    output = zbxproc.communicate(payload)[0]
    print(output)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment