Skip to content

Instantly share code, notes, and snippets.

@mastbaum
Created April 14, 2014 16:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save mastbaum/10663894 to your computer and use it in GitHub Desktop.
Save mastbaum/10663894 to your computer and use it in GitHub Desktop.
Nagios SMART status check
#!/usr/bin/env python
'''Report the SMART status of hard disks using a log file.'''
import argparse
import subprocess
import sys
# Nagios status codes
OK, WARNING, CRITICAL, UNKNOWN = range(4)
# Bits in the return code of smartctl. The entry at index i is the
# (description, Nagios status code) pair reported when bit i of the
# smartctl exit status is set.
BITS = (
    ('smartctl error', UNKNOWN),                          # bit 0
    ('Device open failed', CRITICAL),                     # bit 1
    ('SMART or ATA command failed', WARNING),             # bit 2
    ('Disk failing', WARNING),                            # bit 3
    ('Prefail attributes over threshold', WARNING),       # bit 4
    ('Attributes over threshold in the past', WARNING),   # bit 5
    ('Error log contains SMART errors', WARNING),         # bit 6
    ('Self-test log contains errors', WARNING),           # bit 7
)
def main(logfile):
    '''Summarize the most recent SMART log entry as a Nagios check result.

    The last non-blank line of ``logfile`` must have the form
    ``<timestamp>;<name>::<code>[;<name>::<code>...]`` where ``code`` is an
    integer bit field. Every set bit is looked up in ``BITS``; a device's
    severity is the highest severity among its set bits, and the overall
    result is the highest severity among all devices.

    A one-line summary (``SMART STATUS <label> - <per-device details>``) is
    printed to stdout.

    :param logfile: path to the SMART status log file
    :returns: the overall status; 0, 1 and 2 are labelled OK, WARNING and
        CRITICAL in the summary, any other value UNKNOWN
    :raises ValueError: if the log holds no entry or the entry is malformed
    :raises IOError: (OSError on Python 3) if the log cannot be opened
    '''
    # Keep only the last non-blank line while streaming through the file, so
    # a large log is never held in memory and a trailing blank line or a
    # missing final newline cannot corrupt the entry being parsed.
    last_line = None
    with open(logfile, 'r') as f:
        for raw in f:
            candidate = raw.strip()
            if candidate:
                last_line = candidate
    if last_line is None:
        raise ValueError('no entries found in %s' % logfile)

    # The timestamp is part of the record format but is not used here.
    _timestamp, entries = last_line.split(';', 1)

    total_status = 0
    output = []
    for entry in entries.split(';'):
        # Split on the last '::' so the separator is always the one that
        # precedes the numeric code.
        name, code = entry.rsplit('::', 1)
        code = int(code)
        status = 0
        meanings = []
        for bit, (meaning, severity) in enumerate(BITS):
            if code & (1 << bit):
                status = max(status, severity)
                meanings.append(meaning)
        if status > 0:
            # Append the raw bit field so the operator can see exactly
            # which bits were set.
            status_string = '%s (%s)' % (', '.join(meanings), bin(code))
        else:
            status_string = 'OK'
        total_status = max(total_status, status)
        output.append('%s: %s' % (name, status_string))

    labels = {0: 'OK', 1: 'WARNING', 2: 'CRITICAL'}
    s = labels.get(total_status, 'UNKNOWN')
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print('SMART STATUS %s - %s' % (s, ', '.join(output)))
    return total_status
if __name__ == '__main__':
    # Command-line entry point: evaluate the log and exit with the
    # resulting Nagios status code.
    parser = argparse.ArgumentParser()
    parser.add_argument('--logfile', '-f',
                        default='/var/log/smart_status.log',
                        help='Path to SMART status log file')
    args = parser.parse_args()
    try:
        status = main(args.logfile)
    except Exception as e:
        # Top-level boundary: any failure to read or parse the log is
        # reported as UNKNOWN rather than surfacing as a traceback.
        # The parenthesised single-argument print works on Python 2 and 3.
        print('SMART STATUS UNKNOWN - Python exception %s' % str(e))
        status = UNKNOWN
    sys.exit(status)
#!/usr/bin/env python
'''Record the status of hard disks using SMART data, writing to a log file.'''
import argparse
import subprocess
import sys
def check_smart(device):
    '''Run ``smartctl --all -q silent`` for a device and return its exit code.

    :param device: whitespace-separated smartctl arguments identifying the
        device; each word is passed as a separate argument
    :returns: the value returned by ``subprocess.call`` for the command
    '''
    command = ['smartctl', '--all', '-q', 'silent']
    command.extend(device.split())
    return subprocess.call(command)
def main(logfile, devices):
    '''Check every device with smartctl and append the results to a log.

    One line is appended per call, in the form
    ``<epoch seconds>;<name>::<code>[;<name>::<code>...]``, where ``code``
    is the value returned by ``check_smart`` for that device.

    :param logfile: path of the log file to append to
    :param devices: iterable of device strings as accepted by
        ``check_smart``; a string may carry smartctl options such as
        ``-d <type>``
    '''
    # Imported locally so this function does not depend on the module's
    # import block. time.time() replaces spawning `date +%s`, which also
    # returned bytes on Python 3 and broke the str join below.
    import time

    line = [str(int(time.time()))]
    for device in devices:
        code = check_smart(device)
        # Prettify devices on RAID controllers/HBAs: log the argument that
        # follows a stand-alone `-d` option. Matching whole tokens avoids
        # misfiring on device paths that merely contain the text "-d".
        tokens = device.split()
        if '-d' in tokens:
            following = tokens.index('-d') + 1
            if following < len(tokens):
                device = tokens[following]
        line.append('%s::%i' % (device, code))
    with open(logfile, 'a') as f:
        f.write(';'.join(line) + '\n')
if __name__ == '__main__':
    # Command-line entry point: require at least one --device, then record
    # one log line covering all of the requested devices.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--logfile', '-f',
        default='/var/log/smart_status.log',
        help='Path to SMART status log file',
    )
    cli.add_argument(
        '--device', '-x',
        action='append',
        default=[],
        help='Add a device to monitor',
    )
    options = cli.parse_args()
    if not options.device:
        sys.stderr.write('No devices specified.\n')
        cli.print_help()
        sys.exit(1)
    main(options.logfile, options.device)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment