Last active
July 30, 2018 22:30
-
-
Save alek-p/24e1ad9437bfd694e03ad216b5386f56 to your computer and use it in GitHub Desktop.
Capture ZFS and other debugging info
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A copy of the CDDL is available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2018 Datto Inc.
#
import sys | |
import getopt | |
import signal | |
import subprocess | |
import json | |
import datetime | |
import time | |
import os | |
import tarfile | |
import shutil | |
# Timeout, in seconds, applied to any command whose entry has no "timeout" key.
default_timeout = 5
# Built-in configuration, as a JSON document: a list of categories, each with
# a "category" name, an "enabled" flag and a list of "cmds". A command entry
# carries its command line in "cmd" and may add "enabled", "timeout",
# "shell" (hand the command to the shell), "repeat" (total number of runs)
# and "sleep" (seconds to wait between repeated runs).
default_conf = """
[
{
"category": "version",
"enabled": 1,
"cmds": [
{ "cmd": "uname -a" },
{ "cmd": "cat /sys/module/spl/version" },
{ "cmd": "cat /sys/module/zfs/version", "timeout": 5, "enabled": 1}
]
},
{
"category": "zstat",
"enabled": 1,
"cmds": [
{ "cmd": "cat /proc/spl/kstat/zfs/dbgmsg" },
{ "cmd": "cat /proc/spl/taskq-all" },
{ "cmd": "cat /proc/spl/kmem/slab", "repeat": 3, "sleep": 1 },
{ "cmd": "cat /proc/spl/taskq", "repeat": 5, "sleep": 1 },
{ "cmd": "cat /proc/spl/kstat/zfs/arcstats", "repeat": 5, "sleep": 1 },
{ "cmd": "cat /proc/spl/kstat/zfs/zil", "repeat": 3, "sleep": 5 },
{ "cmd": "cat /proc/spl/kstat/zfs/dmu_tx", "repeat": 3, "sleep": 5 },
{ "cmd": "echo 100 > /sys/module/zfs/parameters/zfs_txg_history", "shell": "1" },
{ "cmd": "tail /proc/spl/kstat/zfs/dattoArray/txgs", "repeat": 3, "sleep": 5 }
]
},
{
"category": "perf",
"enabled": 1,
"cmds": [
{ "cmd": "uptime" },
{ "cmd": "dmesg -T" },
{ "cmd": "free -m" },
{ "cmd": "ps -ef" },
{ "cmd": "vmstat 1 3" },
{ "cmd": "mpstat -P ALL 1 3" },
{ "cmd": "pidstat 1 3" },
{ "cmd": "iostat -xz 5 3", "timeout": 20 },
{ "cmd": "sar -n DEV 1 3", "enabled": 1 },
{ "cmd": "sar -n TCP,ETCP 1 3", "enabled": 1 }
]
},
{
"category": "debug",
"enabled": 1,
"cmds": [
{ "cmd": "echo t > /proc/sysrq-trigger", "shell": 1 },
{ "cmd": "cat /var/log/kern.log" },
{ "cmd": "cat /var/log/syslog" },
{ "cmd": "cat /var/log/dpkg.log" }
]
},
{
"category": "zfs",
"enabled": 0,
"cmds": [
{ "cmd": "zfs get all dattoArray" },
{ "cmd": "zfs list -d 2 -r dattoArray", "timeout": 30 },
{ "cmd": "zpool history -i dattoArray", "timeout": 30 },
{ "cmd": "zpool events -v dattoArray" },
{ "cmd": "zpool get all dattoArray" },
{ "cmd": "zpool status dattoArray" },
{ "cmd": "zpool list -v dattoArray" },
{ "cmd": "zpool iostat dattoArray -v 5 3", "timeout": 20 },
{ "cmd": "zpool iostat dattoArray -vq 5 3", "timeout": 20 }
]
}
]"""
def usage():
    """Print the command-line synopsis to stdout and exit with status 1."""
    program = sys.argv[0]
    print("Usage:", program, " <-c enable_category1,enable_category2,...>")
    raise SystemExit(1)
# Parse the command line. The only option is "-c a,b,...", which
# force-enables categories that the configuration marks as disabled.
try:
    opts, args = getopt.getopt(sys.argv[1:], 'c:')
except getopt.GetoptError as err:
    print(str(err))
    usage()

force_enable_categories = []
for opt, arg in opts:
    if opt == '-c':
        force_enable_categories = arg.split(",")

# Each run collects into its own timestamped directory under /tmp; the
# colons of the ISO timestamp are replaced with dots in the name.
ts = datetime.datetime.now().isoformat()
log_id = 'log_state_' + ts.replace(":", ".")
output_dir = '/tmp/' + log_id + "/"
os.makedirs(output_dir)

# Write the configuration into the output directory (so it sits next to the
# collected logs), then parse it back from that file. Context managers make
# sure both handles are closed even if writing or parsing fails.
conf_filename = output_dir + 'log_state_conf.json'
with open(conf_filename, 'w+') as fd:
    fd.write(default_conf)
with open(conf_filename, 'r') as json_file:
    config = json.load(json_file)
def create_tarfile(output_filename, source_dir):
    """Pack a directory into a gzip-compressed tar archive.

    Args:
        output_filename: Path of the ``.tar.gz`` file to create.
        source_dir: Directory to archive (recursively). A trailing slash
            is allowed.

    The archive's top-level entry is named after the last path component
    of ``source_dir``, so it unpacks into a single directory of that name.
    """
    print('Creating archive:', output_filename)
    # Take the in-archive name from the argument instead of a module-level
    # global, so the function depends only on its own parameters.
    top_level = os.path.basename(os.path.normpath(source_dir))
    # The with-statement closes the archive; no explicit close() is needed.
    with tarfile.open(output_filename, 'w:gz') as archive:
        archive.add(source_dir, arcname=top_level)
def signal_handler(signal, frame):
    """Handle Ctrl+C: archive what was collected so far, clean up and exit.

    Args:
        signal: First argument supplied to the handler (unused here).
        frame: Second argument supplied to the handler (unused here).
    """
    print('You pressed Ctrl+C')
    archive_name = log_id + '.tar.gz'
    create_tarfile(archive_name, output_dir)
    # The working directory is no longer needed once it has been archived.
    shutil.rmtree(output_dir)
    raise SystemExit(1)


# Route SIGINT to the handler above.
signal.signal(signal.SIGINT, signal_handler)
def _decode_output(raw):
    """Return captured subprocess output as text; ``None`` becomes ''."""
    if raw is None:
        return ''
    # Captured output is arbitrary bytes; never let a bad byte abort the run.
    return raw.decode(errors='replace')


def run_cmd(category, cmd, s, t):
    """Run one command and append its result to that command's log file.

    The log is written to ``output_dir + category + '/'`` (``output_dir`` is
    a module-level name); the file name is the command with its arguments
    concatenated and every '/' replaced by '_', plus '.log'. The category
    directory must already exist.

    Args:
        category: Name of the category sub-directory receiving the log.
        cmd: The command as a list of argument strings.
        s: When truthy, the space-joined command is handed to the shell via
            ``os.system``; its output is not captured and ``t`` is unused.
        t: Timeout in seconds for the non-shell path.

    Returns:
        0 on success, the command's return code when it fails, the
        ``os.system`` status for shell commands, -5 on timeout, or -1 when
        the executable cannot be found.
    """
    cmd_line = ' '.join(cmd)
    print('Running [', category, ']:', cmd_line)
    start_ts = datetime.datetime.now().isoformat()
    if s:
        output = 'Handed of to shell: ' + cmd_line
        err = os.system(cmd_line)
        if err != 0:
            print('SHELL EXECUTION FAILED:', cmd_line)
    else:
        try:
            raw = subprocess.check_output(cmd, stderr=subprocess.STDOUT, timeout=t)
            output = _decode_output(raw)
            err = 0
        except subprocess.CalledProcessError as e:
            output = _decode_output(e.output)
            err = e.returncode
            print('EXECUTION FAILED:', cmd_line)
        except subprocess.TimeoutExpired as e:
            # e.output is None when the command produced nothing before the
            # timeout, so it must not be decoded unconditionally.
            output = _decode_output(e.output)
            print(cmd, ' timed out, output:\n', output)
            err = -5
            print('TIMED OUT:', cmd_line)
        except FileNotFoundError:
            output = ''.join(cmd)
            err = -1
            print('CMD NOT FOUND:', cmd_line)
    end_ts = datetime.datetime.now().isoformat()
    out_file = output_dir + category + '/' + ''.join(cmd).replace("/", "_") + '.log'
    # Append mode: repeated runs of the same command share one log file.
    with open(out_file, 'a', encoding='utf-8') as log_fd:
        log_fd.write('Started: ' + start_ts + '; Ended: ' + end_ts + '; ERROR: ' + str(err) + '\n')
        log_fd.write(output)
    return err
# Walk the configuration: one output sub-directory per active category and
# one run (or several, with "repeat") per enabled command.
for section in config:
    section_enabled = section.setdefault('enabled', 1) == 1
    # A disabled category still runs when it was force-enabled with -c.
    if not section_enabled and section["category"] not in force_enable_categories:
        continue
    category = section['category']
    os.makedirs(output_dir + category)

    for spec in section['cmds']:
        if spec.setdefault('enabled', 1) != 1:
            continue

        use_shell = spec.setdefault('shell', False)
        if use_shell == 1:
            use_shell = spec['shell'] = True
        timeout = spec.setdefault('timeout', default_timeout)
        argv = spec['cmd'].split(' ')

        # The first run is unconditional; "repeat" is the total run count.
        run_cmd(category, argv, use_shell, timeout)
        if 'repeat' not in spec:
            continue
        pause = spec.setdefault('sleep', 1)
        for _ in range(spec['repeat'] - 1):
            time.sleep(pause)
            run_cmd(category, argv, use_shell, timeout)

# Bundle everything that was collected, then drop the working directory.
create_tarfile(log_id + '.tar.gz', output_dir)
shutil.rmtree(output_dir)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment