Skip to content

Instantly share code, notes, and snippets.

@beugley
Last active March 14, 2022 00:13
Show Gist options
  • Save beugley/fd14c72bcb63bde16fa2 to your computer and use it in GitHub Desktop.
Save beugley/fd14c72bcb63bde16fa2 to your computer and use it in GitHub Desktop.
Python: convert sar (sadf) to json format
#!/usr/bin/env python
###############################################################################
## sar_to_json.py
##
## Reformats output from sadf to json. sadf must be invoked with the -D
## switch. Timestamps are displayed in ISO 8601 format (YYYY-MM-DDThh:mm:ssZ).
## Example: sadf -D -- -A | sar_to_json.py
###############################################################################
import sys
import datetime
import json
from collections import OrderedDict
# Only sadf sections whose first metric column (the 4th header field) is
# listed below are processed. All other sections are ignored.
#
# Sections keyed by (host, interval, timestamp): one data line per sample.
pattern_3key = ["tps", "pgpgin/s", "runq-sz", "kbmemfree", "frmpg/s",
                "kbswpfree", "dentunusd", "proc/s", "pswpin/s", "call/s",
                "scall/s", "totsck"]
# Sections with a 4th key column: several data lines per sample (for example
# one line per CPU, block device or network interface).
# "TTY" would belong here as well, but TTY device activity was printed
# incorrectly by the original author's version of sadf, so it is excluded
# for now.
pattern_4key = ["CPU", "DEV", "IFACE"]

# Naive UTC epoch. Adding a timedelta to it gives the same result as
# datetime.utcfromtimestamp() for whole seconds, without relying on a call
# that is deprecated since Python 3.12.
_EPOCH = datetime.datetime(1970, 1, 1)


def _iso_timestamp(epoch_seconds):
    """Return *epoch_seconds* (int or numeric str) as 'YYYY-MM-DDThh:mm:ssZ'.

    Raises ValueError if the value is not an integer, which is what happens
    when sadf was run without the -D switch.
    """
    moment = _EPOCH + datetime.timedelta(seconds=int(epoch_seconds))
    return moment.isoformat() + 'Z'


def sadf_to_records(lines):
    """Convert `sadf -D` output lines into a list of per-sample records.

    lines: iterable of text lines (e.g. sys.stdin). Lines starting with '#'
        are section headers naming the ';'-separated columns; the other
        lines are data lines for the most recent header.

    Returns a list of OrderedDicts, one per distinct
    (host, interval, timestamp) key, in order of first appearance. Values of
    3-key sections are stored directly under their column names. Values of
    4-key sections are nested as record[key_column][key_value][column].
    All values are kept as strings, except the timestamp which is converted
    to ISO 8601.

    Data lines that precede any header, belong to an unrecognised section,
    or have fewer than 3 fields are skipped. Fields beyond the header's
    column count are dropped.
    """
    records = OrderedDict()
    columns = []
    # 0 means "ignore data lines"; it is also the state before any header,
    # so stray leading data lines no longer raise NameError.
    key_cols = 0

    for line in lines:
        if line.startswith("#"):
            # Header line: drop the leading "# " and read the column names.
            # The first 3 columns (host, interval, timestamp) are the key;
            # the 4th column identifies the section.
            columns = line[2:].strip().split(';')
            section = columns[3] if len(columns) > 3 else None
            if section in pattern_3key:
                key_cols = 3
            elif section in pattern_4key:
                key_cols = 4
            else:
                key_cols = 0
            continue

        if not key_cols:
            continue

        values = line.strip().split(';')
        if len(values) < 3:
            # Blank or truncated line: no complete key to file it under.
            continue

        values[2] = _iso_timestamp(values[2])
        record = records.setdefault('|'.join(values[:3]), OrderedDict())

        if key_cols == 3:
            record.update(zip(columns, values))
            continue

        # 4-key section: the key fields go on the record itself ...
        record.update(zip(columns[:3], values[:3]))
        if len(values) > 3:
            # ... and the remaining fields go into a child dictionary that
            # collects all sibling data lines (one entry per CPU/DEV/IFACE).
            siblings = record.setdefault(columns[3], OrderedDict())
            metrics = list(zip(columns[4:], values[4:]))
            if metrics:
                siblings.setdefault(values[3], OrderedDict()).update(metrics)

    return list(records.values())


def main():
    """Read `sadf -D` output from stdin and print one JSON object per sample."""
    for record in sadf_to_records(sys.stdin):
        print(json.dumps(record))


if __name__ == "__main__":
    main()
@tomdottom
Copy link

If you have a new(ish) version of sysstat installed the following may be easier.

sadf -j -- -A

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment