LouisJenkinsCS/mpstat_converter.py

## mpstat_converter.py
#!/usr/bin/env python
# coding: utf-8

import argparse
import pandas
import sys
import subprocess
import time
import json
import signal
import numpy as np

# Command-line interface: a positional path to the mpstat JSON log and an
# optional destination for the rendered graph.
parser = argparse.ArgumentParser(description='Takes JSON from mpstat and creates and returns the minimum sample, maximum sample, and a 3D wireframe representation of the samples obtained. Used to characterize the data.')
parser.add_argument('input', metavar='i', type=str, help='The JSON file obtained from mpstat.')
# The placeholder used to be 'i' (copied from the positional argument), so the
# usage text read "--output i"; 'o' labels it correctly.
parser.add_argument('--output', metavar='o', type=str, help="The output for the generated graph; Defaults to out.svg", default='out.svg')
args = parser.parse_args()

# The log is only read, so open it read-only ('r+' needlessly required write
# permission) and let the context manager close the handle once it is parsed.
with open(args.input, 'r') as logfile:
    res = json.load(logfile)

# Each element of 'statistics' is one sample; its 'cpu-load' entry is turned
# into one DataFrame, and the frames are kept in sample order.
data = res['sysstat']['hosts'][0]['statistics']
dfs = [pandas.DataFrame.from_dict(sample['cpu-load']) for sample in data]

# Sum every non-'cpu' column per CPU label across all samples.
# cpu_load maps str(cpu label) -> {column name -> running total}.
cpu_load = {}
for frame in dfs:
    labels = [str(cpu) for cpu in frame['cpu']]
    for col in frame.columns:
        if col != 'cpu':
            # Values of a column line up positionally with the 'cpu' labels.
            for label, value in zip(labels, frame[col]):
                totals = cpu_load.setdefault(label, {})
                totals[col] = totals.get(col, 0) + value

# Print out the average statistics: every total is divided by the number of
# samples, then the table is shown with one row per CPU label.
sample_count = len(dfs)
for totals in cpu_load.values():
    for metric in totals:
        totals[metric] /= sample_count
print(pandas.DataFrame.from_dict(cpu_load).transpose())

# Print out the maximum and minimum dataframes
#
# Samples are ranked by the 'usr' value at index 0 (presumably the aggregate
# "all" row -- confirm against the mpstat output).  max()/min() return the
# first extreme sample, exactly like the former strict comparisons, but they
# do not rely on the sentinels 0 and 10000000: previously maxDF stayed None
# whenever every sample reported 0 for 'usr'.  default=None keeps both names
# defined when the log contains no samples.
def _first_usr(frame):
    """Return the 'usr' value stored at index 0 of one sample's DataFrame."""
    return frame['usr'][0]


maxDF = max(dfs, key=_first_usr, default=None)
minDF = min(dfs, key=_first_usr, default=None)

print("Maximum Snapshot:\n", maxDF)
print("Minimum Snapshot:\n", minDF)

# Plot the data
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# points[i] gathers the 'usr' value at index i from every sample, in sample
# order; the first sample decides how many indices there are.
N = len(dfs[0])
points = {cpu: [frame['usr'][cpu] for frame in dfs] for cpu in range(N)}

# A single 3D axes object, attached to the figure explicitly.
fig = plt.figure(figsize=(20, 16))
ax = Axes3D(fig, auto_add_to_figure=False)
fig.add_axes(ax)
ax.set_xlabel('Processor')
ax.set_ylabel('Seconds')
ax.set_zlabel('Utilization (%)')

# Build one grid row per processor index: x repeats the index, y counts the
# samples, z holds that index's values in ascending order.
# NOTE(review): the values are sorted per processor, so y is a rank rather
# than elapsed time even though the axis is labelled 'Seconds' -- confirm.
xs, ys, zs = [], [], []
for cpu, series in points.items():
    ranked = sorted(series)
    grid = np.array([(cpu, tick, value) for tick, value in enumerate(ranked)])
    xs.append(grid[:, 0])
    ys.append(grid[:, 1])
    zs.append(grid[:, 2].copy())

xs, ys, zs = np.array(xs), np.array(ys), np.array(zs)

ax.plot_wireframe(xs, ys, zs, color='blue')

# Write the figure to the requested path, then open the interactive viewer.
plt.savefig(args.output)
plt.show()
	#!/usr/bin/env python
	# coding: utf-8

	import argparse
	import pandas
	import sys
	import subprocess
	import time
	import json
	import signal
	import numpy as np

	# Command-line interface: a positional path to the mpstat JSON log and an
	# optional destination for the rendered graph.
	parser = argparse.ArgumentParser(description='Takes JSON from mpstat and creates and returns the minimum sample, maximum sample, and a 3D wireframe representation of the samples obtained. Used to characterize the data.')
	parser.add_argument('input', metavar='i', type=str, help='The JSON file obtained from mpstat.')
	# The placeholder used to be 'i' (copied from the positional argument), so
	# the usage text read "--output i"; 'o' labels it correctly.
	parser.add_argument('--output', metavar='o', type=str, help="The output for the generated graph; Defaults to out.svg", default='out.svg')
	args = parser.parse_args()

	# The log is only read, so open it read-only ('r+' needlessly required
	# write permission) and let the context manager close the handle.
	with open(args.input, 'r') as logfile:
	    res = json.load(logfile)

	# Each element of 'statistics' is one sample; its 'cpu-load' entry is
	# turned into one DataFrame, and the frames are kept in sample order.
	data = res['sysstat']['hosts'][0]['statistics']
	dfs = [pandas.DataFrame.from_dict(sample['cpu-load']) for sample in data]

	# Sum every non-'cpu' column per CPU label across all samples.
	# cpu_load maps str(cpu label) -> {column name -> running total}.
	cpu_load = {}
	for frame in dfs:
	    labels = [str(cpu) for cpu in frame['cpu']]
	    for col in frame.columns:
	        if col != 'cpu':
	            # Values of a column line up positionally with the 'cpu' labels.
	            for label, value in zip(labels, frame[col]):
	                totals = cpu_load.setdefault(label, {})
	                totals[col] = totals.get(col, 0) + value

	# Print out the average statistics: every total is divided by the number
	# of samples, then the table is shown with one row per CPU label.
	sample_count = len(dfs)
	for totals in cpu_load.values():
	    for metric in totals:
	        totals[metric] /= sample_count
	print(pandas.DataFrame.from_dict(cpu_load).transpose())

	# Print out the maximum and minimum dataframes
	#
	# Samples are ranked by the 'usr' value at index 0 (presumably the
	# aggregate "all" row -- confirm against the mpstat output).  max()/min()
	# return the first extreme sample, exactly like the former strict
	# comparisons, but they do not rely on the sentinels 0 and 10000000:
	# previously maxDF stayed None whenever every sample reported 0 for 'usr'.
	# default=None keeps both names defined when the log contains no samples.
	def _first_usr(frame):
	    """Return the 'usr' value stored at index 0 of one sample's DataFrame."""
	    return frame['usr'][0]


	maxDF = max(dfs, key=_first_usr, default=None)
	minDF = min(dfs, key=_first_usr, default=None)

	print("Maximum Snapshot:\n", maxDF)
	print("Minimum Snapshot:\n", minDF)

	# Plot the data
	import matplotlib.pyplot as plt
	from mpl_toolkits.mplot3d import Axes3D

	# points[i] gathers the 'usr' value at index i from every sample, in
	# sample order; the first sample decides how many indices there are.
	N = len(dfs[0])
	points = {cpu: [frame['usr'][cpu] for frame in dfs] for cpu in range(N)}

	# A single 3D axes object, attached to the figure explicitly.
	fig = plt.figure(figsize=(20, 16))
	ax = Axes3D(fig, auto_add_to_figure=False)
	fig.add_axes(ax)
	ax.set_xlabel('Processor')
	ax.set_ylabel('Seconds')
	ax.set_zlabel('Utilization (%)')

	# Build one grid row per processor index: x repeats the index, y counts
	# the samples, z holds that index's values in ascending order.
	# NOTE(review): the values are sorted per processor, so y is a rank rather
	# than elapsed time even though the axis is labelled 'Seconds' -- confirm.
	xs, ys, zs = [], [], []
	for cpu, series in points.items():
	    ranked = sorted(series)
	    grid = np.array([(cpu, tick, value) for tick, value in enumerate(ranked)])
	    xs.append(grid[:, 0])
	    ys.append(grid[:, 1])
	    zs.append(grid[:, 2].copy())

	xs, ys, zs = np.array(xs), np.array(ys), np.array(zs)

	ax.plot_wireframe(xs, ys, zs, color='blue')

	# Write the figure to the requested path, then open the interactive viewer.
	plt.savefig(args.output)
	plt.show()