Skip to content

Instantly share code, notes, and snippets.

@wasdee
Created December 17, 2022 14:47
Show Gist options
  • Save wasdee/4dbb693b7c0476d176dfc53a2711be28 to your computer and use it in GitHub Desktop.
Save wasdee/4dbb693b7c0476d176dfc53a2711be28 to your computer and use it in GitHub Desktop.
plot top
#!/usr/bin/env bash
# Record batch-mode `top` output into a timestamped log file.
#
# Usage: <this-script> [iterations]      (default: 120 iterations)
#
# The iteration count is written into the file name ("top-<N>iters-...") from
# the same variable that is passed to `top -n`, so the count in the name can
# never drift from the number of frames actually captured. The Python parser
# reads that count back from the file name and checks it against the log.
iterations="${1:-120}"
now=$(date +"%Y_%m_%d-%H_%M_%S")
top -b -n "$iterations" > "top-${iterations}iters-$now.txt"
# with top's default 3-second delay, the default 120 iterations take roughly
# 120*3 seconds = 6 minutes
"""
parse top output and plot the resource usage
"""
import re
from dataclasses import dataclass
from datetime import datetime
from io import StringIO
from pathlib import Path
from typing import Optional
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
@dataclass
class Position:
    """Character offsets locating one ``top`` iteration inside the log text."""

    # Offset at which the iteration's "top - ..." summary block begins.
    start: int
    # Offset just past the column-header line of the iteration.
    end_header: int
    # Offset at which the iteration's process rows stop; unknown until the
    # following iteration (or the end of the log) has been located.
    end: Optional[int] = None
@dataclass
class TopIter:
    """One iteration (frame) of ``top`` batch output.

    Holds the raw text pieces of the frame and converts the fixed-width
    process table into a :class:`pandas.DataFrame` on request.
    """

    # Summary block of the frame; expected to start with "top - HH:MM:SS".
    system_wide: str
    # The column-header line, e.g. "    PID USER      PR  NI ... COMMAND".
    header: str
    # Character offsets of this frame within the full log text.
    positions: "Position"
    # Raw text of the process rows that follow the header line.
    data: str = ""
    # Result of the most recent generate_dataframe() call, if any.
    df: Optional[pd.DataFrame] = None

    @property
    def time(self):
        """Return the wall-clock time of this frame as a ``datetime``.

        Only the time of day is present in the ``top`` summary line, so the
        date part is ``strptime``'s default (1900-01-01).

        Raises:
            ValueError: if ``system_wide`` does not start with
                ``"top - HH:MM:SS"``.
        """
        found = re.match(r"top - (\d+:\d+:\d+)", self.system_wide)
        if found is None:
            # Without this guard the failure would surface as an opaque
            # "'NoneType' object is not subscriptable" TypeError.
            raise ValueError(
                f"no 'top - HH:MM:SS' timestamp in summary: {self.system_wide[:40]!r}"
            )
        return datetime.strptime(found[1], "%H:%M:%S")

    def _column_specs(self, colnames):
        """Derive ``(start, end)`` character spans for each header column."""
        # Locate each header word by searching for it surrounded by spaces;
        # COMMAND is the last word on the line, so it has no trailing space.
        starts = [
            self.header.index(f" {name}" if name == "COMMAND" else f" {name} ") + 1
            for name in colnames
        ]
        specs = [[start, start + len(name)] for start, name in zip(starts, colnames)]
        specs[0][0] = 0  # first column reaches back to the start of the line
        specs[-1][1] = 999  # last column runs (effectively) to the end of the line

        # Widen every interior column towards the side its text grows to:
        # right-aligned ("R") columns extend left up to one character past the
        # previous column, left-aligned ("L") columns extend right up to one
        # character before the next column. The alignment string matches the
        # 12-column layout PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND.
        # The inner lists are shared between the slices, so the in-place
        # updates carry over from one step to the next.
        col_text_side = "RLRRRRRRRRRL"
        for prev, cur, nxt, side in zip(
            specs[:-2], specs[1:-1], specs[2:], col_text_side[1:-1]
        ):
            if side == "R":
                cur[0] = prev[1] + 1
            elif side == "L":
                cur[1] = nxt[0] - 1

        # Columns still narrower than 3 characters (e.g. PR, S) are
        # right-aligned, so give them one more character on the left.
        for i, (lo, hi) in enumerate(specs):
            if hi - lo < 3:
                specs[i] = [lo - 1, hi]

        return [tuple(spec) for spec in specs]

    def generate_dataframe(self):
        """Parse ``data`` into a DataFrame with one row per process.

        Columns are named after the words of ``header``; an extra ``Time``
        column holds this frame's timestamp. The result is also stored in
        ``self.df``.

        Raises:
            ValueError: if ``data`` is empty, if a header word cannot be
                located in ``header``, or if the summary has no timestamp.
        """
        if not self.data:
            raise ValueError("data is empty")
        colnames = self.header.split()
        colspecs = self._column_specs(colnames)
        df = pd.read_fwf(StringIO(self.data), colspecs=colspecs, names=colnames)
        df["Time"] = self.time
        self.df = df
        return df
def parse_log(filepath):
    """Parse a ``top -b`` log file into a single DataFrame.

    The log is split into iterations (frames); each frame's process table is
    parsed separately and the per-frame tables are concatenated.

    Args:
        filepath: path of the log file. If the file name looks like
            ``top-<N>iter...``, ``<N>`` is taken as the expected number of
            frames and checked against the log; other names skip that check.

    Returns:
        A DataFrame with one row per process per frame, including the
        ``Time`` column added by ``TopIter.generate_dataframe``.

    Raises:
        ValueError: if the file contains no ``top`` frame, or if the number
            of frames differs from the count encoded in the file name.
    """
    filepath = Path(filepath)
    output = filepath.read_text()

    # A frame is the six lines beginning with "top - " (the summary block)
    # followed by the column-header line that runs from PID to COMMAND.
    pattern = re.compile(r"(top - (.*\n){6})( +PID.*COMMAND)\n")
    top_iters: list[TopIter] = [
        TopIter(
            match[1].strip(),
            match[3],
            positions=Position(match.start(0), match.end(3)),
        )
        for match in pattern.finditer(output)
    ]
    if not top_iters:
        raise ValueError(f"no top iterations found in {filepath}")

    # Validate against the count in the file name when there is one. A real
    # exception is used instead of ``assert`` so the check survives ``-O``.
    name_match = re.match(r"top-(\d+)iter.*", filepath.stem)
    if name_match is not None:
        expect_n_iter = int(name_match[1])
        if len(top_iters) != expect_n_iter:
            raise ValueError(
                f"{filepath}: expected {expect_n_iter} iterations, "
                f"found {len(top_iters)}"
            )

    # Each frame's rows end where the next frame starts; the last frame
    # runs to the end of the log.
    for current, following in zip(top_iters[:-1], top_iters[1:]):
        current.positions.end = following.positions.start
    top_iters[-1].positions.end = len(output)

    # Slice out the process rows of every frame and parse them.
    for iter_ in top_iters:
        iter_.data = output[iter_.positions.end_header : iter_.positions.end]
    dfs = [iter_.generate_dataframe() for iter_ in top_iters]
    return pd.concat(dfs, ignore_index=True)
def plot(df, top_n=7):
    """Plot CPU and memory usage over time for the busiest commands.

    Commands are ranked by the product of ``(%CPU + 1)`` over all of their
    samples. The ranking is computed as the sum of ``log1p(%CPU)``, which
    orders commands exactly like the product but cannot overflow to ``inf``
    when a command has many or very large samples.

    Args:
        df: DataFrame with at least the columns ``COMMAND``, ``%CPU``,
            ``%MEM`` and ``Time``.
        top_n: number of top-ranked commands to draw (default 7).
    """
    # rank commands by accumulated CPU usage, highest first
    ranking = (
        df.groupby("COMMAND")["%CPU"]
        .agg(lambda cpu: np.log1p(cpu).sum())
        .sort_values(ascending=False)
    )
    top_commands = ranking.index[:top_n]

    # sort by time so each line is drawn left to right
    df = df.sort_values(["Time"])

    # upper panel: CPU, lower panel: memory, sharing the time axis
    fig, axs = plt.subplots(2, 1, sharex=True, figsize=(20, 10))
    cpu_ax, mem_ax = axs
    for command in top_commands:
        df_sub = df[df["COMMAND"] == command]
        cpu_ax.plot(df_sub["Time"], df_sub["%CPU"], label=command)
        mem_ax.plot(df_sub["Time"], df_sub["%MEM"], label=command)
    cpu_ax.legend()
    mem_ax.legend()
    cpu_ax.set_ylabel("CPU(%)")
    mem_ax.set_ylabel("MEM(%)")
    mem_ax.set_xlabel("Time")
    plt.show()
if "__main__" == __name__:
# parse all *.txt in current directory
dfs = []
for filepath in Path(".").glob("*.txt"):
df = parse_log(filepath)
dfs.append(df)
df = pd.concat(dfs, ignore_index=True)
plot(df)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment