Skip to content

Instantly share code, notes, and snippets.

@Kramerican
Last active January 29, 2023 20:47
Show Gist options
  • Save Kramerican/b2bd470cf43c9b72852b891bd9e14007 to your computer and use it in GitHub Desktop.
Save Kramerican/b2bd470cf43c9b72852b891bd9e14007 to your computer and use it in GitHub Desktop.
Quick and dirty LXD container disk i/o monitoring on ZFS filesystems
#DO NOT USE THIS SCRIPT - IT IS GARBAGE. USE NETDATA INSTEAD! IT WILL SOLVE ALL YOUR MONITORING NEEDS AND MORE! https://github.com/firehol/netdata
#!/usr//bin/php
<?php
/*
Quick and dirty command line utility to monitor disk IO for LXD containers
Accepts refresh interval as command line parameter
Understanding columns - taken from https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/filesystems/proc.txt?id=HEAD#l1305
rchar
-----
I/O counter: chars read
The number of bytes which this task has caused to be read from storage. This
is simply the sum of bytes which this process passed to read() and pread().
It includes things like tty IO and it is unaffected by whether or not actual
physical disk IO was required (the read might have been satisfied from
pagecache)
wchar
-----
I/O counter: chars written
The number of bytes which this task has caused, or shall cause to be written
to disk. Similar caveats apply here as with rchar.
syscr
-----
I/O counter: read syscalls
Attempt to count the number of read I/O operations, i.e. syscalls like read()
and pread().
syscw
-----
I/O counter: write syscalls
Attempt to count the number of write I/O operations, i.e. syscalls like
write() and pwrite().
read_bytes
----------
I/O counter: bytes read
Attempt to count the number of bytes which this process really did cause to
be fetched from the storage layer. Done at the submit_bio() level, so it is
accurate for block-backed filesystems. <please add status regarding NFS and
CIFS at a later time>
write_bytes
-----------
I/O counter: bytes written
Attempt to count the number of bytes which this process caused to be sent to
the storage layer. This is done at page-dirtying time.
cancelled_write_bytes
---------------------
The big inaccuracy here is truncate. If a process writes 1MB to a file and
then deletes the file, it will in fact perform no writeout. But it will have
been accounted as having caused 1MB of write.
In other words: The number of bytes which this process caused to not happen,
by truncating pagecache. A task can cause "negative" IO too. If this task
truncates some dirty pagecache, some IO which another task has been accounted
for (in its write_bytes) will not be happening. We _could_ just subtract that
from the truncating task's write_bytes, but there is information loss in doing that.
*/
/*
Runtime configuration.

refresh_interval is taken from the first command line argument when that
argument is numeric and at least 1 after integer conversion. Anything else
(no argument, non-numeric text, zero, a negative number) falls back to 30
seconds, so sleep() always receives a valid positive integer.
*/
$config = [
    //Number of seconds to wait between polling
    "refresh_interval" => (isset($argv[1]) && is_numeric($argv[1]) && (int)$argv[1] >= 1)
        ? (int)$argv[1]
        : 30,
];

/*
Column titles of the output table: the container name followed by one
"current value" column and one "+change" column per I/O counter.
*/
$headers = [
    "Name",
    "rchar",
    "+change",
    "wchar",
    "+change",
    "syscr",
    "+change",
    "syscw",
    "+change",
    "read_bytes",
    "+change",
    "write_bytes",
    "+change",
];
/* Entrypoint */
/*
Approach (possibly naive): disk read/write figures are available on a
per-process basis, but what we want is a total per datapool/container.
So we start from the list of containers, descend into each container's
process tree and sum the stats of every process found there.

The per-process figures come from /proc/PID/io (try: watch -n 1 cat /proc/PID/io);
updates to that file appear to be slightly delayed.
Use this together with "zpool iostat -v 1" to monitor overall read/write
for the entire datapool.
*/

/* Containers being monitored; stays an empty list when none are found. */
$watching = [];

/* Get PIDs of our container processes */
$container_pids = [];
// The "[l]xc" character class keeps grep from matching its own command line.
$cmd = "ps -ef | grep \"[l]xc monitor\" | awk '{print \$2}' 2>&1";
exec($cmd, $container_pids, $return_val);

/* Let's prepare our containers data struct */
foreach ($container_pids as $pid) {
    $pid = trim((string)$pid);
    // Only plain numeric PIDs may be placed into the shell command below.
    if (!preg_match('/^[0-9]+$/D', $pid)) {
        continue;
    }

    //Get the name of the container we are watching
    $parent_name = [];
    // Presumably the monitor's command line carries the container name as its
    // fourth whitespace-separated field - verify against your LXD version.
    $cmd = "ps -p ".$pid." -o cmd | awk '{print \$4}' 2>&1";
    exec($cmd, $parent_name, $return_val);

    //If we have a name in index 1, then this is a container, probably.
    if (!empty($parent_name[1])) {
        $watching[] = [
            "watch_pid" => $pid,
            "name" => $parent_name[1],
            // Counter values seen at the previous poll; zero before the first poll.
            "stats" => [
                "rchar" => 0,
                "wchar" => 0,
                "syscr" => 0,
                "syscw" => 0,
                "read_bytes" => 0,
                "write_bytes" => 0,
            ],
        ];
    }
}
/* Alright now we have something to work with, let's output data in a nice view */
$cli = new CLIOutput();
// ESC[H moves the cursor to the top-left corner, ESC[J erases from there on.
echo chr(27).chr(91).'H'.chr(27).chr(91).'J';
echo "Initial view will be lifetime read/write in +change columns, so expect large values.".PHP_EOL;

// Counters in the order returnIOForPid() delivers them (index 0..5).
$metrics = ["rchar", "wchar", "syscr", "syscw", "read_bytes", "write_bytes"];
// syscr/syscw count system calls, not bytes, so they are shown as plain numbers.
$countMetrics = ["syscr" => true, "syscw" => true];

// Nothing to watch is a valid state; make sure the loop below has a list to walk.
if (!isset($watching) || !is_array($watching)) {
    $watching = [];
}

// sleep() needs a positive integer; guard against a non-numeric setting.
$pause = max(1, (int)$config["refresh_interval"]);

//Loop forever until Ctrl+C
while (true) {
    $cli->initView($headers);
    $totals = array_fill_keys($metrics, 0);

    foreach ($watching as &$process) {
        $children = getAllChildren($process["watch_pid"]);
        $diskio = array_fill_keys($metrics, 0);

        //Sum up all IO for children
        foreach ($children as $child) {
            $childio = returnIOForPid($child);
            // A process that vanished (or is unreadable) yields no usable values.
            if (count($childio) < count($metrics)) {
                continue;
            }
            foreach ($metrics as $index => $metric) {
                $diskio[$metric] += (int)$childio[$index];
            }
        }

        // One row per container: name, then (current value, change) per counter.
        $row = [$process["name"]];
        foreach ($metrics as $metric) {
            $diff = $diskio[$metric] - $process["stats"][$metric];
            $totals[$metric] += $diff;

            $row[] = $diskio[$metric];
            // Positive byte deltas are made human readable. Zero/negative deltas
            // (e.g. after child processes exited) and syscall counts stay numeric.
            $row[] = ($diff > 0 && !isset($countMetrics[$metric])) ? formatBytes($diff) : $diff;

            // Remember the current value as the baseline for the next poll.
            $process["stats"][$metric] = $diskio[$metric];
        }
        $cli->_rows[] = $row;
    }
    // Break the reference so $process cannot alias the last element later on.
    unset($process);

    //Add totals as last row
    $summary = ["Cumulative"];
    foreach ($metrics as $metric) {
        $summary[] = "";
        $summary[] = isset($countMetrics[$metric]) ? $totals[$metric] : formatBytes($totals[$metric]);
    }
    $cli->_rows[] = $summary;

    echo chr(27).chr(91).'H'.chr(27).chr(91).'J';
    echo "Refreshing every ".$config["refresh_interval"]." seconds".PHP_EOL;
    $cli->_generateBodyRows();
    sleep($pause);
}
/* Helper functions */
/**
 * Collects the PIDs of all descendants (children, grandchildren, ...) of a process.
 *
 * Children are looked up with "pgrep -P"; each child is followed by its own
 * descendants in the returned list.
 *
 * @param int|string $pid PID of the process whose descendants are wanted.
 * @return array List of PID strings; empty when $pid is not a positive
 *               integer or the process has no children.
 */
function getAllChildren($pid) {
    $descendants = [];

    // Only a positive integer may reach the shell command below.
    $pid = trim((string)$pid);
    if (!preg_match('/^[0-9]+$/D', $pid) || (int)$pid < 1) {
        return $descendants;
    }

    //Gets all child processes of pid
    // stderr is discarded rather than merged into stdout: an error message
    // captured here would otherwise be treated as a list of child PIDs and
    // recursed on.
    $children = [];
    exec("pgrep -P ".$pid." 2>/dev/null", $children, $return_val);

    foreach ($children as $childPID) {
        $childPID = trim($childPID);
        if (!preg_match('/^[0-9]+$/D', $childPID)) {
            continue;
        }
        $descendants[] = $childPID;
        foreach (getAllChildren($childPID) as $deeperPID) {
            $descendants[] = $deeperPID;
        }
    }

    return $descendants;
}
/**
 * Reads the I/O counters of one process from /proc/<pid>/io.
 *
 * Each line of that file is split on whitespace and its second field (the
 * value after the "name:" label) is returned, in file order.
 *
 * @param int|string $pid PID of the process to inspect.
 * @return array List of counter values as strings; empty when $pid is not
 *               numeric or the file cannot be read.
 */
function returnIOForPid($pid) {
    $values = [];

    // Only digits may become part of the /proc path.
    $pid = trim((string)$pid);
    if (!preg_match('/^[0-9]+$/D', $pid)) {
        return $values;
    }

    // The process may exit between being listed and being read, so a failed
    // read is expected here; it is silenced and reported as "no data".
    $lines = @file("/proc/".$pid."/io", FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
    if ($lines === false) {
        return $values;
    }

    foreach ($lines as $line) {
        $fields = preg_split('/\s+/', trim($line));
        $values[] = isset($fields[1]) ? $fields[1] : '';
    }

    return $values;
}
/**
 * Turns a byte count into a short human readable string such as "1.5 KB".
 *
 * The value is scaled by powers of 1024 to the largest unit that fits,
 * capped at the last entry of the unit list, and rounded.
 *
 * @param int|float $bytes     Byte count; values below zero are treated as 0.
 * @param int       $precision Number of decimal places passed to round().
 * @return string Rounded value, a space, and the unit label.
 */
function formatBytes($bytes, $precision = 2) {
    $units = ['B', 'KB', 'MB', 'GB!', 'TB!!'];
    $largestUnit = count($units) - 1;

    // Negative input is clamped to zero.
    $bytes = max($bytes, 0);

    // log() is skipped for zero; zero simply stays in the first unit.
    $magnitude = 0;
    if ($bytes) {
        $magnitude = log($bytes);
    }

    // Number of whole 1024-steps contained in the value, limited to known units.
    $unitIndex = min(floor($magnitude / log(1024)), $largestUnit);

    $scaled = $bytes / pow(1024, $unitIndex);

    return round($scaled, $precision) . ' ' . $units[$unitIndex];
}
/**
 * Minimal plain-text table printer for the terminal.
 *
 * Rows are collected in the public $_rows list (the header row first, see
 * initView()) and printed with "|" separated, left-aligned, padded cells.
 */
class CLIOutput {
    // Optional per-column header texts consulted when measuring column widths.
    // Nothing in this class assigns to it.
    private $_headers = [];

    // All rows to print, header row included. Callers append to this directly.
    public $_rows = [];

    /**
     * Prints a progress notice and starts a fresh table whose first row is $headers.
     *
     * @param array $headers Column titles.
     */
    public function initView($headers) {
        echo "Gathering stats, please wait ...".PHP_EOL;
        $this->_rows = [$headers];
    }

    /**
     * Prints every collected row, in insertion order.
     */
    public function _generateBodyRows()
    {
        foreach ($this->_rows as $currentRow) {
            $this->_renderDataRow($currentRow);
        }
    }

    /**
     * Writes one line of text followed by a line break.
     *
     * @param string $line Text to print.
     */
    public function _writeLine($line) {
        echo $line.PHP_EOL;
    }

    /**
     * Prints a single row as "| cell | cell |", padding each cell to its column width.
     * Empty rows are skipped.
     *
     * @param array $row Cell values keyed by column.
     */
    public function _renderDataRow($row)
    {
        if (empty($row)) {
            return;
        }

        $cells = [];
        foreach ($row as $column => $name) {
            // One leading space is written explicitly, the rest is trailing padding.
            $padding = $this->_getColumnWidth($column) - strlen($name) - 1;
            $cells[] = ' ' . $name . str_repeat(' ', $padding);
        }

        $this->_writeLine('|' . implode('|', $cells) . '|');
    }

    /**
     * Returns the width of a column: its longest cell (or header text, if one
     * is registered) plus the surrounding spaces.
     *
     * @param int|string $column Column key.
     * @return int Width in characters.
     */
    public function _getColumnWidth($column)
    {
        $widest = 0;
        if (isset($this->_headers[$column])) {
            $widest = strlen($this->_headers[$column]);
        }

        foreach ($this->_rows as $candidate) {
            $widest = max($widest, strlen($candidate[$column]));
        }

        return $this->_widthWithSpaces($widest);
    }

    /**
     * Adds room for one space on each side of the cell content.
     *
     * @param int $width Content width.
     * @return int Content width plus two.
     */
    public function _widthWithSpaces($width)
    {
        return 2 + $width;
    }

    /*
    Disabled row separator renderer. It refers to $_sampleRow and $_output,
    neither of which is declared in this class.

    public function _renderRowSeparator()
    {
    $separator = '+';
    foreach ($this->_sampleRow as $column => $header) {
    $columnWidth = $this->_getColumnWidth($column);
    $separator .= str_repeat('-', $columnWidth) . '+';
    }
    $this->_output->writeMessage($separator);
    }*/
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment