DavidGoodwin/pressure.sh

## pressure.sh
#!/bin/bash
# -*- sh -*-

: << =cut

=head1 NAME

pressure - Munin plugin to plot Linux CPU and memory pressure stall time (PSI) - see: https://lwn.net/Articles/759781/

=head1 CONFIGURATION

No configuration

=head1 LICENSE

GPLv2

=head1 MAGIC MARKERS

 #%# family=auto
 #%# capabilities=autoconf

=cut

# Load Munin's shared shell helpers. Quoted so that a MUNIN_LIBDIR containing
# whitespace still resolves to a single path.
. "$MUNIN_LIBDIR/plugins/plugin.sh"

# autoconf: answer with exactly "yes" or "no (reason)" and exit 0, which is the
# form munin-node-configure understands. This is handled before the kernel
# support check below so that a host without PSI gets a clean "no" rather than
# free-form text and a failing exit status.
if [ "$1" = "autoconf" ]; then
	# Both files are read when fetching values, so require both here.
	if [ -r /proc/pressure/cpu ] && [ -r /proc/pressure/memory ]; then
		echo yes
	else
		echo 'no (/proc/pressure/cpu or /proc/pressure/memory not readable)'
	fi
	exit 0
fi

# Every other mode needs the PSI directory. The diagnostic goes to stderr so
# it is not mistaken for plugin output.
if [ ! -d /proc/pressure ]; then
	echo "kernel support missing?" >&2
	exit 1
fi

# config: describe the graph and its two data sources to Munin, then stop.
if [ "$1" = "config" ]; then
	# The plotted values are ever-growing totals. DERIVE with "min 0" makes
	# Munin drop the negative step seen when those totals restart (for example
	# after a reboot); COUNTER would treat it as a counter wrap and plot a
	# huge spike.
	printf '%s\n' \
		'graph_title CPU Pressure' \
		'graph_args --base 1000 -l 0' \
		'graph_vlabel time lost ' \
		'graph_category system' \
		'cpu.label CPU stalled time' \
		'cpu.type DERIVE' \
		'cpu.min 0' \
		'memory.label Memory stalled time' \
		'memory.type DERIVE' \
		'memory.min 0'
	exit 0
fi
# CPU : some avg10=0.00 avg60=0.00 avg300=0.08 total=9338403
# " The avg numbers give the percentage of the time that runnable processes are delayed because the CPU is unavailable ... over 10, 60, and 300 seconds."
# " The final number (total) is the total amount of time (in microseconds) during which processes were stalled. "

# MEMORY : some avg10=0.00 avg60=0.00 avg300=0.00 total=0
#          full avg10=0.00 avg60=0.00 avg300=0.00 total=0

# "The some line is similar to the CPU information: it tracks the percentage of the time that at least one process could be running if it weren't waiting for memory resources. In particular, the time spent for swapping in, refaulting pages from the page cache, and performing direct reclaim is tracked in this way. It is, thus, a good indicator of when the system is thrashing due to a lack of memory."
# "The full line is a little different: it tracks the time that nobody is able to use the CPU for actual work due to memory pressure. If all processes are waiting for paging I/O, the CPU may look idle, but that's not because of a lack of work to do. If those processes are performing memory reclaim, the end result is nearly the same; the CPU is busy, but it's not doing the work that the computer is there to do. If the full numbers are much above zero, it's clear that the system lacks the memory it needs to support the current workload."


# psi_some_total FILE
#
# Print the "total=" figure from the "some" line of a pressure file, e.g.
#   some avg10=0.00 avg60=0.00 avg300=0.08 total=9338403
# prints 9338403. The field is located by name rather than by word position,
# so extra lines (such as "full") or reordered fields do not matter.
# Prints "U" (Munin's marker for an unknown value) when FILE cannot be read or
# holds no purely numeric "some" total. Always returns 0.
psi_some_total() {
	local psi_file=$1 field value
	local -a fields

	if [ -r "$psi_file" ]; then
		# The "|| [ ... ]" part keeps a last line that has no trailing newline.
		while read -r -a fields || [ "${#fields[@]}" -gt 0 ]; do
			[ "${fields[0]}" = "some" ] || continue
			for field in "${fields[@]:1}"; do
				case $field in
					total=*)
						value=${field#total=}
						case $value in
							'' | *[!0-9]*) ;; # not a plain number: keep looking
							*)
								echo "$value"
								return 0
								;;
						esac
						;;
				esac
			done
		done 2>/dev/null < "$psi_file"
	fi

	echo U
}

# Fetch mode: one value line per data source.
cpu_total=$(psi_some_total /proc/pressure/cpu)
memory_total=$(psi_some_total /proc/pressure/memory)

echo "cpu.value $cpu_total"
echo "memory.value $memory_total"
	# End of plugin: a fetch run finishes after printing cpu.value and
	# memory.value above. Nothing else may follow here; in particular no
	# indented "=cut" block, because an indented terminator does not close a
	# "<<" here-document and bash would then warn on every run.