Last active
December 26, 2024 10:49
-
-
Save taoky/6c9d6314062afea45e17009e59b171e4 to your computer and use it in GitHub Desktop.
Show ZFS ARC hit/miss per process with Linux eBPF, modified from cachetop
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# Code modified from bcc-tools cachetop | |
from __future__ import absolute_import | |
from __future__ import division | |
# Do not import unicode_literals until #623 is fixed | |
# from __future__ import unicode_literals | |
from __future__ import print_function | |
from bcc import BPF | |
from collections import defaultdict | |
from time import strftime | |
import argparse | |
import curses | |
import pwd | |
import re | |
import signal | |
from time import sleep | |
# Column titles in display order. A column's position is also the index
# used to pick the sort key out of each per-process stat tuple.
FIELDS = ("PID", "UID", "CMD", "HITS", "MISSES", "HIT%")

# Column the table is sorted by at start-up.
DEFAULT_FIELD = "HITS"
DEFAULT_SORT_FIELD = FIELDS.index(DEFAULT_FIELD)
# signal handler | |
def signal_ignore(signal, frame):
    """Signal handler that swallows the signal and only emits a newline."""
    print()
# Gather ARC statistics from the ZFS arcstats kstat file and
# return a dictionary for quick lookup of individual values.
def get_arcinfo(path='/proc/spl/kstat/zfs/arcstats'):
    """Parse a ZFS arcstats kstat file into a ``{name: value}`` dict.

    The first two lines of the file are treated as headers and skipped.
    Every following row is split on whitespace; the first column is used
    as the statistic name and the third column as its integer value.

    Args:
        path: kstat file to read. Defaults to the live ARC statistics.

    Returns:
        dict mapping statistic name to int value. Rows with fewer than
        three columns (e.g. blank lines) are skipped.

    Raises:
        ValueError: if the value column of a row is not an integer.
    """
    result = {}
    with open(path) as f:
        # Skip the two header lines; the default keeps a short or empty
        # file from raising StopIteration.
        next(f, None)
        next(f, None)
        for line in f:
            columns = line.split()
            if len(columns) < 3:
                continue
            result[columns[0]] = int(columns[2])
    return result
def get_processes_stats(
        bpf,
        sort_field=DEFAULT_SORT_FIELD,
        sort_reverse=False,
        htab_batch_ops=False):
    '''
    Collect per-process hit/miss counters from the BPF "counts" table.

    The table is emptied after it has been read, so each call reports only
    what was counted since the previous call.

    Args:
        bpf: object whose get_table("counts") returns the counter table.
        sort_field: index into each result tuple to sort by.
        sort_reverse: sort in descending order when true.
        htab_batch_ops: use the table's batch lookup/delete operations
            instead of items()/clear().

    Returns:
        list of (pid, uid, comm, hits, misses, hit_percent) tuples sorted
        by the requested field. pid and uid are ints so that sorting on
        those columns is numeric.
    '''
    counts = bpf.get_table("counts")
    entries = counts.items_lookup_batch() if htab_batch_ops else counts.items()

    # Bring the hit and miss counters of each (pid, uid, comm) together.
    stats = defaultdict(lambda: defaultdict(int))
    for k, v in entries:
        process = (k.pid, k.uid, k.comm.decode('utf-8', 'replace'))
        stats[process][k.nf] = v.value

    stats_list = []
    # Walk the processes in a fixed order: sorted() is stable, so rows that
    # tie on sort_field below keep this deterministic order.
    for (pid, uid, comm), count in sorted(stats.items(),
                                          key=lambda stat: stat[0]):
        hit = max(0, count.get(0, 0))   # NF_HIT
        miss = max(0, count.get(1, 0))  # NF_MISS
        total = hit + miss
        hitrate = (hit / total) * 100 if total > 0 else 0
        stats_list.append((pid, uid, comm, hit, miss, hitrate))

    stats_list.sort(key=lambda stat: stat[sort_field], reverse=sort_reverse)

    # Reset the counters so the next call starts from zero.
    if htab_batch_ops:
        counts.items_delete_batch()
    else:
        counts.clear()
    return stats_list
def _draw_line(stdscr, row, text, attr=curses.A_NORMAL):
    """Write text at the start of row, clipped so it always fits the window."""
    height, width = stdscr.getmaxyx()
    if 0 <= row < height and width > 1:
        # Stay out of the last column: curses raises an error when a write
        # runs past the right edge or into the bottom-right corner.
        stdscr.addstr(row, 0, text[:width - 1], attr)


def handle_loop(stdscr, args):
    """Run the interactive display until the user quits.

    Loads the BPF program, attaches it to the ZFS ARC hit and miss probes
    and then, once per args.interval seconds, redraws a table of
    per-process counters.

    Keys: 'q' quits, 'r' flips the sort order, '<' and '>' move the sort
    column left and right. Ctrl-C during the wait also quits.

    Args:
        stdscr: curses window, as passed in by curses.wrapper().
        args: parsed command-line arguments; pid and interval are read.
    """
    # don't wait on key press
    stdscr.nodelay(1)
    # set default sorting field
    sort_field = FIELDS.index(DEFAULT_FIELD)
    sort_reverse = True

    # load BPF program
    bpf_text = """
    #include <uapi/linux/ptrace.h>

    struct key_t {
        // NF_{HIT,MISS}
        u64 nf;
        u32 pid;
        u32 uid;
        char comm[16];
    };

    enum {
        NF_HIT,
        NF_MISS,
    };

    BPF_HASH(counts, struct key_t);

    static int __do_count(void *ctx, u64 nf) {
        u32 pid = bpf_get_current_pid_tgid() >> 32;
        if (FILTER_PID)
            return 0;

        struct key_t key = {};
        u32 uid = bpf_get_current_uid_gid();

        key.nf = nf;
        key.pid = pid;
        key.uid = uid;
        bpf_get_current_comm(&(key.comm), 16);
        counts.increment(key);
        return 0;
    }

    int do_count_hit(struct pt_regs *ctx) {
        return __do_count(ctx, NF_HIT);
    }

    int do_count_miss(struct pt_regs *ctx) {
        return __do_count(ctx, NF_MISS);
    }
    """
    # FILTER_PID is the "skip this event" condition inside __do_count.
    if args.pid:
        bpf_text = bpf_text.replace('FILTER_PID', 'pid != %d' % args.pid)
    else:
        bpf_text = bpf_text.replace('FILTER_PID', '0')

    b = BPF(text=bpf_text)
    b.attach_kprobe(event="trace_zfs_arc__hit", fn_name="do_count_hit")
    b.attach_kprobe(event="trace_zfs_arc__miss", fn_name="do_count_miss")

    # Hash table batch ops could be detected with
    #   BPF.kernel_struct_has_field(b'bpf_map_ops',
    #                               b'map_lookup_and_delete_batch') == 1
    # but are always disabled for now, as that would trigger an error on
    # Debian 12.
    htab_batch_ops = False

    exiting = False
    while True:
        # At most one key press is handled per refresh interval.
        s = stdscr.getch()
        if s == ord('q'):
            exiting = True
        elif s == ord('r'):
            sort_reverse = not sort_reverse
        elif s == ord('<'):
            sort_field = max(0, sort_field - 1)
        elif s == ord('>'):
            sort_field = min(len(FIELDS) - 1, sort_field + 1)

        try:
            sleep(args.interval)
        except KeyboardInterrupt:
            exiting = True
            # as cleanup can take many seconds, trap Ctrl-C:
            signal.signal(signal.SIGINT, signal_ignore)

        # Get memory info
        mem = get_arcinfo()
        arcsize = mem["size"] / 1024 / 1024

        process_stats = get_processes_stats(
            b,
            sort_field=sort_field,
            sort_reverse=sort_reverse,
            htab_batch_ops=htab_batch_ops)

        stdscr.clear()
        _draw_line(
            stdscr, 0,
            "%-8s ARC MB: %.0f "
            "/ Sort: %s / Order: %s" % (
                strftime("%H:%M:%S"), arcsize, FIELDS[sort_field],
                "descending" if sort_reverse else "ascending"
            )
        )

        # header
        _draw_line(
            stdscr, 1,
            "{0:8} {1:8} {2:16} {3:8} {4:8} {5:10}".format(*FIELDS),
            curses.A_REVERSE
        )

        height, _width = stdscr.getmaxyx()
        # Rows 0 and 1 hold the status line and the header; show only as
        # many processes as fit below them.
        visible = process_stats[:max(0, height - 2)]
        for i, stat in enumerate(visible):
            uid = int(stat[1])
            try:
                username = pwd.getpwuid(uid)[0]
            except KeyError:
                # `pwd` throws a KeyError if the user cannot be found. This
                # can happen e.g. when the process is running in a cgroup
                # that has different users from the host.
                username = 'UNKNOWN({})'.format(uid)

            _draw_line(
                stdscr, i + 2,
                "{0:8} {username:8.8} {2:16} {3:8} {4:8} "
                "{5:9.1f}%".format(
                    *stat, username=username
                )
            )

        stdscr.refresh()
        if exiting:
            print("Detaching...")
            return
def _positive_int(text):
    """argparse type: parse text as an integer that must be at least 1."""
    value = int(text)
    if value < 1:
        raise argparse.ArgumentTypeError(
            "must be a positive integer, got %r" % text)
    return value


def parse_arguments(argv=None):
    """Parse the command line.

    Args:
        argv: list of argument strings to parse. None (the default) lets
            argparse read them from sys.argv.

    Returns:
        argparse.Namespace with ``pid`` (int, or None when not given) and
        ``interval`` (positive int, default 5).
    """
    parser = argparse.ArgumentParser(
        description='Show Linux ZFS ARC hit/miss statistics per processes in a UI like top.'
    )
    parser.add_argument("-p", "--pid", type=int, metavar="PID",
                        help="trace this PID only")
    # Zero or negative intervals are rejected here, up front, rather than
    # failing later once the curses screen is already active.
    parser.add_argument(
        'interval', type=_positive_int, default=5, nargs='?',
        help='Interval between probes.'
    )
    return parser.parse_args(argv)
if __name__ == "__main__":
    # Only start the UI when run as a script, so importing this file has no
    # side effects. curses.wrapper() calls handle_loop(stdscr, args) and
    # restores the terminal when it returns or raises.
    args = parse_arguments()
    curses.wrapper(handle_loop, args)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment