@taoky
Last active December 26, 2024 10:49
Show ZFS ARC hit/miss per process with Linux eBPF, modified from cachetop
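A minimal way to try it (assuming the script is saved as zfscachetop.py, the bcc Python bindings are installed, and it is run as root, which eBPF tracing requires):

sudo ./zfscachetop.py 5

While it runs: q quits, r flips the sort order, < and > move the sort column; the optional argument is the refresh interval in seconds (default 5).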
#!/usr/bin/env python
# Code modified from bcc-tools cachetop
from __future__ import absolute_import
from __future__ import division
# Do not import unicode_literals until #623 is fixed
# from __future__ import unicode_literals
from __future__ import print_function
from bcc import BPF
from collections import defaultdict
from time import strftime
import argparse
import curses
import pwd
import re
import signal
from time import sleep
FIELDS = (
    "PID",
    "UID",
    "CMD",
    "HITS",
    "MISSES",
    "HIT%",
)
DEFAULT_FIELD = "HITS"
DEFAULT_SORT_FIELD = FIELDS.index(DEFAULT_FIELD)
# signal handler
def signal_ignore(signal, frame):
    print()
# Gather ARC statistics from /proc/spl/kstat/zfs/arcstats
# and return them as a dictionary for quicker lookup
def get_arcinfo():
    result = {}
    with open('/proc/spl/kstat/zfs/arcstats') as f:
        # skip the two kstat header lines
        next(f)
        next(f)
        for line in f:
            # columns are: name, type, data
            k = re.split(r'\s+', line, maxsplit=3)
            v = k[2].split()
            result[k[0]] = int(v[0])
    return result
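# For reference, arcstats looks roughly like this (two header lines, then
# "name / type / data" rows; the numbers below are only illustrative):
#   13 1 0x01 123 ...
#   name                            type data
#   hits                            4    123456
#   size                            4    3735899136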
def get_processes_stats(
        bpf,
        sort_field=DEFAULT_SORT_FIELD,
        sort_reverse=False,
        htab_batch_ops=False):
    '''
    Return a list of (pid, uid, comm, hits, misses, hitrate) tuples with
    per-process ARC stats, sorted by the requested field.
    '''
    counts = bpf.get_table("counts")
    stats = defaultdict(lambda: defaultdict(int))
    for k, v in (counts.items_lookup_batch()
                 if htab_batch_ops else counts.items()):
        stats["%d-%d-%s" % (k.pid, k.uid,
                            k.comm.decode('utf-8', 'replace'))][k.nf] = v.value
    stats_list = []
    for pid, count in sorted(stats.items(), key=lambda stat: stat[0]):
        hit = 0
        miss = 0
        for k, v in count.items():
            if k == 0:  # NF_HIT
                hit = max(0, v)
            if k == 1:  # NF_MISS
                miss = max(0, v)
        hitrate = (hit / (hit + miss)) * 100 if (hit + miss) > 0 else 0
        _pid, uid, comm = pid.split('-', 2)
        stats_list.append(
            (int(_pid), uid, comm,
             hit, miss, hitrate))
    stats_list = sorted(
        stats_list, key=lambda stat: stat[sort_field], reverse=sort_reverse
    )
    # reset the counters so the next interval starts fresh
    if htab_batch_ops:
        counts.items_delete_batch()
    else:
        counts.clear()
    return stats_list
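# Example of the intermediate aggregation above (hypothetical values):
#   stats["1234-1000-postgres"] == {0: 4213, 1: 87}
# means PID 1234 (UID 1000, comm "postgres") recorded 4213 ARC hits (NF_HIT)
# and 87 misses (NF_MISS) during the interval.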
def handle_loop(stdscr, args):
    # don't wait on key press
    stdscr.nodelay(1)
    # set default sorting field
    sort_field = FIELDS.index(DEFAULT_FIELD)
    sort_reverse = True

    # load BPF program
    bpf_text = """
    #include <uapi/linux/ptrace.h>

    struct key_t {
        // NF_{HIT,MISS}
        u64 nf;
        u32 pid;
        u32 uid;
        char comm[16];
    };

    enum {
        NF_HIT,
        NF_MISS,
    };

    BPF_HASH(counts, struct key_t);

    static int __do_count(void *ctx, u64 nf) {
        u32 pid = bpf_get_current_pid_tgid() >> 32;
        if (FILTER_PID)
            return 0;

        struct key_t key = {};
        u32 uid = bpf_get_current_uid_gid();

        key.nf = nf;
        key.pid = pid;
        key.uid = uid;
        bpf_get_current_comm(&(key.comm), 16);

        counts.increment(key);
        return 0;
    }

    int do_count_hit(struct pt_regs *ctx) {
        return __do_count(ctx, NF_HIT);
    }

    int do_count_miss(struct pt_regs *ctx) {
        return __do_count(ctx, NF_MISS);
    }
    """
    if args.pid:
        bpf_text = bpf_text.replace('FILTER_PID', 'pid != %d' % args.pid)
    else:
        bpf_text = bpf_text.replace('FILTER_PID', '0')

    b = BPF(text=bpf_text)
    b.attach_kprobe(event="trace_zfs_arc__hit", fn_name="do_count_hit")
    b.attach_kprobe(event="trace_zfs_arc__miss", fn_name="do_count_miss")
    exiting = 0

    # check whether hash table batch ops is supported
    # htab_batch_ops = True if BPF.kernel_struct_has_field(b'bpf_map_ops',
    #     b'map_lookup_and_delete_batch') == 1 else False
    # Always set to False for now, as it would trigger an error on Debian 12.
    htab_batch_ops = False
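    # (Batch map ops need kernel support for BPF_MAP_LOOKUP_AND_DELETE_BATCH,
    # added around Linux 5.6; with htab_batch_ops False the script falls back
    # to counts.items()/counts.clear() in get_processes_stats().)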
    while 1:
        s = stdscr.getch()
        if s == ord('q'):
            exiting = 1
        elif s == ord('r'):
            sort_reverse = not sort_reverse
        elif s == ord('<'):
            sort_field = max(0, sort_field - 1)
        elif s == ord('>'):
            sort_field = min(len(FIELDS) - 1, sort_field + 1)

        try:
            sleep(args.interval)
        except KeyboardInterrupt:
            exiting = 1
            # as cleanup can take many seconds, trap Ctrl-C:
            signal.signal(signal.SIGINT, signal_ignore)

        # Get the current ARC size from the kstat counters
        mem = get_arcinfo()
        arcsize = int(mem["size"]) / 1024 / 1024

        process_stats = get_processes_stats(
            b,
            sort_field=sort_field,
            sort_reverse=sort_reverse,
            htab_batch_ops=htab_batch_ops)
        stdscr.clear()

        # status line
        stdscr.addstr(
            0, 0,
            "%-8s ARC MB: %.0f "
            "/ Sort: %s / Order: %s" % (
                strftime("%H:%M:%S"), arcsize, FIELDS[sort_field],
                sort_reverse and "descending" or "ascending"
            )
        )

        # header
        stdscr.addstr(
            1, 0,
            "{0:8} {1:8} {2:16} {3:8} {4:8} {5:10}".format(
                *FIELDS
            ),
            curses.A_REVERSE
        )
        (height, width) = stdscr.getmaxyx()

        for i, stat in enumerate(process_stats):
            uid = int(stat[1])
            try:
                username = pwd.getpwuid(uid)[0]
            except KeyError:
                # `pwd` throws a KeyError if the user cannot be found. This can
                # happen e.g. when the process is running in a cgroup that has
                # different users from the host.
                username = 'UNKNOWN({})'.format(uid)

            stdscr.addstr(
                i + 2, 0,
                "{0:8} {username:8.8} {2:16} {3:8} {4:8} "
                "{5:9.1f}%".format(
                    *stat, username=username
                )
            )
            if i > height - 4:
                break
        stdscr.refresh()

        if exiting:
            print("Detaching...")
            return
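# Screen layout drawn by handle_loop: row 0 is the status line (time, ARC
# size, sort state), row 1 the reverse-video column header, and rows 2+ show
# one process each, truncated to the terminal height.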
def parse_arguments():
    parser = argparse.ArgumentParser(
        description='Show Linux ZFS ARC hit/miss statistics per process '
                    'in a top-like UI.'
    )
    parser.add_argument("-p", "--pid", type=int, metavar="PID",
                        help="trace this PID only")
    parser.add_argument(
        'interval', type=int, default=5, nargs='?',
        help='Interval between probes.'
    )
    args = parser.parse_args()
    return args
args = parse_arguments()
curses.wrapper(handle_loop, args)