# Source: GitHub gist by @danielocfb, created March 11, 2025 16:40.
#!/usr/bin/env python
from __future__ import print_function
from bcc import BPF
import argparse
import time
import datetime
# Sizes that are substituted into the BPF program text before compilation.
NR_BTS = 64          # default number of stack ids kept per running task
NR_STACKS = 32768    # size passed to BPF_STACK_TRACE for the "stacks" map
NR_RESULTS = 128     # number of slots in the "results" ring

# Command-line interface of the tool.
parser = argparse.ArgumentParser()
parser.add_argument('-t', '--thresh-ms', type=float, default=100.0,
                    help='runtime threshold in ms')
# '--nr-backtraces' follows the hyphenated spelling of the other options;
# the original '--nr_backtraces' spelling stays accepted as an alias.
parser.add_argument('-b', '--nr-backtraces', '--nr_backtraces',
                    dest='nr_backtraces', type=int, default=NR_BTS,
                    help='number of backtraces to capture')
parser.add_argument('-i', '--backtrace-interval-ms', type=float, default=10.0,
                    help='backtrace capture interval in ms')
parser.add_argument('-k', '--kthread-only', action='store_true',
                    help='only consider kernel threads')
parser.add_argument('-p', '--percpu-only', action='store_true',
                    help='only consider percpu threads')
args = parser.parse_args()

# The value sizes a C array and is used as a modulus in the BPF program, so
# anything below 1 is rejected here with a readable message.
if args.nr_backtraces < 1:
    parser.error('--nr-backtraces must be at least 1')
# BPF program text (C, compiled by bcc).  The __UPPER_CASE__ tokens are
# placeholders that are textually replaced with numbers before compilation.
#
# - The sched_switch raw tracepoint probe measures how long the outgoing
#   task ran and, when that exceeds the threshold, copies the per-CPU
#   tracking record into the next slot of the "results" ring.
# - The sched_tick kprobe periodically stores a stack id into the per-CPU
#   record's "bt" ring while a tracked task is running.
bpf_source = """
#include <linux/sched.h>

BPF_STACK_TRACE(stacks, __NR_STACKS__);

struct running_task {
    u64 running_at;             // when the tracked task was switched in, 0 if untracked
    u64 bt_at;                  // when the last backtrace was taken, 0 if untracked
    u32 bt[__NR_BTS__];         // ring of stack ids, indexed by bt_seq modulo its size
    u32 bt_seq;                 // number of backtraces taken since switch-in
    u32 seq;                    // sequence number of this record in the results ring
    u32 pid;
    char comm[TASK_COMM_LEN];
    u64 ran_for;                // measured runtime in ns
};

BPF_PERCPU_ARRAY(running_task, struct running_task, 1);
BPF_ARRAY(results, struct running_task, __NR_RESULTS__);
BPF_ARRAY(result_seq, unsigned long, 1);

RAW_TRACEPOINT_PROBE(sched_switch)
{
    // args[1] and args[2] are read as the outgoing and incoming tasks.
    struct task_struct *prev = (void *)ctx->args[1];
    struct task_struct *next = (void *)ctx->args[2];
    u64 now = bpf_ktime_get_ns();
    struct running_task *t;
    u32 zero = 0;

    // running_task is a per-CPU array with a single element: index 0
    // resolves to the copy of the current CPU.  Using the CPU number as
    // the index makes the lookup fail on every CPU other than CPU 0.
    if (!(t = running_task.lookup(&zero)))
        return 0;

    if (t->running_at && prev->pid) {
        s64 dur = now - t->running_at;

        if (dur > __RUNTIME_THRESH_NS__) {
            u32 idx;
            unsigned long *rseq, seq;
            struct running_task *res;

            if (!(rseq = result_seq.lookup(&zero)))
                return 0;
            // Sequence numbers start at 1; the slot is seq modulo ring size.
            seq = __sync_fetch_and_add(rseq, 1) + 1;
            idx = seq % __NR_RESULTS__;
            if (!(res = results.lookup(&idx)))
                return 0;
            __builtin_memcpy(res, t, sizeof(*res));
            res->pid = prev->pid;
            bpf_probe_read_kernel(res->comm, TASK_COMM_LEN, prev->comm);
            res->ran_for = dur;
            // seq is written last; the reader uses it to detect new records.
            res->seq = seq;
            bpf_trace_printk("%s[%d] ran for %lluus", prev->comm, prev->pid, (now - t->running_at) / 1000);
        }
    }

    // Stop tracking until the incoming task is known to pass the filters.
    t->running_at = 0;
    t->bt_at = 0;
    if (__KTHREAD_ONLY__ && !(next->flags & PF_KTHREAD))
        return 0;
    if (__PERCPU_ONLY__ && next->nr_cpus_allowed != 1)
        return 0;
    t->running_at = now;
    t->bt_at = now;
    t->bt_seq = 0;
    return 0;
}

void kprobe__sched_tick(struct pt_regs *ctx)
{
    u64 now = bpf_ktime_get_ns();
    struct running_task *t;
    u32 zero = 0, idx;

    // Index 0 selects the record of the current CPU (single-element
    // per-CPU array), matching the lookup in the sched_switch probe.
    if (!(t = running_task.lookup(&zero)))
        return;
    // Nothing tracked on this CPU, or the capture interval has not elapsed.
    if (!t->bt_at || now - t->bt_at < __BACKTRACE_INTERVAL_NS__)
        return;
    idx = t->bt_seq++ % __NR_BTS__;
    // The return value is stored unchecked, so it may be an error code.
    t->bt[idx] = stacks.get_stackid(ctx, BPF_F_REUSE_STACKID);
    t->bt_at = now;
}
"""
# Replace each compile-time placeholder in the BPF program text with its
# numeric value, in a fixed order, then compile and load the program.
placeholder_values = (
    ('__NR_STACKS__', NR_STACKS),
    ('__NR_RESULTS__', NR_RESULTS),
    ('__RUNTIME_THRESH_NS__', int(args.thresh_ms * 1000000)),
    ('__NR_BTS__', args.nr_backtraces),
    ('__BACKTRACE_INTERVAL_NS__', int(args.backtrace_interval_ms * 1000000)),
    ('__KTHREAD_ONLY__', int(args.kthread_only)),
    ('__PERCPU_ONLY__', int(args.percpu_only)),
)
for placeholder, value in placeholder_values:
    bpf_source = bpf_source.replace(placeholder, str(value))

bpf = BPF(text=bpf_source)

# Handles to the maps declared by the BPF program.
stacks = bpf['stacks']
results = bpf['results']

# Sequence number of the next result record to print; the BPF side numbers
# records starting from 1.
next_seq = 1
def print_stack(stkid):
    """Print the kernel symbols of one captured stack, one frame per line.

    ``stkid`` is an entry of a result record's ``bt`` array, i.e. the value
    the BPF side stored from ``stacks.get_stackid()``.  That value is stored
    unchecked, so it may be an error code rather than a usable stack id; in
    that case a placeholder line is printed instead of aborting the tool.
    """
    try:
        frames = stacks.walk(stkid)
    except KeyError:
        # bcc signals a failed map lookup with KeyError (id never valid, or
        # no longer present in the stack map).
        print(' <stack unavailable>')
        return
    for addr in frames:
        sym = bpf.ksym(addr).decode('utf-8')
        print(' {}'.format(sym))
# Poll the results ring forever and print every record that is newer than
# the last one printed.  Ctrl-C ends the tool.
try:
    while True:
        time.sleep(0.1)
        while True:
            r = results[next_seq % NR_RESULTS]
            # The slot still holds an older record: nothing new to print.
            if r.seq < next_seq:
                break
            # comm is copied as raw bytes by the BPF side and need not be
            # valid UTF-8, so undecodable bytes are replaced, not fatal.
            comm = r.comm.decode('utf-8', 'replace')
            print(f'{comm}[{r.pid}] ran_for={r.ran_for/1000000:.2f}ms bt_seq={r.bt_seq}')
            # bt is a ring indexed by the backtrace counter modulo its size;
            # print at most the last nr_backtraces entries, oldest first.
            bt_start = max(r.bt_seq - args.nr_backtraces, 0)
            for bti in range(bt_start, r.bt_seq):
                print()
                print_stack(r.bt[bti % args.nr_backtraces])
            print()
            # Continue after the record just printed; r.seq can be ahead of
            # next_seq when the slot was overwritten by a later record.
            next_seq = r.seq + 1
except KeyboardInterrupt:
    # Interrupting the tool is the normal way to stop it; exit quietly
    # instead of printing a traceback.
    pass
# End of gist (GitHub page footer omitted).