stall detector!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bcc-py | |
# | |
# topwaits Show longest off-cpu waits per-stack | |
# | |
# Copyright 2019 Facebook, Inc. | |
# Copyright 2016 Netflix, Inc. | |
# Licensed under the Apache License, Version 2.0 (the "License") | |
# | |
# 13-Jan-2016 Brendan Gregg Wrote offcpu profiler | |
# 27-Nov-2019 Domas Mituzas Gutted most of profiling part and left stall detector | |
from __future__ import print_function | |
from bcc import BPF | |
from sys import stderr | |
from time import sleep, strftime | |
import argparse | |
import signal | |
import time | |
parser = argparse.ArgumentParser( | |
description="Find longest waiting stacks") | |
thread_group = parser.add_mutually_exclusive_group() | |
thread_group.add_argument("-p", "--pid", metavar="PID", dest="tgid", | |
help="trace this PID only", type=int) | |
thread_group.add_argument("-t", "--tid", metavar="TID", dest="pid", | |
help="trace this TID only", type=int) | |
parser.add_argument("--stack-storage-size", default=65536, | |
type=int, | |
help="the number of unique stack traces that can be stored and " | |
"displayed (default 65536)") | |
parser.add_argument("duration", nargs="?", default=99999999, | |
type=int, | |
help="duration of trace, in seconds") | |
parser.add_argument("-m", "--min-block-time", default=1, | |
type=int, | |
help="the amount of time in microseconds over which we " + | |
"store traces (default 1)") | |
parser.add_argument("-M", "--max-block-time", default=(1 << 64) - 1, | |
type=int, | |
help="the amount of time in microseconds under which we " + | |
"store traces (default U64_MAX)") | |
parser.add_argument("--state", type=int, | |
help="filter on this thread state bitmask (eg, 2 == TASK_UNINTERRUPTIBLE" + | |
") see include/linux/sched.h") | |
args = parser.parse_args() | |
if args.pid and args.tgid: | |
parser.error("specify only one of -p and -t") | |
duration = int(args.duration) | |
# define BPF program | |
bpf_text = """ | |
#include <uapi/linux/ptrace.h> | |
#include <linux/sched.h> | |
#define MINBLOCK_US MINBLOCK_US_VALUEULL | |
#define MAXBLOCK_US MAXBLOCK_US_VALUEULL | |
struct key_t { | |
u32 pid; | |
int user_stack_id; | |
int kernel_stack_id; | |
}; | |
BPF_HASH(counts, struct key_t); | |
BPF_HASH(maximums, struct key_t); | |
BPF_HASH(timestamps, struct key_t); | |
BPF_HASH(start, u32); | |
BPF_STACK_TRACE(stack_traces, STACK_STORAGE_SIZE); | |
int oncpu(struct pt_regs *ctx, struct task_struct *prev) { | |
u32 pid = prev->pid; | |
u32 tgid = prev->tgid; | |
u64 ts, *tsp, *valp; | |
// record previous thread sleep time | |
ts = bpf_ktime_get_ns(); | |
if ((THREAD_FILTER) && (STATE_FILTER)) { | |
start.update(&pid, &ts); | |
} | |
// get the current thread's start time | |
pid = bpf_get_current_pid_tgid(); | |
tgid = bpf_get_current_pid_tgid() >> 32; | |
tsp = start.lookup(&pid); | |
if (tsp == 0) { | |
return 0; // missed start or filtered | |
} | |
// calculate current thread's delta time in nanos | |
u64 delta = ts - *tsp; | |
start.delete(&pid); | |
// nanos to micros | |
delta = delta / 1000; | |
if ((delta < MINBLOCK_US) || (delta > MAXBLOCK_US)) { | |
return 0; | |
} | |
// create map key | |
struct key_t key = {}; | |
key.pid = tgid; | |
key.user_stack_id = stack_traces.get_stackid(ctx, BPF_F_USER_STACK); | |
key.kernel_stack_id = stack_traces.get_stackid(ctx, 0); | |
counts.increment(key, delta); | |
valp = maximums.lookup(&key); | |
if (valp == 0 || *valp < delta) { | |
maximums.update(&key, &delta); | |
timestamps.update(&key, &ts); | |
} | |
return 0; | |
} | |
""" | |
thread_filter = '1' | |
# set thread filter | |
if args.tgid is not None: | |
thread_filter = 'tgid == %d' % args.tgid | |
elif args.pid is not None: | |
thread_filter = 'pid == %d' % args.pid | |
if args.state == 0: | |
state_filter = 'prev->state == 0' | |
elif args.state: | |
# these states are sometimes bitmask checked | |
state_filter = 'prev->state & %d' % args.state | |
else: | |
state_filter = '1' | |
bpf_text = bpf_text.replace('THREAD_FILTER', thread_filter) | |
bpf_text = bpf_text.replace('STATE_FILTER', state_filter) | |
# set stack storage size | |
bpf_text = bpf_text.replace('STACK_STORAGE_SIZE', str(args.stack_storage_size)) | |
bpf_text = bpf_text.replace('MINBLOCK_US_VALUE', str(args.min_block_time)) | |
bpf_text = bpf_text.replace('MAXBLOCK_US_VALUE', str(args.max_block_time)) | |
# initialize BPF | |
b = BPF(text=bpf_text) | |
b.attach_kprobe(event="finish_task_switch", fn_name="oncpu") | |
matched = b.num_open_kprobes() | |
if matched == 0: | |
print("error: 0 functions traced. Exiting.", file=stderr) | |
exit(1) | |
try: | |
sleep(duration) | |
except KeyboardInterrupt: | |
# as cleanup can take many seconds, trap Ctrl-C: | |
pass | |
signal.signal(signal.SIGINT, signal.SIG_IGN) | |
counts = b.get_table("counts") | |
stack_traces = b.get_table("stack_traces") | |
maximums = b.get_table("maximums") | |
timestamps = b.get_table("timestamps") | |
basetime = time.time() - float(open("/proc/uptime").read().split()[0]) | |
def memoize(f): | |
class memodict(dict): | |
def __init__(self, f): | |
self.f = f | |
def __call__(self, *args): | |
return self[args] | |
def __missing__(self, key): | |
ret = self[key] = self.f(*key) | |
return ret | |
return memodict(f) | |
@memoize | |
def get_sym(addr, pid): | |
val = b.sym(addr, pid) | |
return val.decode().split("(")[0] | |
for k, v in counts.items(): | |
user_stack = tuple([] if k.user_stack_id < 0 else \ | |
[get_sym(x, k.pid) for x in stack_traces.walk(k.user_stack_id)]) | |
kernel_stack = tuple([] if k.kernel_stack_id < 0 else \ | |
[b.ksym(x) for x in stack_traces.walk(k.kernel_stack_id)]) | |
ts = timestamps[k].value / 1000000000 + basetime | |
print(v.value, maximums[k].value, ts, kernel_stack, user_stack) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment