mctop bcc/bpftrace implementations
Disassembly of section .bpf.fn.trace_entry: | |
trace_entry: | |
; int trace_entry(struct pt_regs *ctx) { // Line 83 | |
0: b7 03 00 00 00 00 00 00 r3 = 0 | |
; struct keyhit_t keyhit = {0}; // Line 88 | |
1: 7b 3a f8 ff 00 00 00 00 *(u64 *)(r10 - 8) = r3 | |
2: 7b 3a f0 ff 00 00 00 00 *(u64 *)(r10 - 16) = r3 | |
3: 7b 3a e8 ff 00 00 00 00 *(u64 *)(r10 - 24) = r3 | |
4: 7b 3a e0 ff 00 00 00 00 *(u64 *)(r10 - 32) = r3 | |
5: 7b 3a d8 ff 00 00 00 00 *(u64 *)(r10 - 40) = r3 | |
6: 7b 3a d0 ff 00 00 00 00 *(u64 *)(r10 - 48) = r3 | |
7: 7b 3a c8 ff 00 00 00 00 *(u64 *)(r10 - 56) = r3 | |
8: 7b 3a c0 ff 00 00 00 00 *(u64 *)(r10 - 64) = r3 | |
9: 7b 3a b8 ff 00 00 00 00 *(u64 *)(r10 - 72) = r3 | |
10: 7b 3a b0 ff 00 00 00 00 *(u64 *)(r10 - 80) = r3 | |
; struct value_t *valp, zero = {}; // Line 89 | |
11: 7b 3a a8 ff 00 00 00 00 *(u64 *)(r10 - 88) = r3 | |
12: 7b 3a a0 ff 00 00 00 00 *(u64 *)(r10 - 96) = r3 | |
13: 7b 3a 98 ff 00 00 00 00 *(u64 *)(r10 - 104) = r3 | |
14: 7b 3a 90 ff 00 00 00 00 *(u64 *)(r10 - 112) = r3 | |
15: 7b 3a 88 ff 00 00 00 00 *(u64 *)(r10 - 120) = r3 | |
; switch(PT_REGS_IP(ctx)) { // Line 24 | |
16: 79 12 80 00 00 00 00 00 r2 = *(u64 *)(r1 + 128) | |
17: 18 04 00 00 ce 1a d3 1a 00 00 00 00 44 56 00 00 r4 = 94850507807438 ll | |
19: 1d 42 05 00 00 00 00 00 if r2 == r4 goto +5 | |
20: 18 04 00 00 66 0a d3 1a 00 00 00 00 44 56 00 00 r4 = 94850507803238 ll | |
22: 5d 42 04 00 00 00 00 00 if r2 != r4 goto +4 | |
; case 0x56441ad30a66ULL: *((uint64_t *)dest) = ctx->si; __asm__ __volatile__("": : :"memory"); return 0; // Line 25 | |
23: 79 13 68 00 00 00 00 00 r3 = *(u64 *)(r1 + 104) | |
24: 05 00 01 00 00 00 00 00 goto +1 | |
; case 0x56441ad31aceULL: *((uint64_t *)dest) = ctx->si; __asm__ __volatile__("": : :"memory"); return 0; // Line 26 | |
25: 79 13 68 00 00 00 00 00 r3 = *(u64 *)(r1 + 104) | |
26: 79 12 80 00 00 00 00 00 r2 = *(u64 *)(r1 + 128) | |
27: 18 04 00 00 ce 1a d3 1a 00 00 00 00 44 56 00 00 r4 = 94850507807438 ll | |
; switch(PT_REGS_IP(ctx)) { // Line 33 | |
29: 1d 42 06 00 00 00 00 00 if r2 == r4 goto +6 | |
30: b7 07 00 00 00 00 00 00 r7 = 0 | |
31: 18 04 00 00 66 0a d3 1a 00 00 00 00 44 56 00 00 r4 = 94850507803238 ll | |
33: 5d 42 03 00 00 00 00 00 if r2 != r4 goto +3 | |
; case 0x56441ad30a66ULL: *((uint8_t *)dest) = ctx->cx; __asm__ __volatile__("": : :"memory"); return 0; // Line 34 | |
34: 79 17 58 00 00 00 00 00 r7 = *(u64 *)(r1 + 88) | |
35: 05 00 01 00 00 00 00 00 goto +1 | |
; case 0x56441ad31aceULL: *((uint8_t *)dest) = ctx->cx; __asm__ __volatile__("": : :"memory"); return 0; // Line 35 | |
36: 79 17 58 00 00 00 00 00 r7 = *(u64 *)(r1 + 88) | |
37: 79 12 80 00 00 00 00 00 r2 = *(u64 *)(r1 + 128) | |
; switch(PT_REGS_IP(ctx)) { // Line 42 | |
38: 18 04 00 00 ce 1a d3 1a 00 00 00 00 44 56 00 00 r4 = 94850507807438 ll | |
40: 1d 42 06 00 00 00 00 00 if r2 == r4 goto +6 | |
41: b7 08 00 00 00 00 00 00 r8 = 0 | |
42: 18 04 00 00 66 0a d3 1a 00 00 00 00 44 56 00 00 r4 = 94850507803238 ll | |
44: 5d 42 03 00 00 00 00 00 if r2 != r4 goto +3 | |
; case 0x56441ad30a66ULL: *((int32_t *)dest) = ctx->ax; __asm__ __volatile__("": : :"memory"); return 0; // Line 43 | |
45: 79 18 50 00 00 00 00 00 r8 = *(u64 *)(r1 + 80) | |
46: 05 00 01 00 00 00 00 00 goto +1 | |
; case 0x56441ad31aceULL: *((int32_t *)dest) = ctx->ax; __asm__ __volatile__("": : :"memory"); return 0; // Line 44 | |
47: 79 18 50 00 00 00 00 00 r8 = *(u64 *)(r1 + 80) | |
48: bf 71 00 00 00 00 00 00 r1 = r7 | |
; probe_read_size = keysize < sizeof(keyhit.keystr) ? keysize : sizeof(keyhit.keystr); // Line 96 | |
49: 57 01 00 00 ff 00 00 00 r1 &= 255 | |
50: b7 04 00 00 50 00 00 00 r4 = 80 | |
51: bf 72 00 00 00 00 00 00 r2 = r7 | |
52: 2d 14 01 00 00 00 00 00 if r4 > r1 goto +1 | |
53: b7 02 00 00 50 00 00 00 r2 = 80 | |
; bpf_probe_read(&keyhit.keystr, probe_read_size, (void *)keystr); // Line 102 | |
54: 57 02 00 00 ff 00 00 00 r2 &= 255 | |
55: bf a6 00 00 00 00 00 00 r6 = r10 | |
; uint8_t probe_read_size = 0; // Line 90 | |
56: 07 06 00 00 b0 ff ff ff r6 += -80 | |
; bpf_probe_read(&keyhit.keystr, probe_read_size, (void *)keystr); // Line 102 | |
57: bf 61 00 00 00 00 00 00 r1 = r6 | |
58: 85 00 00 00 04 00 00 00 call 4 | |
; valp = ({typeof(keyhits.leaf) *leaf = bpf_map_lookup_elem_(bpf_pseudo_fd(1, -1), &keyhit); if (!leaf) { bpf_map_update_elem_(bpf_pseudo_fd(1, -1), &keyhit, &zero, BPF_NOEXIST); leaf = bpf_map_lookup_elem_(bpf_pseudo_fd(1, -1), &keyhit); if (!leaf) return 0;}leaf;}); // Line 104 | |
59: 18 11 00 00 ff ff ff ff 00 00 00 00 00 00 00 00 ld_pseudo r1, 1, 4294967295 | |
61: bf 62 00 00 00 00 00 00 r2 = r6 | |
62: 85 00 00 00 01 00 00 00 call 1 | |
63: bf 06 00 00 00 00 00 00 r6 = r0 | |
64: 55 06 0f 00 00 00 00 00 if r6 != 0 goto +15 | |
65: 18 11 00 00 ff ff ff ff 00 00 00 00 00 00 00 00 ld_pseudo r1, 1, 4294967295 | |
67: bf a6 00 00 00 00 00 00 r6 = r10 | |
68: 07 06 00 00 b0 ff ff ff r6 += -80 | |
69: bf a3 00 00 00 00 00 00 r3 = r10 | |
70: 07 03 00 00 88 ff ff ff r3 += -120 | |
71: bf 62 00 00 00 00 00 00 r2 = r6 | |
72: b7 04 00 00 01 00 00 00 r4 = 1 | |
73: 85 00 00 00 02 00 00 00 call 2 | |
74: 18 11 00 00 ff ff ff ff 00 00 00 00 00 00 00 00 ld_pseudo r1, 1, 4294967295 | |
76: bf 62 00 00 00 00 00 00 r2 = r6 | |
77: 85 00 00 00 01 00 00 00 call 1 | |
78: bf 06 00 00 00 00 00 00 r6 = r0 | |
79: 15 06 0d 00 00 00 00 00 if r6 == 0 goto +13 | |
; valp->keysize = keysize; // Line 107 | |
80: 57 07 00 00 ff 00 00 00 r7 &= 255 | |
81: 7b 76 18 00 00 00 00 00 *(u64 *)(r6 + 24) = r7 | |
; valp->count++; // Line 105 | |
82: 79 61 00 00 00 00 00 00 r1 = *(u64 *)(r6 + 0) | |
83: 07 01 00 00 01 00 00 00 r1 += 1 | |
84: 7b 16 00 00 00 00 00 00 *(u64 *)(r6 + 0) = r1 | |
; valp->bytecount = bytecount; // Line 106 | |
85: 67 08 00 00 20 00 00 00 r8 <<= 32 | |
86: c7 08 00 00 20 00 00 00 r8 s>>= 32 | |
87: 7b 86 08 00 00 00 00 00 *(u64 *)(r6 + 8) = r8 | |
; valp->totalbytes += bytecount; // Line 108 | |
88: 79 61 10 00 00 00 00 00 r1 = *(u64 *)(r6 + 16) | |
89: 0f 81 00 00 00 00 00 00 r1 += r8 | |
90: 7b 16 10 00 00 00 00 00 *(u64 *)(r6 + 16) = r1 | |
; valp->timestamp = bpf_ktime_get_ns(); // Line 109 | |
91: 85 00 00 00 05 00 00 00 call 5 | |
92: 7b 06 20 00 00 00 00 00 *(u64 *)(r6 + 32) = r0 | |
; } // Line 113 | |
93: b7 00 00 00 00 00 00 00 r0 = 0 | |
94: 95 00 00 00 00 00 00 00 exit | |
# Build stage: compiles memcached from source with SASL and dtrace (USDT)
# probes enabled, keeping debug info (-ggdb3) in the resulting binary.
FROM alpine:3.10 AS builder

# ensure SASL's "libplain.so" is installed as per https://github.com/memcached/memcached/wiki/SASLHowto
RUN apk add --no-cache cyrus-sasl-plain wget binutils

ENV MEMCACHED_VERSION=1.5.19
ENV MEMCACHED_SHA1=14e6a02e743838696fcb620edf6a2fd7e60cabec

# Get dtrace dependencies for alpine in a kinda hacky way: unpack the Ubuntu
# systemtap-sdt-dev package and copy its dtrace script and sys/ headers in place.
RUN mkdir /build \
    && cd /build \
    && wget http://launchpadlibrarian.net/251391227/systemtap-sdt-dev_2.9-2ubuntu2_amd64.deb \
    && ar x systemtap-sdt-dev_2.9-2ubuntu2_amd64.deb \
    && tar -xpf data.tar.xz \
    && mv usr/bin/dtrace /usr/bin/dtrace \
    && mkdir -p /usr/include/sys \
    && mv usr/include/x86_64-linux-gnu/sys/* /usr/include/sys \
    && rm -rf /build

# Install the build toolchain, fetch and verify the release tarball, then
# configure, build, test and install memcached.
RUN set -x \
    && apk add --no-cache --virtual .build-deps \
        ca-certificates \
        coreutils \
        cyrus-sasl-dev \
        dpkg-dev dpkg \
        gcc \
        libc-dev \
        libevent-dev \
        linux-headers \
        make \
        openssl \
        perl \
        perl-utils \
        tar \
        wget \
    && wget -O memcached.tar.gz "https://memcached.org/files/memcached-$MEMCACHED_VERSION.tar.gz" \
    && echo "$MEMCACHED_SHA1 memcached.tar.gz" | sha1sum -c - \
    && mkdir -p /usr/src/memcached \
    && tar -xzf memcached.tar.gz -C /usr/src/memcached --strip-components=1 \
    && rm memcached.tar.gz \
    && cd /usr/src/memcached \
    && gnuArch="$(dpkg-architecture --query DEB_BUILD_GNU_TYPE)" \
    && enableExtstore="$( \
# https://github.com/docker-library/memcached/pull/38
        case "$gnuArch" in \
# https://github.com/memcached/memcached/issues/381 "--enable-extstore on s390x (IBM System Z mainframe architecture) fails tests"
            s390x-*) ;; \
            *) echo '--enable-extstore' ;; \
        esac \
    )" \
    && ./configure \
        CFLAGS="-ggdb3" \
        CXXFLAGS="-ggdb3" \
        LDFLAGS="-ggdb3" \
        --build="$gnuArch" \
        --enable-sasl \
        --enable-sasl-pwdb \
        --enable-dtrace \
        $enableExtstore \
    && make -j "$(nproc)" \
    && make test \
    && make install \
    && memcached -V
# Runtime stage: minimal Alpine image carrying only the memcached binary built
# in the "builder" stage plus its shared-library dependencies.
FROM alpine:3.10

# Fixed numeric IDs for the unprivileged memcache user/group.
ENV UID=11211
ENV GID=11211

RUN apk update && apk upgrade && apk add --no-cache coreutils \
        libevent \
        libsasl \
        file \
        bash \
# create our user and group with explicit IDs so they are assigned consistently, regardless of whatever dependencies get added
    && addgroup -g $GID memcache \
    && adduser -D -u $UID -G memcache memcache

COPY --from=builder /usr/local/bin/memcached /usr/local/bin/memcached
COPY health.sh /usr/local/bin/

# With an exec-form ENTRYPOINT, CMD is appended as arguments. The previous
# CMD ["memcached"] therefore started "memcached memcached"; no default CMD is
# set so the server starts without a stray positional argument. Extra flags
# can still be supplied via `docker run <image> <flags>`.
ENTRYPOINT ["memcached"]

# Run the server as the unprivileged memcache user.
USER $UID:$GID
// Print the column header once and remember when tracing started so that a
// per-key request rate can be derived later.
BEGIN
{
  printf("%-20s %10s %10s %10s\n", "MEMCACHED KEY", "CALLS", "OBJSIZE", "REQ/s");
  @start = nsecs;
}

// NOTE - this presently omits incr, decr, and delete because they have a
// different signature
usdt::memcached:command__get,
usdt::memcached:command__set,
usdt::memcached:command__add,
usdt::memcached:command__append,
usdt::memcached:command__prepend,
usdt::memcached:command__touch,
usdt::memcached:command__cas,
usdt::memcached:command__replace
{
  // arg1 is the key pointer and arg2 its length; the key is not guaranteed to
  // be NUL-terminated, so always read it with an explicit length.
  @calls[str(arg1, arg2)]++;

  $objsize = arg3;

  // Whole seconds since BEGIN; this is 0 during the first second.
  $interval = (nsecs - @start) / 1000000000;

  // Look the counter up with the same length-bounded key it was stored under,
  // and avoid dividing by zero before one full second has elapsed.
  $cps = $interval > 0 ? @calls[str(arg1, arg2)] / $interval : 0;

  printf("%-20s %10d %10d %10d\n", str(arg1, arg2), @calls[str(arg1, arg2)],
         $objsize, $cps);
}

// Drop the maps on exit so bpftrace does not print them.
END
{
  clear(@start);
  clear(@calls);
}
#!/usr/bin/python | |
# @lint-avoid-python-3-compatibility-imports | |
# | |
# mctop Memcached key operation analysis tool | |
# For Linux, uses BCC, eBPF. | |
# | |
# USAGE: mctop.py -p PID | |
# | |
# This uses in-kernel eBPF maps to trace and analyze key access rates and | |
# objects. This can help to spot hot keys, and tune memcached usage for | |
# performance. | |
# | |
# Copyright 2019 Shopify, Inc. | |
# Licensed under the Apache License, Version 2.0 (the "License") | |
# | |
# 17-Nov-2019 Dale Hamel Created this. | |
# Inspired by the ruby tool of the same name by Marcus Barczak in 2012, | |
# see https://codeascraft.com/2012/12/13/mctop-a-tool-for-analyzing-memcache-get-traffic/ | |
# see also https://github.com/tumblr/memkeys | |
from __future__ import print_function | |
from time import sleep, strftime, monotonic | |
from bcc import BPF, USDT, utils | |
from subprocess import call | |
import argparse | |
import sys | |
import select | |
import tty | |
import termios | |
import pickle | |
# FIXME better help
# Command-line arguments.
examples = """examples:
./mctop -p PID # memcached usage top, 1 second refresh
"""
parser = argparse.ArgumentParser(
    description="Memcached top key analysis",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-p", "--pid", type=int, help="process id to attach to")
parser.add_argument("-o", "--output", action="store",
    help="save map data to /tmp/OUTPUT.mcdump (pickle) when the dump "
         "command is issued")  # FIXME make this JSON
parser.add_argument("-C", "--noclear", action="store_true",
    help="don't clear the screen")
# type=int lets argparse reject non-numeric values with a usage error instead
# of a ValueError traceback from a later int() call.
parser.add_argument("-r", "--maxrows", type=int, default=20,
    help="maximum rows to print, default 20")
parser.add_argument("interval", nargs="?", type=int, default=1,
    help="output interval, in seconds")
parser.add_argument("count", nargs="?", type=int, default=99999999,
    help="number of outputs")
parser.add_argument("--ebpf", action="store_true",
    help=argparse.SUPPRESS)

# FIXME clean this up
# Module-level state shared by the helper functions and the main loop below.
args = parser.parse_args()
interval = args.interval
countdown = args.count
maxrows = args.maxrows
clear = not args.noclear
outfile = args.output
pid = args.pid

# Terminal attributes captured before any mode change, restored on exit.
old_settings = termios.tcgetattr(sys.stdin)

sort_mode = "C"
sort_ascending = True
exiting = 0
first_loop = True

# Keys selecting which column the output is sorted by.
sort_modes = {
    "C": "calls",  # total calls to key
    "S": "size",   # latest size of key
    "R": "req/s",  # requests per second to this key
    "B": "bw",     # total bytes accesses on this key
    "N": "ts"      # timestamp of the latest access
}

# Keys triggering an action rather than a sort mode.
commands = {
    "T": "toggle",  # sorting by ascending / descending order
    "D": "dump",    # clear eBPF maps and dump to disk (if set)
    "Q": "quit"     # exit mctop
}
# FIXME have helper to generate per type?
# eBPF program (C, compiled by BCC). trace_entry() is attached to a memcached
# USDT probe and accumulates per-key statistics in the "keyhits" hash map.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <bcc/proto.h>

#define MAX_STRING_LENGTH 80

// Must match python definitions
typedef enum {START, END, GET, ADD, SET, REPLACE, PREPEND, APPEND,
              TOUCH, CAS, INCR, DECR, DELETE} memcached_op_t;

struct keyhit_t {
    char keystr[MAX_STRING_LENGTH];
};

struct value_t {
    u64 count;
    u64 bytecount;
    u64 totalbytes;
    u64 keysize;
    u64 timestamp;
};

BPF_HASH(keyhits, struct keyhit_t, struct value_t);

int trace_entry(struct pt_regs *ctx) {
    u64 keystr = 0;
    int32_t bytecount = 0; // type is -4@%eax in stap notes, which is int32
    uint8_t keysize = 0;   // type is 1@%cl, which should be uint8
    struct keyhit_t keyhit = {0};
    struct value_t *valp, zero = {};
    int err;

    bpf_usdt_readarg(2, ctx, &keystr);
    bpf_usdt_readarg(3, ctx, &keysize);
    bpf_usdt_readarg(4, ctx, &bytecount);

    // see https://github.com/memcached/memcached/issues/576
    // ideally per https://github.com/iovisor/bcc/issues/1260 we should be able to
    // read just the size we need, but a read bounded by keysize throws a
    // verifier error, so the whole buffer is read and the key is trimmed to
    // keysize in userspace instead
    err = bpf_probe_read(&keyhit.keystr, sizeof(keyhit.keystr), (void *)keystr);
    if (err != 0)
        return 0; // key could not be read; do not account it under a bogus key

    valp = keyhits.lookup_or_init(&keyhit, &zero);
    valp->count++;
    valp->bytecount = bytecount;
    valp->keysize = keysize;
    valp->totalbytes += bytecount;
    valp->timestamp = bpf_ktime_get_ns();

    return 0;
}
"""
# The probe may capture bytes past the end of a key, so the raw map can hold
# several entries for what is really one key. Rebuild the table in userspace,
# keyed by the key trimmed to its recorded length.
# see https://github.com/memcached/memcached/issues/576
# A possible solution may be in flagging to the verifier that the size given
# by a usdt argument is less than the buffer size,
# see https://github.com/iovisor/bcc/issues/1260#issuecomment-406365168
def reconcile_keys(bpf_map):
    """Merge map entries whose trimmed key text is identical.

    bpf_map must provide items() yielding (key, value) pairs where key has a
    keystr attribute and value has keysize, count, bytecount, totalbytes and
    timestamp attributes.

    Returns a dict mapping the decoded, trimmed key to a dict with "count",
    "bytecount", "totalbytes" and "timestamp".
    """
    merged = {}
    for raw_key, stats in bpf_map.items():
        name = raw_key.keystr[:stats.keysize].decode('utf-8', 'replace')
        entry = merged.get(name)

        if entry is None:
            # First time this trimmed key is seen: copy its stats verbatim.
            merged[name] = {
                "count": stats.count,
                "bytecount": stats.bytecount,
                "totalbytes": stats.totalbytes,
                "timestamp": stats.timestamp,
            }
            continue

        # Collision: counters add up...
        entry['count'] += stats.count
        entry['totalbytes'] += stats.totalbytes
        # ...while the size and timestamp follow the most recent access.
        if stats.timestamp > entry['timestamp']:
            entry['bytecount'] = stats.bytecount
            entry['timestamp'] = stats.timestamp
    return merged
def sort_output(unsorted_map):
    """Return the (key, stats) pairs of unsorted_map as a sorted list.

    The column is chosen by the module-level sort_mode:
    "C" -> count, "S" -> bytecount, "R" -> cps (requests per second),
    "B" -> bandwidth, "N" -> timestamp. An unrecognised mode falls back to
    sorting by count.

    A list is returned (not a one-shot iterator) so the result can be
    iterated for display and later pickled by the dump command.

    NOTE(review): when sort_ascending is true the list is ordered largest
    value first, i.e. the flag's sense is inverted relative to its name and to
    the "Asc"/"Dsc" label in the status line. This is kept unchanged so the
    default view still lists the hottest keys first.
    """
    sort_fields = {
        "C": "count",
        "S": "bytecount",
        "R": "cps",        # requests per second
        "B": "bandwidth",  # bytes transferred per second
        "N": "timestamp",
    }
    field = sort_fields.get(sort_mode, "count")

    output = sorted(unsorted_map.items(), key=lambda item: item[1][field])
    if sort_ascending:
        # Reverse in place (rather than sorting with reverse=True) to keep the
        # same relative order of ties as before.
        output.reverse()
    return output
# Put stdin into cbreak mode so single key presses can be polled for
def readKey(interval):
    """Wait briefly for one key press and apply the matching command.

    Keys (case-insensitive): c/s/r/b/n select the sort column, t toggles the
    sort direction, d dumps the last sorted output and clears the eBPF map,
    q requests exit.

    interval is accepted for interface compatibility but is not used.
    NOTE(review): the wait is hard-coded to 5 seconds; it is not switched to
    `interval` here because the main loop reassigns that global.
    """
    global sort_mode, sort_ascending, exiting

    tty.setcbreak(sys.stdin.fileno())
    if select.select([sys.stdin], [], [], 5) != ([sys.stdin], [], []):
        return  # no key pressed within the timeout

    key = sys.stdin.read(1).lower()
    if key == 't':
        sort_ascending = not sort_ascending
    elif key in ('c', 's', 'r', 'b', 'n'):
        sort_mode = key.upper()
    elif key == 'd':
        keyhits = bpf.get_table("keyhits")
        # Only write a dump when an output name was given; otherwise the
        # command just resets the counters.
        if outfile is not None:
            # The context manager guarantees the file is flushed and closed.
            with open('/tmp/%s.mcdump' % outfile, 'wb') as out:
                pickle.dump(list(sorted_output), out)
        keyhits.clear()
    elif key == 'q':
        exiting = 1
# --ebpf: print the generated BPF program and stop without attaching.
if args.ebpf:
    print(bpf_text)
    sys.exit()

# Attach trace_entry() to memcached's USDT probe in the target process.
usdt = USDT(pid=pid)
usdt.enable_probe(probe="command__set", fn_name="trace_entry")  # FIXME use fully specified version, port this to python
bpf = BPF(text=bpf_text, usdt_contexts=[usdt])

start = monotonic()  # FIXME would prefer monotonic_ns, if 3.7+

try:
    while True:
        try:
            if not first_loop:
                readKey(interval)
            else:
                first_loop = False
        except KeyboardInterrupt:
            exiting = 1

        # header
        if clear:
            print("\033c", end="")
        print("%-30s %8s %8s %8s %8s %8s" % ("MEMCACHED KEY", "CALLS",
                                             "OBJSIZE", "REQ/S",
                                             "BW(kbps)", "TOTAL"))

        keyhits = bpf.get_table("keyhits")
        line = 0

        # Seconds since tracing started. Kept in its own variable so the
        # user-supplied refresh `interval` is not overwritten, and clamped so
        # the rate computations below can never divide by zero.
        elapsed = max(monotonic() - start, 1e-9)

        fixed_map = reconcile_keys(keyhits)
        for k, v in fixed_map.items():
            v['cps'] = v['count'] / elapsed
            v['bandwidth'] = (v['totalbytes'] / 1000) / elapsed

        # Materialise the result so the dump command still sees every row
        # after this loop has iterated over it.
        sorted_output = list(sort_output(fixed_map))

        for k, v in sorted_output:
            print("%-30s %8d %8d %8f %8f %8d" % (k, v['count'], v['bytecount'],
                                                 v['cps'], v['bandwidth'],
                                                 v['totalbytes']))
            line += 1
            if line >= maxrows:
                break

        # Pad to a fixed height so the status line stays in the same place.
        print((maxrows - line) * "\r\n")
        sys.stdout.write("[Curr: %s/%s Opt: %s:%s|%s:%s|%s:%s|%s:%s|%s:%s]" %
                         (sort_mode,
                          "Asc" if sort_ascending else "Dsc",
                          'C', sort_modes['C'],
                          'S', sort_modes['S'],
                          'R', sort_modes['R'],
                          'B', sort_modes['B'],
                          'N', sort_modes['N']
                          ))
        sys.stdout.write("[%s:%s %s:%s %s:%s]" % (
            'T', commands['T'],
            'D', commands['D'],
            'Q', commands['Q']
        ))
        print("\033[%d;%dH" % (0, 0))

        # Honour the "count" argument: stop after that many refreshes.
        countdown -= 1
        if exiting or countdown <= 0:
            break
finally:
    # Always give the terminal back in its original mode, even on errors.
    termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings)

print("\033c", end="")
sys.exit()
#!/usr/bin/python | |
# @lint-avoid-python-3-compatibility-imports | |
# | |
# mctop Memcached key operation analysis tool | |
# For Linux, uses BCC, eBPF. | |
# | |
# USAGE: mctop.py -p PID | |
# | |
# This uses in-kernel eBPF maps to trace and analyze key access rates and | |
# objects. This can help to spot hot keys, and tune memcached usage for | |
# performance. | |
# | |
# Copyright 2019 Shopify, Inc. | |
# Licensed under the Apache License, Version 2.0 (the "License") | |
# | |
# 17-Nov-2019 Dale Hamel Created this. | |
# Inspired by the ruby tool of the same name by Marcus Barczak in 2012, | |
# see https://codeascraft.com/2012/12/13/mctop-a-tool-for-analyzing-memcache-get-traffic/ | |
# see also https://github.com/tumblr/memkeys | |
from __future__ import print_function | |
from time import sleep, strftime, monotonic | |
from bcc import BPF, USDT, utils | |
from subprocess import call | |
import argparse | |
import sys | |
import select | |
import tty | |
import termios | |
import pickle | |
# FIXME better help
# Command-line arguments.
examples = """examples:
./mctop -p PID # memcached usage top, 1 second refresh
"""
parser = argparse.ArgumentParser(
    description="Memcached top key analysis",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-p", "--pid", type=int, help="process id to attach to")
parser.add_argument("-o", "--output", action="store",
    help="save map data to /tmp/OUTPUT.mcdump (pickle) when the dump "
         "command is issued")  # FIXME make this JSON
parser.add_argument("-C", "--noclear", action="store_true",
    help="don't clear the screen")
# type=int lets argparse reject non-numeric values with a usage error instead
# of a ValueError traceback from a later int() call.
parser.add_argument("-r", "--maxrows", type=int, default=20,
    help="maximum rows to print, default 20")
parser.add_argument("interval", nargs="?", type=int, default=1,
    help="output interval, in seconds")
parser.add_argument("count", nargs="?", type=int, default=99999999,
    help="number of outputs")
parser.add_argument("--ebpf", action="store_true",
    help=argparse.SUPPRESS)

# FIXME clean this up
# Module-level state shared by the helper functions and the main loop below.
args = parser.parse_args()
interval = args.interval
countdown = args.count
maxrows = args.maxrows
clear = not args.noclear
outfile = args.output
pid = args.pid

# Terminal attributes captured before any mode change, restored on exit.
old_settings = termios.tcgetattr(sys.stdin)

sort_mode = "C"
sort_ascending = True
exiting = 0
first_loop = True

# Keys selecting which column the output is sorted by.
sort_modes = {
    "C": "calls",  # total calls to key
    "S": "size",   # latest size of key
    "R": "req/s",  # requests per second to this key
    "B": "bw",     # total bytes accesses on this key
    "N": "ts"      # timestamp of the latest access
}

# Keys triggering an action rather than a sort mode.
commands = {
    "T": "toggle",  # sorting by ascending / descending order
    "D": "dump",    # clear eBPF maps and dump to disk (if set)
    "Q": "quit"     # exit mctop
}
# FIXME have helper to generate per type?
# eBPF program (C, compiled by BCC). trace_entry() is attached to a memcached
# USDT probe and accumulates per-key statistics in the "keyhits" hash map.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <bcc/proto.h>

#define MAX_STRING_LENGTH 80

// Must match python definitions
typedef enum {START, END, GET, ADD, SET, REPLACE, PREPEND, APPEND,
              TOUCH, CAS, INCR, DECR, DELETE} memcached_op_t;

struct keyhit_t {
    char keystr[MAX_STRING_LENGTH];
};

struct value_t {
    u64 count;
    u64 bytecount;
    u64 totalbytes;
    u64 keysize;
    u64 timestamp;
};

BPF_HASH(keyhits, struct keyhit_t, struct value_t);

int trace_entry(struct pt_regs *ctx) {
    u64 keystr = 0;
    int32_t bytecount = 0; // type is -4@%eax in stap notes, which is int32
    uint8_t keysize = 0;   // type is 1@%cl, which should be uint8
    struct keyhit_t keyhit = {0};
    struct value_t *valp, zero = {};
    int err;

    bpf_usdt_readarg(2, ctx, &keystr);
    bpf_usdt_readarg(3, ctx, &keysize);
    bpf_usdt_readarg(4, ctx, &bytecount);

    // see https://github.com/memcached/memcached/issues/576
    // ideally per https://github.com/iovisor/bcc/issues/1260 we should be able to
    // read just the size we need, but this doesn't seem possible and throws a
    // verifier error
    err = bpf_probe_read(&keyhit.keystr, sizeof(keyhit.keystr), (void *)keystr);
    if (err != 0)
        return 0; // key could not be read; do not account it under a bogus key

    valp = keyhits.lookup_or_init(&keyhit, &zero);
    valp->count++;
    valp->bytecount = bytecount;
    valp->keysize = keysize;
    valp->totalbytes += bytecount;
    valp->timestamp = bpf_ktime_get_ns();

    return 0;
}
"""
# The probe reads a fixed-size buffer, so bytes past the end of a key can end
# up in the map key and one real key may appear as several entries. Rebuild
# the table in userspace, keyed by the key trimmed to its recorded length.
# see https://github.com/memcached/memcached/issues/576
# A possible solution may be in flagging to the verifier that the size given
# by a usdt argument is less than the buffer size,
# see https://github.com/iovisor/bcc/issues/1260#issuecomment-406365168
def reconcile_keys(bpf_map):
    """Merge map entries whose trimmed key text is identical.

    bpf_map must provide items() yielding (key, value) pairs where key has a
    keystr attribute and value has keysize, count, bytecount, totalbytes and
    timestamp attributes.

    Returns a dict mapping the decoded, trimmed key to a dict with "count",
    "bytecount", "totalbytes" and "timestamp".
    """
    merged = {}
    for raw_key, stats in bpf_map.items():
        name = raw_key.keystr[:stats.keysize].decode('utf-8', 'replace')
        entry = merged.get(name)

        if entry is None:
            # First time this trimmed key is seen: copy its stats verbatim.
            merged[name] = {
                "count": stats.count,
                "bytecount": stats.bytecount,
                "totalbytes": stats.totalbytes,
                "timestamp": stats.timestamp,
            }
            continue

        # Collision: counters add up...
        entry['count'] += stats.count
        entry['totalbytes'] += stats.totalbytes
        # ...while the size and timestamp follow the most recent access.
        if stats.timestamp > entry['timestamp']:
            entry['bytecount'] = stats.bytecount
            entry['timestamp'] = stats.timestamp
    return merged
def sort_output(unsorted_map):
    """Return the (key, stats) pairs of unsorted_map as a sorted list.

    The column is chosen by the module-level sort_mode:
    "C" -> count, "S" -> bytecount, "R" -> cps (requests per second),
    "B" -> bandwidth, "N" -> timestamp. An unrecognised mode falls back to
    sorting by count.

    A list is returned (not a one-shot iterator) so the result can be
    iterated for display and later pickled by the dump command.

    NOTE(review): when sort_ascending is true the list is ordered largest
    value first, i.e. the flag's sense is inverted relative to its name and to
    the "Asc"/"Dsc" label in the status line. This is kept unchanged so the
    default view still lists the hottest keys first.
    """
    sort_fields = {
        "C": "count",
        "S": "bytecount",
        "R": "cps",        # requests per second
        "B": "bandwidth",  # bytes transferred per second
        "N": "timestamp",
    }
    field = sort_fields.get(sort_mode, "count")

    output = sorted(unsorted_map.items(), key=lambda item: item[1][field])
    if sort_ascending:
        # Reverse in place (rather than sorting with reverse=True) to keep the
        # same relative order of ties as before.
        output.reverse()
    return output
# Put stdin into cbreak mode so single key presses can be polled for
def readKey(interval):
    """Wait briefly for one key press and apply the matching command.

    Keys (case-insensitive): c/s/r/b/n select the sort column, t toggles the
    sort direction, d dumps the last sorted output and clears the eBPF map,
    q requests exit.

    interval is accepted for interface compatibility but is not used.
    NOTE(review): the wait is hard-coded to 5 seconds; it is not switched to
    `interval` here because the main loop reassigns that global.
    """
    global sort_mode, sort_ascending, exiting

    tty.setcbreak(sys.stdin.fileno())
    if select.select([sys.stdin], [], [], 5) != ([sys.stdin], [], []):
        return  # no key pressed within the timeout

    key = sys.stdin.read(1).lower()
    if key == 't':
        sort_ascending = not sort_ascending
    elif key in ('c', 's', 'r', 'b', 'n'):
        sort_mode = key.upper()
    elif key == 'd':
        keyhits = bpf.get_table("keyhits")
        # Only write a dump when an output name was given; otherwise the
        # command just resets the counters.
        if outfile is not None:
            # The context manager guarantees the file is flushed and closed.
            with open('/tmp/%s.mcdump' % outfile, 'wb') as out:
                pickle.dump(list(sorted_output), out)
        keyhits.clear()
    elif key == 'q':
        exiting = 1
# --ebpf: print the generated BPF program and stop without attaching.
if args.ebpf:
    print(bpf_text)
    sys.exit()

# Attach trace_entry() to memcached's USDT probe in the target process.
usdt = USDT(pid=pid)
usdt.enable_probe(probe="command__set", fn_name="trace_entry")  # FIXME use fully specified version, port this to python
bpf = BPF(text=bpf_text, usdt_contexts=[usdt])

start = monotonic()  # FIXME would prefer monotonic_ns, if 3.7+

try:
    while True:
        try:
            if not first_loop:
                readKey(interval)
            else:
                first_loop = False
        except KeyboardInterrupt:
            exiting = 1

        # header
        if clear:
            print("\033c", end="")
        print("%-30s %8s %8s %8s %8s %8s" % ("MEMCACHED KEY", "CALLS",
                                             "OBJSIZE", "REQ/S",
                                             "BW(kbps)", "TOTAL"))

        keyhits = bpf.get_table("keyhits")
        line = 0

        # Seconds since tracing started. Kept in its own variable so the
        # user-supplied refresh `interval` is not overwritten, and clamped so
        # the rate computations below can never divide by zero.
        elapsed = max(monotonic() - start, 1e-9)

        fixed_map = reconcile_keys(keyhits)
        for k, v in fixed_map.items():
            v['cps'] = v['count'] / elapsed
            v['bandwidth'] = (v['totalbytes'] / 1000) / elapsed

        # Materialise the result so the dump command still sees every row
        # after this loop has iterated over it.
        sorted_output = list(sort_output(fixed_map))

        for k, v in sorted_output:
            print("%-30s %8d %8d %8f %8f %8d" % (k, v['count'], v['bytecount'],
                                                 v['cps'], v['bandwidth'],
                                                 v['totalbytes']))
            line += 1
            if line >= maxrows:
                break

        # Pad to a fixed height so the status line stays in the same place.
        print((maxrows - line) * "\r\n")
        sys.stdout.write("[Curr: %s/%s Opt: %s:%s|%s:%s|%s:%s|%s:%s|%s:%s]" %
                         (sort_mode,
                          "Asc" if sort_ascending else "Dsc",
                          'C', sort_modes['C'],
                          'S', sort_modes['S'],
                          'R', sort_modes['R'],
                          'B', sort_modes['B'],
                          'N', sort_modes['N']
                          ))
        sys.stdout.write("[%s:%s %s:%s %s:%s]" % (
            'T', commands['T'],
            'D', commands['D'],
            'Q', commands['Q']
        ))
        print("\033[%d;%dH" % (0, 0))

        # Honour the "count" argument: stop after that many refreshes.
        countdown -= 1
        if exiting or countdown <= 0:
            break
finally:
    # Always give the terminal back in its original mode, even on errors.
    termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings)

print("\033c", end="")
sys.exit()
bpf: Failed to load program: Permission denied | |
0: (b7) r3 = 0 | |
1: (7b) *(u64 *)(r10 -8) = r3 | |
last_idx 1 first_idx 0 | |
regs=8 stack=0 before 0: (b7) r3 = 0 | |
2: (7b) *(u64 *)(r10 -16) = r3 | |
3: (7b) *(u64 *)(r10 -24) = r3 | |
4: (7b) *(u64 *)(r10 -32) = r3 | |
5: (7b) *(u64 *)(r10 -40) = r3 | |
6: (7b) *(u64 *)(r10 -48) = r3 | |
7: (7b) *(u64 *)(r10 -56) = r3 | |
8: (7b) *(u64 *)(r10 -64) = r3 | |
9: (7b) *(u64 *)(r10 -72) = r3 | |
10: (7b) *(u64 *)(r10 -80) = r3 | |
11: (7b) *(u64 *)(r10 -88) = r3 | |
12: (7b) *(u64 *)(r10 -96) = r3 | |
13: (7b) *(u64 *)(r10 -104) = r3 | |
14: (7b) *(u64 *)(r10 -112) = r3 | |
15: (7b) *(u64 *)(r10 -120) = r3 | |
16: (79) r2 = *(u64 *)(r1 +128) | |
17: (18) r4 = 0x55c2c9ad1ace | |
19: (1d) if r2 == r4 goto pc+5 | |
R1=ctx(id=0,off=0,imm=0) R2_w=inv(id=0) R3_w=invP0 R4_w=inv94295095581390 R10=fp0 fp-8_w=00000000 fp-16_w=00000000 fp-24_w=00000000 fp-32_w=00000000 fp-40_w=00000000 fp-48_w=00000000 fp-56_w=00000000 fp-64_w=00000000 fp-72_w=00000000 fp-80_w=00000000 fp-88_w=00000000 fp-96_w=00000000 fp-104_w=00000000 fp-112_w=00000000 fp-120_w=00000000 | |
20: (18) r4 = 0x55c2c9ad0a66 | |
22: (5d) if r2 != r4 goto pc+4 | |
R1=ctx(id=0,off=0,imm=0) R2_w=inv94295095577190 R3_w=invP0 R4_w=inv94295095577190 R10=fp0 fp-8_w=00000000 fp-16_w=00000000 fp-24_w=00000000 fp-32_w=00000000 fp-40_w=00000000 fp-48_w=00000000 fp-56_w=00000000 fp-64_w=00000000 fp-72_w=00000000 fp-80_w=00000000 fp-88_w=00000000 fp-96_w=00000000 fp-104_w=00000000 fp-112_w=00000000 fp-120_w=00000000 | |
23: (79) r3 = *(u64 *)(r1 +104) | |
24: (05) goto pc+1 | |
26: (79) r2 = *(u64 *)(r1 +128) | |
27: (18) r4 = 0x55c2c9ad1ace | |
29: (1d) if r2 == r4 goto pc+6 | |
R1=ctx(id=0,off=0,imm=0) R2_w=inv(id=0) R3=inv(id=0) R4_w=inv94295095581390 R10=fp0 fp-8=00000000 fp-16=00000000 fp-24=00000000 fp-32=00000000 fp-40=00000000 fp-48=00000000 fp-56=00000000 fp-64=00000000 fp-72=00000000 fp-80=00000000 fp-88=00000000 fp-96=00000000 fp-104=00000000 fp-112=00000000 fp-120=00000000 | |
30: (b7) r7 = 0 | |
31: (18) r4 = 0x55c2c9ad0a66 | |
33: (5d) if r2 != r4 goto pc+3 | |
R1=ctx(id=0,off=0,imm=0) R2_w=inv94295095577190 R3=inv(id=0) R4_w=inv94295095577190 R7_w=inv0 R10=fp0 fp-8=00000000 fp-16=00000000 fp-24=00000000 fp-32=00000000 fp-40=00000000 fp-48=00000000 fp-56=00000000 fp-64=00000000 fp-72=00000000 fp-80=00000000 fp-88=00000000 fp-96=00000000 fp-104=00000000 fp-112=00000000 fp-120=00000000 | |
34: (79) r7 = *(u64 *)(r1 +88) | |
35: (05) goto pc+1 | |
37: (79) r2 = *(u64 *)(r1 +128) | |
38: (18) r4 = 0x55c2c9ad1ace | |
40: (1d) if r2 == r4 goto pc+6 | |
R1=ctx(id=0,off=0,imm=0) R2_w=inv(id=0) R3=inv(id=0) R4_w=inv94295095581390 R7=inv(id=0) R10=fp0 fp-8=00000000 fp-16=00000000 fp-24=00000000 fp-32=00000000 fp-40=00000000 fp-48=00000000 fp-56=00000000 fp-64=00000000 fp-72=00000000 fp-80=00000000 fp-88=00000000 fp-96=00000000 fp-104=00000000 fp-112=00000000 fp-120=00000000 | |
41: (b7) r8 = 0 | |
42: (18) r4 = 0x55c2c9ad0a66 | |
44: (5d) if r2 != r4 goto pc+3 | |
R1=ctx(id=0,off=0,imm=0) R2_w=inv94295095577190 R3=inv(id=0) R4_w=inv94295095577190 R7=inv(id=0) R8_w=inv0 R10=fp0 fp-8=00000000 fp-16=00000000 fp-24=00000000 fp-32=00000000 fp-40=00000000 fp-48=00000000 fp-56=00000000 fp-64=00000000 fp-72=00000000 fp-80=00000000 fp-88=00000000 fp-96=00000000 fp-104=00000000 fp-112=00000000 fp-120=00000000 | |
45: (79) r8 = *(u64 *)(r1 +80) | |
46: (05) goto pc+1 | |
48: (bf) r1 = r7 | |
49: (57) r1 &= 255 | |
50: (b7) r4 = 80 | |
51: (bf) r2 = r7 | |
52: (2d) if r4 > r1 goto pc+1 | |
R1_w=inv(id=0,umin_value=80,umax_value=255,var_off=(0x0; 0xff)) R2_w=inv(id=0) R3=inv(id=0) R4_w=inv80 R7=inv(id=0) R8=inv(id=0) R10=fp0 fp-8=00000000 fp-16=00000000 fp-24=00000000 fp-32=00000000 fp-40=00000000 fp-48=00000000 fp-56=00000000 fp-64=00000000 fp-72=00000000 fp-80=00000000 fp-88=00000000 fp-96=00000000 fp-104=00000000 fp-112=00000000 fp-120=00000000 | |
53: (b7) r2 = 80 | |
54: (57) r2 &= 255 | |
55: (bf) r6 = r10 | |
56: (07) r6 += -80 | |
57: (bf) r1 = r6 | |
58: (85) call bpf_probe_read#4 | |
last_idx 58 first_idx 48 | |
regs=4 stack=0 before 57: (bf) r1 = r6 | |
regs=4 stack=0 before 56: (07) r6 += -80 | |
regs=4 stack=0 before 55: (bf) r6 = r10 | |
regs=4 stack=0 before 54: (57) r2 &= 255 | |
regs=4 stack=0 before 53: (b7) r2 = 80 | |
59: (18) r1 = 0xffff9664db8bf400 | |
61: (bf) r2 = r6 | |
62: (85) call bpf_map_lookup_elem#1 | |
63: (bf) r6 = r0 | |
64: (55) if r6 != 0x0 goto pc+15 | |
R0_w=inv0 R6_w=inv0 R7=inv(id=0) R8=inv(id=0) R10=fp0 fp-8=mmmmmmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=00000000 fp-96=00000000 fp-104=00000000 fp-112=00000000 fp-120=00000000 | |
65: (18) r1 = 0xffff9664db8bf400 | |
67: (bf) r6 = r10 | |
68: (07) r6 += -80 | |
69: (bf) r3 = r10 | |
70: (07) r3 += -120 | |
71: (bf) r2 = r6 | |
72: (b7) r4 = 1 | |
73: (85) call bpf_map_update_elem#2 | |
74: (18) r1 = 0xffff9664db8bf400 | |
76: (bf) r2 = r6 | |
77: (85) call bpf_map_lookup_elem#1 | |
78: (bf) r6 = r0 | |
79: (15) if r6 == 0x0 goto pc+13 | |
R0_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R6_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R7=inv(id=0) R8=inv(id=0) R10=fp0 fp-8=mmmmmmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=mmmmmmmm fp-96=mmmmmmmm fp-104=mmmmmmmm fp-112=mmmmmmmm fp-120=mmmmmmmm | |
80: (57) r7 &= 255 | |
81: (7b) *(u64 *)(r6 +24) = r7 | |
R0_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R6_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R7_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R8=inv(id=0) R10=fp0 fp-8=mmmmmmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=mmmmmmmm fp-96=mmmmmmmm fp-104=mmmmmmmm fp-112=mmmmmmmm fp-120=mmmmmmmm | |
82: (79) r1 = *(u64 *)(r6 +0) | |
R0_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R6_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R7_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R8=inv(id=0) R10=fp0 fp-8=mmmmmmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=mmmmmmmm fp-96=mmmmmmmm fp-104=mmmmmmmm fp-112=mmmmmmmm fp-120=mmmmmmmm | |
83: (07) r1 += 1 | |
84: (7b) *(u64 *)(r6 +0) = r1 | |
R0_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R1_w=inv(id=0) R6_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R7_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R8=inv(id=0) R10=fp0 fp-8=mmmmmmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=mmmmmmmm fp-96=mmmmmmmm fp-104=mmmmmmmm fp-112=mmmmmmmm fp-120=mmmmmmmm | |
85: (67) r8 <<= 32 | |
86: (c7) r8 s>>= 32 | |
87: (7b) *(u64 *)(r6 +8) = r8 | |
R0_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R1_w=inv(id=0) R6_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R7_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R8_w=inv(id=0,smin_value=-2147483648,smax_value=2147483647) R10=fp0 fp-8=mmmmmmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=mmmmmmmm fp-96=mmmmmmmm fp-104=mmmmmmmm fp-112=mmmmmmmm fp-120=mmmmmmmm | |
88: (79) r1 = *(u64 *)(r6 +16) | |
R0_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R1_w=inv(id=0) R6_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R7_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R8_w=inv(id=0,smin_value=-2147483648,smax_value=2147483647) R10=fp0 fp-8=mmmmmmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=mmmmmmmm fp-96=mmmmmmmm fp-104=mmmmmmmm fp-112=mmmmmmmm fp-120=mmmmmmmm | |
89: (0f) r1 += r8 | |
90: (7b) *(u64 *)(r6 +16) = r1 | |
R0_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R1_w=inv(id=0) R6_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R7_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R8_w=inv(id=0,smin_value=-2147483648,smax_value=2147483647) R10=fp0 fp-8=mmmmmmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=mmmmmmmm fp-96=mmmmmmmm fp-104=mmmmmmmm fp-112=mmmmmmmm fp-120=mmmmmmmm | |
91: (85) call bpf_ktime_get_ns#5 | |
92: (7b) *(u64 *)(r6 +32) = r0 | |
R0=inv(id=0) R6=map_value(id=0,off=0,ks=80,vs=40,imm=0) R7=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R8=inv(id=0,smin_value=-2147483648,smax_value=2147483647) R10=fp0 fp-8=mmmmmmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=mmmmmmmm fp-96=mmmmmmmm fp-104=mmmmmmmm fp-112=mmmmmmmm fp-120=mmmmmmmm | |
93: (b7) r0 = 0 | |
94: (95) exit | |
from 79 to 93: R0_w=inv0 R6_w=inv0 R7=inv(id=0) R8=inv(id=0) R10=fp0 fp-8=mmmmmmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=mmmmmmmm fp-96=mmmmmmmm fp-104=mmmmmmmm fp-112=mmmmmmmm fp-120=mmmmmmmm | |
93: (b7) r0 = 0 | |
94: (95) exit | |
from 64 to 80: R0_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R6_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R7=inv(id=0) R8=inv(id=0) R10=fp0 fp-8=mmmmmmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=00000000 fp-96=00000000 fp-104=00000000 fp-112=00000000 fp-120=00000000 | |
80: (57) r7 &= 255 | |
81: (7b) *(u64 *)(r6 +24) = r7 | |
R0_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R6_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R7_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R8=inv(id=0) R10=fp0 fp-8=mmmmmmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=00000000 fp-96=00000000 fp-104=00000000 fp-112=00000000 fp-120=00000000 | |
82: (79) r1 = *(u64 *)(r6 +0) | |
R0_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R6_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R7_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R8=inv(id=0) R10=fp0 fp-8=mmmmmmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=00000000 fp-96=00000000 fp-104=00000000 fp-112=00000000 fp-120=00000000 | |
83: (07) r1 += 1 | |
84: (7b) *(u64 *)(r6 +0) = r1 | |
R0_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R1_w=inv(id=0) R6_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R7_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R8=inv(id=0) R10=fp0 fp-8=mmmmmmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=00000000 fp-96=00000000 fp-104=00000000 fp-112=00000000 fp-120=00000000 | |
85: (67) r8 <<= 32 | |
86: (c7) r8 s>>= 32 | |
87: (7b) *(u64 *)(r6 +8) = r8 | |
R0_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R1_w=inv(id=0) R6_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R7_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R8_w=inv(id=0,smin_value=-2147483648,smax_value=2147483647) R10=fp0 fp-8=mmmmmmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=00000000 fp-96=00000000 fp-104=00000000 fp-112=00000000 fp-120=00000000 | |
88: (79) r1 = *(u64 *)(r6 +16) | |
R0_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R1_w=inv(id=0) R6_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R7_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R8_w=inv(id=0,smin_value=-2147483648,smax_value=2147483647) R10=fp0 fp-8=mmmmmmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=00000000 fp-96=00000000 fp-104=00000000 fp-112=00000000 fp-120=00000000 | |
89: (0f) r1 += r8 | |
90: (7b) *(u64 *)(r6 +16) = r1 | |
R0_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R1_w=inv(id=0) R6_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R7_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R8_w=inv(id=0,smin_value=-2147483648,smax_value=2147483647) R10=fp0 fp-8=mmmmmmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=00000000 fp-96=00000000 fp-104=00000000 fp-112=00000000 fp-120=00000000 | |
91: (85) call bpf_ktime_get_ns#5 | |
92: safe | |
from 52 to 54: R1=inv(id=0,umax_value=79,var_off=(0x0; 0x7f)) R2=inv(id=0) R3=inv(id=0) R4=inv80 R7=inv(id=0) R8=inv(id=0) R10=fp0 fp-8=00000000 fp-16=00000000 fp-24=00000000 fp-32=00000000 fp-40=00000000 fp-48=00000000 fp-56=00000000 fp-64=00000000 fp-72=00000000 fp-80=00000000 fp-88=00000000 fp-96=00000000 fp-104=00000000 fp-112=00000000 fp-120=00000000 | |
54: (57) r2 &= 255 | |
55: (bf) r6 = r10 | |
56: (07) r6 += -80 | |
57: (bf) r1 = r6 | |
58: (85) call bpf_probe_read#4 | |
invalid stack type R1 off=-80 access_size=255 | |
processed 103 insns (limit 1000000) max_states_per_insn 0 total_states 7 peak_states 7 mark_read 4 | |
Traceback (most recent call last): | |
File "tools/mctop.py", line 237, in <module> | |
bpf = BPF(text=bpf_text, usdt_contexts=[usdt]) | |
File "/usr/lib64/python3.6/site-packages/bcc/__init__.py", line 339, in __init__ | |
usdt_context.attach_uprobes(self) | |
File "/usr/lib64/python3.6/site-packages/bcc/usdt.py", line 194, in attach_uprobes | |
addr=addr, pid=pid) | |
File "/usr/lib64/python3.6/site-packages/bcc/__init__.py", line 1008, in attach_uprobe | |
fn = self.load_func(fn_name, BPF.KPROBE) | |
File "/usr/lib64/python3.6/site-packages/bcc/__init__.py", line 384, in load_func | |
(func_name, errstr)) | |
Exception: Failed to load BPF program b'trace_entry': Permission denied |
Disassembly of section .bpf.fn.trace_entry: | |
trace_entry: | |
; int trace_entry(struct pt_regs *ctx) { // Line 83 | |
0: b7 03 00 00 00 00 00 00 r3 = 0 | |
; struct keyhit_t keyhit = {0}; // Line 88 | |
1: 73 3a fe ff 00 00 00 00 *(u8 *)(r10 - 2) = r3 | |
2: 6b 3a fc ff 00 00 00 00 *(u16 *)(r10 - 4) = r3 | |
3: 63 3a f8 ff 00 00 00 00 *(u32 *)(r10 - 8) = r3 | |
4: 7b 3a f0 ff 00 00 00 00 *(u64 *)(r10 - 16) = r3 | |
5: 7b 3a e8 ff 00 00 00 00 *(u64 *)(r10 - 24) = r3 | |
6: 7b 3a e0 ff 00 00 00 00 *(u64 *)(r10 - 32) = r3 | |
7: 7b 3a d8 ff 00 00 00 00 *(u64 *)(r10 - 40) = r3 | |
8: 7b 3a d0 ff 00 00 00 00 *(u64 *)(r10 - 48) = r3 | |
9: 7b 3a c8 ff 00 00 00 00 *(u64 *)(r10 - 56) = r3 | |
10: 7b 3a c0 ff 00 00 00 00 *(u64 *)(r10 - 64) = r3 | |
11: 7b 3a b8 ff 00 00 00 00 *(u64 *)(r10 - 72) = r3 | |
12: 7b 3a b0 ff 00 00 00 00 *(u64 *)(r10 - 80) = r3 | |
13: 7b 3a a8 ff 00 00 00 00 *(u64 *)(r10 - 88) = r3 | |
14: 7b 3a a0 ff 00 00 00 00 *(u64 *)(r10 - 96) = r3 | |
15: 7b 3a 98 ff 00 00 00 00 *(u64 *)(r10 - 104) = r3 | |
16: 7b 3a 90 ff 00 00 00 00 *(u64 *)(r10 - 112) = r3 | |
17: 7b 3a 88 ff 00 00 00 00 *(u64 *)(r10 - 120) = r3 | |
18: 7b 3a 80 ff 00 00 00 00 *(u64 *)(r10 - 128) = r3 | |
19: 7b 3a 78 ff 00 00 00 00 *(u64 *)(r10 - 136) = r3 | |
20: 7b 3a 70 ff 00 00 00 00 *(u64 *)(r10 - 144) = r3 | |
21: 7b 3a 68 ff 00 00 00 00 *(u64 *)(r10 - 152) = r3 | |
22: 7b 3a 60 ff 00 00 00 00 *(u64 *)(r10 - 160) = r3 | |
23: 7b 3a 58 ff 00 00 00 00 *(u64 *)(r10 - 168) = r3 | |
24: 7b 3a 50 ff 00 00 00 00 *(u64 *)(r10 - 176) = r3 | |
25: 7b 3a 48 ff 00 00 00 00 *(u64 *)(r10 - 184) = r3 | |
26: 7b 3a 40 ff 00 00 00 00 *(u64 *)(r10 - 192) = r3 | |
27: 7b 3a 38 ff 00 00 00 00 *(u64 *)(r10 - 200) = r3 | |
28: 7b 3a 30 ff 00 00 00 00 *(u64 *)(r10 - 208) = r3 | |
29: 7b 3a 28 ff 00 00 00 00 *(u64 *)(r10 - 216) = r3 | |
30: 7b 3a 20 ff 00 00 00 00 *(u64 *)(r10 - 224) = r3 | |
31: 7b 3a 18 ff 00 00 00 00 *(u64 *)(r10 - 232) = r3 | |
32: 7b 3a 10 ff 00 00 00 00 *(u64 *)(r10 - 240) = r3 | |
33: 7b 3a 08 ff 00 00 00 00 *(u64 *)(r10 - 248) = r3 | |
34: 7b 3a 00 ff 00 00 00 00 *(u64 *)(r10 - 256) = r3 | |
; struct value_t *valp, zero = {}; // Line 89 | |
35: 7b 3a f8 fe 00 00 00 00 *(u64 *)(r10 - 264) = r3 | |
36: 7b 3a f0 fe 00 00 00 00 *(u64 *)(r10 - 272) = r3 | |
37: 7b 3a e8 fe 00 00 00 00 *(u64 *)(r10 - 280) = r3 | |
38: 7b 3a e0 fe 00 00 00 00 *(u64 *)(r10 - 288) = r3 | |
39: 7b 3a d8 fe 00 00 00 00 *(u64 *)(r10 - 296) = r3 | |
; switch(PT_REGS_IP(ctx)) { // Line 24 | |
40: 79 12 80 00 00 00 00 00 r2 = *(u64 *)(r1 + 128) | |
41: 18 04 00 00 ce 1a d3 1a 00 00 00 00 44 56 00 00 r4 = 94850507807438 ll | |
43: 1d 42 05 00 00 00 00 00 if r2 == r4 goto +5 | |
44: 18 04 00 00 66 0a d3 1a 00 00 00 00 44 56 00 00 r4 = 94850507803238 ll | |
46: 5d 42 04 00 00 00 00 00 if r2 != r4 goto +4 | |
; case 0x56441ad30a66ULL: *((uint64_t *)dest) = ctx->si; __asm__ __volatile__("": : :"memory"); return 0; // Line 25 | |
47: 79 13 68 00 00 00 00 00 r3 = *(u64 *)(r1 + 104) | |
48: 05 00 01 00 00 00 00 00 goto +1 | |
; case 0x56441ad31aceULL: *((uint64_t *)dest) = ctx->si; __asm__ __volatile__("": : :"memory"); return 0; // Line 26 | |
49: 79 13 68 00 00 00 00 00 r3 = *(u64 *)(r1 + 104) | |
50: 79 12 80 00 00 00 00 00 r2 = *(u64 *)(r1 + 128) | |
51: 18 04 00 00 ce 1a d3 1a 00 00 00 00 44 56 00 00 r4 = 94850507807438 ll | |
; switch(PT_REGS_IP(ctx)) { // Line 33 | |
53: 1d 42 06 00 00 00 00 00 if r2 == r4 goto +6 | |
54: b7 06 00 00 00 00 00 00 r6 = 0 | |
55: 18 04 00 00 66 0a d3 1a 00 00 00 00 44 56 00 00 r4 = 94850507803238 ll | |
57: 5d 42 03 00 00 00 00 00 if r2 != r4 goto +3 | |
; case 0x56441ad30a66ULL: *((uint8_t *)dest) = ctx->cx; __asm__ __volatile__("": : :"memory"); return 0; // Line 34 | |
58: 71 16 58 00 00 00 00 00 r6 = *(u8 *)(r1 + 88) | |
59: 05 00 01 00 00 00 00 00 goto +1 | |
; case 0x56441ad31aceULL: *((uint8_t *)dest) = ctx->cx; __asm__ __volatile__("": : :"memory"); return 0; // Line 35 | |
60: 71 16 58 00 00 00 00 00 r6 = *(u8 *)(r1 + 88) | |
61: 79 12 80 00 00 00 00 00 r2 = *(u64 *)(r1 + 128) | |
; switch(PT_REGS_IP(ctx)) { // Line 42 | |
62: 18 04 00 00 ce 1a d3 1a 00 00 00 00 44 56 00 00 r4 = 94850507807438 ll | |
64: 1d 42 06 00 00 00 00 00 if r2 == r4 goto +6 | |
65: b7 08 00 00 00 00 00 00 r8 = 0 | |
66: 18 04 00 00 66 0a d3 1a 00 00 00 00 44 56 00 00 r4 = 94850507803238 ll | |
68: 5d 42 03 00 00 00 00 00 if r2 != r4 goto +3 | |
; case 0x56441ad30a66ULL: *((int32_t *)dest) = ctx->ax; __asm__ __volatile__("": : :"memory"); return 0; // Line 43 | |
69: 79 18 50 00 00 00 00 00 r8 = *(u64 *)(r1 + 80) | |
70: 05 00 01 00 00 00 00 00 goto +1 | |
; case 0x56441ad31aceULL: *((int32_t *)dest) = ctx->ax; __asm__ __volatile__("": : :"memory"); return 0; // Line 44 | |
71: 79 18 50 00 00 00 00 00 r8 = *(u64 *)(r1 + 80) | |
72: bf a7 00 00 00 00 00 00 r7 = r10 | |
; bpf_probe_read(&keyhit.keystr, probe_read_size, (void *)keystr); // Line 102 | |
73: 07 07 00 00 00 ff ff ff r7 += -256 | |
74: bf 71 00 00 00 00 00 00 r1 = r7 | |
75: bf 62 00 00 00 00 00 00 r2 = r6 | |
76: 85 00 00 00 04 00 00 00 call 4 | |
; valp = ({typeof(keyhits.leaf) *leaf = bpf_map_lookup_elem_(bpf_pseudo_fd(1, -1), &keyhit); if (!leaf) { bpf_map_update_elem_(bpf_pseudo_fd(1, -1), &keyhit, &zero, BPF_NOEXIST); leaf = bpf_map_lookup_elem_(bpf_pseudo_fd(1, -1), &keyhit); if (!leaf) return 0;}leaf;}); // Line 104 | |
77: 18 11 00 00 ff ff ff ff 00 00 00 00 00 00 00 00 ld_pseudo r1, 1, 4294967295 | |
79: bf 72 00 00 00 00 00 00 r2 = r7 | |
80: 85 00 00 00 01 00 00 00 call 1 | |
81: bf 07 00 00 00 00 00 00 r7 = r0 | |
82: 55 07 0f 00 00 00 00 00 if r7 != 0 goto +15 | |
83: 18 11 00 00 ff ff ff ff 00 00 00 00 00 00 00 00 ld_pseudo r1, 1, 4294967295 | |
85: bf a7 00 00 00 00 00 00 r7 = r10 | |
86: 07 07 00 00 00 ff ff ff r7 += -256 | |
87: bf a3 00 00 00 00 00 00 r3 = r10 | |
88: 07 03 00 00 d8 fe ff ff r3 += -296 | |
89: bf 72 00 00 00 00 00 00 r2 = r7 | |
90: b7 04 00 00 01 00 00 00 r4 = 1 | |
91: 85 00 00 00 02 00 00 00 call 2 | |
92: 18 11 00 00 ff ff ff ff 00 00 00 00 00 00 00 00 ld_pseudo r1, 1, 4294967295 | |
94: bf 72 00 00 00 00 00 00 r2 = r7 | |
95: 85 00 00 00 01 00 00 00 call 1 | |
96: bf 07 00 00 00 00 00 00 r7 = r0 | |
97: 15 07 0c 00 00 00 00 00 if r7 == 0 goto +12 | |
; valp->keysize = keysize; // Line 107 | |
98: 7b 67 18 00 00 00 00 00 *(u64 *)(r7 + 24) = r6 | |
; valp->count++; // Line 105 | |
99: 79 71 00 00 00 00 00 00 r1 = *(u64 *)(r7 + 0) | |
100: 07 01 00 00 01 00 00 00 r1 += 1 | |
101: 7b 17 00 00 00 00 00 00 *(u64 *)(r7 + 0) = r1 | |
; valp->bytecount = bytecount; // Line 106 | |
102: 67 08 00 00 20 00 00 00 r8 <<= 32 | |
103: c7 08 00 00 20 00 00 00 r8 s>>= 32 | |
104: 7b 87 08 00 00 00 00 00 *(u64 *)(r7 + 8) = r8 | |
; valp->totalbytes += bytecount; // Line 108 | |
105: 79 71 10 00 00 00 00 00 r1 = *(u64 *)(r7 + 16) | |
106: 0f 81 00 00 00 00 00 00 r1 += r8 | |
107: 7b 17 10 00 00 00 00 00 *(u64 *)(r7 + 16) = r1 | |
; valp->timestamp = bpf_ktime_get_ns(); // Line 109 | |
108: 85 00 00 00 05 00 00 00 call 5 | |
109: 7b 07 20 00 00 00 00 00 *(u64 *)(r7 + 32) = r0 | |
; } // Line 113 | |
110: b7 00 00 00 00 00 00 00 r0 = 0 | |
111: 95 00 00 00 00 00 00 00 exit |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment