Last active
November 27, 2019 20:24
-
-
Save dalehamel/7a74b83b5a66bed0aed3e4b3292a68f7 to your computer and use it in GitHub Desktop.
mctop bcc/bpftrace implementations
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Disassembly of section .bpf.fn.trace_entry: | |
trace_entry: | |
; int trace_entry(struct pt_regs *ctx) { // Line 83 | |
0: b7 03 00 00 00 00 00 00 r3 = 0 | |
; struct keyhit_t keyhit = {0}; // Line 88 | |
1: 7b 3a f8 ff 00 00 00 00 *(u64 *)(r10 - 8) = r3 | |
2: 7b 3a f0 ff 00 00 00 00 *(u64 *)(r10 - 16) = r3 | |
3: 7b 3a e8 ff 00 00 00 00 *(u64 *)(r10 - 24) = r3 | |
4: 7b 3a e0 ff 00 00 00 00 *(u64 *)(r10 - 32) = r3 | |
5: 7b 3a d8 ff 00 00 00 00 *(u64 *)(r10 - 40) = r3 | |
6: 7b 3a d0 ff 00 00 00 00 *(u64 *)(r10 - 48) = r3 | |
7: 7b 3a c8 ff 00 00 00 00 *(u64 *)(r10 - 56) = r3 | |
8: 7b 3a c0 ff 00 00 00 00 *(u64 *)(r10 - 64) = r3 | |
9: 7b 3a b8 ff 00 00 00 00 *(u64 *)(r10 - 72) = r3 | |
10: 7b 3a b0 ff 00 00 00 00 *(u64 *)(r10 - 80) = r3 | |
; struct value_t *valp, zero = {}; // Line 89 | |
11: 7b 3a a8 ff 00 00 00 00 *(u64 *)(r10 - 88) = r3 | |
12: 7b 3a a0 ff 00 00 00 00 *(u64 *)(r10 - 96) = r3 | |
13: 7b 3a 98 ff 00 00 00 00 *(u64 *)(r10 - 104) = r3 | |
14: 7b 3a 90 ff 00 00 00 00 *(u64 *)(r10 - 112) = r3 | |
15: 7b 3a 88 ff 00 00 00 00 *(u64 *)(r10 - 120) = r3 | |
; switch(PT_REGS_IP(ctx)) { // Line 24 | |
16: 79 12 80 00 00 00 00 00 r2 = *(u64 *)(r1 + 128) | |
17: 18 04 00 00 ce 1a d3 1a 00 00 00 00 44 56 00 00 r4 = 94850507807438 ll | |
19: 1d 42 05 00 00 00 00 00 if r2 == r4 goto +5 | |
20: 18 04 00 00 66 0a d3 1a 00 00 00 00 44 56 00 00 r4 = 94850507803238 ll | |
22: 5d 42 04 00 00 00 00 00 if r2 != r4 goto +4 | |
; case 0x56441ad30a66ULL: *((uint64_t *)dest) = ctx->si; __asm__ __volatile__("": : :"memory"); return 0; // Line 25 | |
23: 79 13 68 00 00 00 00 00 r3 = *(u64 *)(r1 + 104) | |
24: 05 00 01 00 00 00 00 00 goto +1 | |
; case 0x56441ad31aceULL: *((uint64_t *)dest) = ctx->si; __asm__ __volatile__("": : :"memory"); return 0; // Line 26 | |
25: 79 13 68 00 00 00 00 00 r3 = *(u64 *)(r1 + 104) | |
26: 79 12 80 00 00 00 00 00 r2 = *(u64 *)(r1 + 128) | |
27: 18 04 00 00 ce 1a d3 1a 00 00 00 00 44 56 00 00 r4 = 94850507807438 ll | |
; switch(PT_REGS_IP(ctx)) { // Line 33 | |
29: 1d 42 06 00 00 00 00 00 if r2 == r4 goto +6 | |
30: b7 07 00 00 00 00 00 00 r7 = 0 | |
31: 18 04 00 00 66 0a d3 1a 00 00 00 00 44 56 00 00 r4 = 94850507803238 ll | |
33: 5d 42 03 00 00 00 00 00 if r2 != r4 goto +3 | |
; case 0x56441ad30a66ULL: *((uint8_t *)dest) = ctx->cx; __asm__ __volatile__("": : :"memory"); return 0; // Line 34 | |
34: 79 17 58 00 00 00 00 00 r7 = *(u64 *)(r1 + 88) | |
35: 05 00 01 00 00 00 00 00 goto +1 | |
; case 0x56441ad31aceULL: *((uint8_t *)dest) = ctx->cx; __asm__ __volatile__("": : :"memory"); return 0; // Line 35 | |
36: 79 17 58 00 00 00 00 00 r7 = *(u64 *)(r1 + 88) | |
37: 79 12 80 00 00 00 00 00 r2 = *(u64 *)(r1 + 128) | |
; switch(PT_REGS_IP(ctx)) { // Line 42 | |
38: 18 04 00 00 ce 1a d3 1a 00 00 00 00 44 56 00 00 r4 = 94850507807438 ll | |
40: 1d 42 06 00 00 00 00 00 if r2 == r4 goto +6 | |
41: b7 08 00 00 00 00 00 00 r8 = 0 | |
42: 18 04 00 00 66 0a d3 1a 00 00 00 00 44 56 00 00 r4 = 94850507803238 ll | |
44: 5d 42 03 00 00 00 00 00 if r2 != r4 goto +3 | |
; case 0x56441ad30a66ULL: *((int32_t *)dest) = ctx->ax; __asm__ __volatile__("": : :"memory"); return 0; // Line 43 | |
45: 79 18 50 00 00 00 00 00 r8 = *(u64 *)(r1 + 80) | |
46: 05 00 01 00 00 00 00 00 goto +1 | |
; case 0x56441ad31aceULL: *((int32_t *)dest) = ctx->ax; __asm__ __volatile__("": : :"memory"); return 0; // Line 44 | |
47: 79 18 50 00 00 00 00 00 r8 = *(u64 *)(r1 + 80) | |
48: bf 71 00 00 00 00 00 00 r1 = r7 | |
; probe_read_size = keysize < sizeof(keyhit.keystr) ? keysize : sizeof(keyhit.keystr); // Line 96 | |
49: 57 01 00 00 ff 00 00 00 r1 &= 255 | |
50: b7 04 00 00 50 00 00 00 r4 = 80 | |
51: bf 72 00 00 00 00 00 00 r2 = r7 | |
52: 2d 14 01 00 00 00 00 00 if r4 > r1 goto +1 | |
53: b7 02 00 00 50 00 00 00 r2 = 80 | |
; bpf_probe_read(&keyhit.keystr, probe_read_size, (void *)keystr); // Line 102 | |
54: 57 02 00 00 ff 00 00 00 r2 &= 255 | |
55: bf a6 00 00 00 00 00 00 r6 = r10 | |
; uint8_t probe_read_size = 0; // Line 90 | |
56: 07 06 00 00 b0 ff ff ff r6 += -80 | |
; bpf_probe_read(&keyhit.keystr, probe_read_size, (void *)keystr); // Line 102 | |
57: bf 61 00 00 00 00 00 00 r1 = r6 | |
58: 85 00 00 00 04 00 00 00 call 4 | |
; valp = ({typeof(keyhits.leaf) *leaf = bpf_map_lookup_elem_(bpf_pseudo_fd(1, -1), &keyhit); if (!leaf) { bpf_map_update_elem_(bpf_pseudo_fd(1, -1), &keyhit, &zero, BPF_NOEXIST); leaf = bpf_map_lookup_elem_(bpf_pseudo_fd(1, -1), &keyhit); if (!leaf) return 0;}leaf;}); // Line 104 | |
59: 18 11 00 00 ff ff ff ff 00 00 00 00 00 00 00 00 ld_pseudo r1, 1, 4294967295 | |
61: bf 62 00 00 00 00 00 00 r2 = r6 | |
62: 85 00 00 00 01 00 00 00 call 1 | |
63: bf 06 00 00 00 00 00 00 r6 = r0 | |
64: 55 06 0f 00 00 00 00 00 if r6 != 0 goto +15 | |
65: 18 11 00 00 ff ff ff ff 00 00 00 00 00 00 00 00 ld_pseudo r1, 1, 4294967295 | |
67: bf a6 00 00 00 00 00 00 r6 = r10 | |
68: 07 06 00 00 b0 ff ff ff r6 += -80 | |
69: bf a3 00 00 00 00 00 00 r3 = r10 | |
70: 07 03 00 00 88 ff ff ff r3 += -120 | |
71: bf 62 00 00 00 00 00 00 r2 = r6 | |
72: b7 04 00 00 01 00 00 00 r4 = 1 | |
73: 85 00 00 00 02 00 00 00 call 2 | |
74: 18 11 00 00 ff ff ff ff 00 00 00 00 00 00 00 00 ld_pseudo r1, 1, 4294967295 | |
76: bf 62 00 00 00 00 00 00 r2 = r6 | |
77: 85 00 00 00 01 00 00 00 call 1 | |
78: bf 06 00 00 00 00 00 00 r6 = r0 | |
79: 15 06 0d 00 00 00 00 00 if r6 == 0 goto +13 | |
; valp->keysize = keysize; // Line 107 | |
80: 57 07 00 00 ff 00 00 00 r7 &= 255 | |
81: 7b 76 18 00 00 00 00 00 *(u64 *)(r6 + 24) = r7 | |
; valp->count++; // Line 105 | |
82: 79 61 00 00 00 00 00 00 r1 = *(u64 *)(r6 + 0) | |
83: 07 01 00 00 01 00 00 00 r1 += 1 | |
84: 7b 16 00 00 00 00 00 00 *(u64 *)(r6 + 0) = r1 | |
; valp->bytecount = bytecount; // Line 106 | |
85: 67 08 00 00 20 00 00 00 r8 <<= 32 | |
86: c7 08 00 00 20 00 00 00 r8 s>>= 32 | |
87: 7b 86 08 00 00 00 00 00 *(u64 *)(r6 + 8) = r8 | |
; valp->totalbytes += bytecount; // Line 108 | |
88: 79 61 10 00 00 00 00 00 r1 = *(u64 *)(r6 + 16) | |
89: 0f 81 00 00 00 00 00 00 r1 += r8 | |
90: 7b 16 10 00 00 00 00 00 *(u64 *)(r6 + 16) = r1 | |
; valp->timestamp = bpf_ktime_get_ns(); // Line 109 | |
91: 85 00 00 00 05 00 00 00 call 5 | |
92: 7b 06 20 00 00 00 00 00 *(u64 *)(r6 + 32) = r0 | |
; } // Line 113 | |
93: b7 00 00 00 00 00 00 00 r0 = 0 | |
94: 95 00 00 00 00 00 00 00 exit | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build stage: compiles memcached from source with SASL, USDT (dtrace) probes
# and -ggdb3 debug information, runs its test suite and installs it.
FROM alpine:3.10 AS builder

# SASL's "libplain.so" has to be installed, see
# https://github.com/memcached/memcached/wiki/SASLHowto
RUN apk add --no-cache cyrus-sasl-plain wget binutils

# memcached release to build and the SHA-1 its tarball is checked against.
ENV MEMCACHED_VERSION=1.5.19 \
    MEMCACHED_SHA1=14e6a02e743838696fcb620edf6a2fd7e60cabec

# Get dtrace dependencies for alpine in a kinda hacky way: unpack the Ubuntu
# systemtap-sdt-dev package and move its dtrace script and sys/ headers into
# place.
RUN mkdir /build \
    && cd /build \
    && wget http://launchpadlibrarian.net/251391227/systemtap-sdt-dev_2.9-2ubuntu2_amd64.deb \
    && ar x systemtap-sdt-dev_2.9-2ubuntu2_amd64.deb \
    && tar -xpf data.tar.xz \
    && mv usr/bin/dtrace /usr/bin/dtrace \
    && mkdir -p /usr/include/sys \
    && mv usr/include/x86_64-linux-gnu/sys/* /usr/include/sys \
    && rm -rf /build

# Download, verify, configure, build, test and install memcached.
# extstore is enabled everywhere except s390x:
#   https://github.com/docker-library/memcached/pull/38
#   https://github.com/memcached/memcached/issues/381 "--enable-extstore on s390x (IBM System Z mainframe architecture) fails tests"
RUN set -x \
    && apk add --no-cache --virtual .build-deps \
        ca-certificates \
        coreutils \
        cyrus-sasl-dev \
        dpkg-dev dpkg \
        gcc \
        libc-dev \
        libevent-dev \
        linux-headers \
        make \
        openssl \
        perl \
        perl-utils \
        tar \
        wget \
    && wget -O memcached.tar.gz "https://memcached.org/files/memcached-$MEMCACHED_VERSION.tar.gz" \
    && echo "$MEMCACHED_SHA1 memcached.tar.gz" | sha1sum -c - \
    && mkdir -p /usr/src/memcached \
    && tar -xzf memcached.tar.gz -C /usr/src/memcached --strip-components=1 \
    && rm memcached.tar.gz \
    && cd /usr/src/memcached \
    && gnuArch="$(dpkg-architecture --query DEB_BUILD_GNU_TYPE)" \
    && enableExtstore="$( \
        case "$gnuArch" in \
            s390x-*) ;; \
            *) echo '--enable-extstore' ;; \
        esac \
    )" \
    && ./configure \
        CFLAGS="-ggdb3" \
        CXXFLAGS="-ggdb3" \
        LDFLAGS="-ggdb3" \
        --build="$gnuArch" \
        --enable-sasl \
        --enable-sasl-pwdb \
        --enable-dtrace \
        $enableExtstore \
    && make -j "$(nproc)" \
    && make test \
    && make install \
    && memcached -V
# Runtime stage: a small image that carries only the memcached binary copied
# from the builder stage plus its shared-library dependencies.
FROM alpine:3.10

# Numeric user and group id the memcache account is created with.
ENV UID=11211 \
    GID=11211

# Install runtime dependencies, then add our user and group so their IDs are
# assigned consistently, regardless of whatever dependencies get added.
RUN apk update && apk upgrade && apk add --no-cache coreutils \
        libevent \
        libsasl \
        file \
        bash \
    && addgroup -g $GID memcache \
    && adduser -D -u $UID -G memcache memcache

COPY --from=builder /usr/local/bin/memcached /usr/local/bin/memcached
COPY health.sh /usr/local/bin/

USER $UID:$GID

# Only ENTRYPOINT is set. An exec-form CMD is appended to the ENTRYPOINT as
# arguments, so the previous CMD ["memcached"] started `memcached memcached`.
# Arguments given to `docker run` are still passed through to memcached.
ENTRYPOINT ["memcached"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Print the column header once and record when tracing started.
BEGIN
{
  printf("%-20s %10s %10s %10s\n", "MEMCACHED KEY", "CALLS", "OBJSIZE", "REQ/s");
  @start = nsecs;
}

// NOTE - this presently omits incr, decr, and delete because they have a
// different signature
//
// For each traced command: count the call under the key read from arg1 with
// length arg2, then print the key, its call count, arg3 as the object size
// and the average calls per second since BEGIN.
usdt::memcached:command__get,
usdt::memcached:command__set,
usdt::memcached:command__add,
usdt::memcached:command__append,
usdt::memcached:command__prepend,
usdt::memcached:command__touch,
usdt::memcached:command__cas,
usdt::memcached:command__replace
{
  @calls[str(arg1, arg2)]++;

  $objsize = arg3;

  // Whole seconds since BEGIN; this is 0 during the first second.
  $interval = (nsecs - @start) / 1000000000;

  // Read the counter back with the same length-bounded key it was stored
  // under (str(arg1) alone can produce a different string), and avoid
  // dividing by a zero interval.
  $cps = ($interval > 0) ? (@calls[str(arg1, arg2)] / $interval) : 0;

  printf("%-20s %10d %10d %10d\n", str(arg1, arg2), @calls[str(arg1, arg2)],
         $objsize, $cps);
}

// Drop the maps so bpftrace does not print them on exit.
END
{
  clear(@start);
  clear(@calls);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# @lint-avoid-python-3-compatibility-imports | |
# | |
# mctop Memcached key operation analysis tool | |
# For Linux, uses BCC, eBPF. | |
# | |
# USAGE: mctop.py -p PID | |
# | |
# This uses in-kernel eBPF maps to trace and analyze key access rates and | |
# objects. This can help to spot hot keys, and tune memcached usage for | |
# performance. | |
# | |
# Copyright 2019 Shopify, Inc. | |
# Licensed under the Apache License, Version 2.0 (the "License") | |
# | |
# 17-Nov-2019 Dale Hamel Created this. | |
# Inspired by the ruby tool of the same name by Marcus Barczak in 2012, | |
# see https://codeascraft.com/2012/12/13/mctop-a-tool-for-analyzing-memcache-get-traffic/ | |
# see also https://github.com/tumblr/memkeys | |
from __future__ import print_function | |
from time import sleep, strftime, monotonic | |
from bcc import BPF, USDT, utils | |
from subprocess import call | |
import argparse | |
import sys | |
import select | |
import tty | |
import termios | |
import pickle | |
# FIXME better help
# arguments
examples = """examples:
./mctop -p PID # memcached usage top, 1 second refresh
"""
parser = argparse.ArgumentParser(
    description="Memcached top key analysis",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-p", "--pid", type=int, help="process id to attach to")
parser.add_argument("-o", "--output", action="store",
    help="save map data to pickle file when the dump command is issued")  # FIXME make this JSON
parser.add_argument("-C", "--noclear", action="store_true",
    help="don't clear the screen")
# type=int lets argparse reject non-numeric values with a usage message
# instead of an uncaught ValueError from int() further down.
parser.add_argument("-r", "--maxrows", type=int, default=20,
    help="maximum rows to print, default 20")
parser.add_argument("interval", nargs="?", type=int, default=1,
    help="output interval, in seconds")
parser.add_argument("count", nargs="?", type=int, default=99999999,
    help="number of outputs")
parser.add_argument("--ebpf", action="store_true",
    help=argparse.SUPPRESS)

# FIXME clean this up
args = parser.parse_args()
# A target process is needed for everything except --ebpf, which only prints
# the BPF program text and exits.
if args.pid is None and not args.ebpf:
    parser.error("-p/--pid is required")

interval = args.interval
countdown = args.count
maxrows = args.maxrows
clear = not args.noclear
outfile = args.output
pid = args.pid

# Terminal attributes as they were at startup; restored when the tool exits.
old_settings = termios.tcgetattr(sys.stdin)

# Interactive state, changed from the keyboard handler.
sort_mode = "C"
sort_ascending = True
exiting = 0
first_loop = True

# Keys that select the sort column, and the label shown in the footer.
sort_modes = {
    "C": "calls",  # total calls to key
    "S": "size",   # latest size of key
    "R": "req/s",  # requests per second to this key
    "B": "bw",     # total bytes accesses on this key
    "N": "ts"      # timestamp of the latest access
}

# Keys that trigger a command, and the label shown in the footer.
commands = {
    "T": "toggle",  # sorting by ascending / descending order
    "D": "dump",    # clear eBPF maps and dump to disk (if set)
    "Q": "quit"     # exit mctop
}
# FIXME have helper to generate per type? | |
# load BPF program | |
# Embedded C source handed to BCC as text. trace_entry() reads USDT arguments
# 2, 3 and 4 into keystr (pointer), keysize and bytecount, copies at most
# MAX_STRING_LENGTH bytes from the key pointer into a keyhit_t, and updates
# that key's value_t entry in the `keyhits` hash: call count, latest byte
# count, running byte total, key size and a bpf_ktime_get_ns() timestamp.
# NOTE(review): this variant passes a variable length (probe_read_size) to
# bpf_probe_read, while the comment inside the program says a variable-size
# read throws a verifier error -- confirm this variant actually loads.
# NOTE(review): memcached_op_t is declared but not referenced in this program.
bpf_text = """ | |
#include <uapi/linux/ptrace.h> | |
#include <bcc/proto.h> | |
#define MAX_STRING_LENGTH 80 | |
// Must match python definitions | |
typedef enum {START, END, GET, ADD, SET, REPLACE, PREPEND, APPEND, | |
TOUCH, CAS, INCR, DECR, DELETE} memcached_op_t; | |
struct keyhit_t { | |
char keystr[MAX_STRING_LENGTH]; | |
}; | |
struct value_t { | |
u64 count; | |
u64 bytecount; | |
u64 totalbytes; | |
u64 keysize; | |
u64 timestamp; | |
}; | |
BPF_HASH(keyhits, struct keyhit_t, struct value_t); | |
int trace_entry(struct pt_regs *ctx) { | |
u64 keystr = 0; | |
int32_t bytecount = 0; // type is -4@%eax in stap notes, which is int32 | |
uint8_t keysize = 0; // type is 1@%cl, which should be uint8 | |
struct keyhit_t keyhit = {0}; | |
struct value_t *valp, zero = {}; | |
uint8_t probe_read_size = 0; | |
bpf_usdt_readarg(2, ctx, &keystr); | |
bpf_usdt_readarg(3, ctx, &keysize); | |
bpf_usdt_readarg(4, ctx, &bytecount); | |
probe_read_size = keysize < sizeof(keyhit.keystr) ? keysize : sizeof(keyhit.keystr); | |
// see https://github.com/memcached/memcached/issues/576 | |
// ideally per https://github.com/iovisor/bcc/issues/1260 we should be able to | |
// read just the size we need, but this doesn't seem possible and throws a | |
// verifier error | |
bpf_probe_read(&keyhit.keystr, probe_read_size, (void *)keystr); | |
valp = keyhits.lookup_or_init(&keyhit, &zero); | |
valp->count++; | |
valp->bytecount = bytecount; | |
valp->keysize = keysize; | |
valp->totalbytes += bytecount; | |
valp->timestamp = bpf_ktime_get_ns(); | |
return 0; | |
} | |
""" | |
# Since it is possible that we read the keys incorrectly, we need to fix the | |
# hash keys and combine their values intelligently here, producing a new hash | |
# see https://github.com/memcached/memcached/issues/576 | |
# A possible solution may be in flagging to the verifier that the size given | |
# by a usdt argument is less than the buffer size, | |
# see https://github.com/iovisor/bcc/issues/1260#issuecomment-406365168 | |
def reconcile_keys(bpf_map):
    """Merge raw map entries whose keys are equal once cut to `keysize` bytes.

    Each map key's `keystr` is sliced to the `keysize` recorded in its value
    and decoded as UTF-8 (undecodable bytes are replaced). Entries that end up
    with the same name are combined: `count` and `totalbytes` are summed,
    while `bytecount` and `timestamp` come from the entry with the newest
    timestamp.

    Returns a dict mapping the decoded key to a dict with the fields
    "count", "bytecount", "totalbytes" and "timestamp".
    """
    merged = {}
    for raw_key, stats in bpf_map.items():
        name = raw_key.keystr[:stats.keysize].decode('utf-8', 'replace')
        entry = merged.get(name)
        if entry is None:
            # First time this name is seen: take the values as they are.
            merged[name] = {
                "count": stats.count,
                "bytecount": stats.bytecount,
                "totalbytes": stats.totalbytes,
                "timestamp": stats.timestamp,
            }
            continue
        # Collision: totals add up ...
        entry['count'] += stats.count
        entry['totalbytes'] += stats.totalbytes
        # ... and the most recent access supplies size and timestamp.
        if stats.timestamp > entry['timestamp']:
            entry['bytecount'] = stats.bytecount
            entry['timestamp'] = stats.timestamp
    return merged
def sort_output(unsorted_map):
    """Return the entries of `unsorted_map` as a sorted list of (key, stats).

    The sort column is selected by the module-level `sort_mode`:
    "C" -> 'count', "S" -> 'bytecount', "R" -> 'cps' (requests per second),
    "B" -> 'bandwidth', "N" -> 'timestamp'. An unrecognised mode leaves the
    entries in the map's own order.

    A list is always returned (not a one-shot iterator), so the result can be
    iterated more than once.

    NOTE(review): sorted() orders from smallest to largest and the result is
    reversed when `sort_ascending` is true, so the "ascending" setting puts
    the largest values first. This is kept as it was; confirm whether the flag
    or its label should be flipped.
    """
    global sort_mode
    global sort_ascending

    sort_fields = {
        "C": 'count',
        "S": 'bytecount',
        "R": 'cps',
        "B": 'bandwidth',
        "N": 'timestamp',
    }
    field = sort_fields.get(sort_mode)
    if field is None:
        output = list(unsorted_map.items())
    else:
        output = sorted(unsorted_map.items(), key=lambda x: x[1][field])

    if sort_ascending:
        # In-place reverse keeps the same tie order as reversed(sorted(...)).
        output.reverse()
    return output
# Put stdin into cbreak mode and poll it with select() so single keypresses can be read
def readKey(interval):
    """Wait up to 5 seconds for one keypress and apply it.

    stdin is switched to cbreak mode and polled with select(). Keys are
    matched case-insensitively:
      t            toggle `sort_ascending`
      c/s/r/b/n    set `sort_mode` to the matching upper-case letter
      d            pickle `sorted_output` to /tmp/<outfile>.mcdump when an
                   output name was given, then clear the `keyhits` table
      q            set `exiting`
    Any other key, or no key before the timeout, changes nothing.

    The `interval` argument is currently unused; the poll timeout is fixed at
    5 seconds. NOTE(review): confirm whether the timeout should follow the
    refresh interval.
    """
    global sort_mode
    global sort_ascending
    global exiting
    global outfile
    global bpf
    global sorted_output

    tty.setcbreak(sys.stdin.fileno())
    readable, _, _ = select.select([sys.stdin], [], [], 5)
    if not readable:
        return

    key = sys.stdin.read(1).lower()
    if key == 't':
        sort_ascending = not sort_ascending
    elif key in ('c', 's', 'r', 'b', 'n'):
        sort_mode = key.upper()
    elif key == 'd':
        keyhits = bpf.get_table("keyhits")
        # Only write a dump when an output name was supplied; without this
        # check the file would be named /tmp/None.mcdump.
        if outfile is not None:
            # The with-block closes (and flushes) the file; list() makes sure
            # a plain list is written even if sorted_output is an iterator.
            with open('/tmp/%s.mcdump' % outfile, 'wb') as out:
                pickle.dump(list(sorted_output), out)
        keyhits.clear()
    elif key == 'q':
        exiting = 1
# --ebpf only prints the BPF program text and exits.
if args.ebpf:
    print(bpf_text)
    exit()

# Attach trace_entry to the command__set USDT probe of the target process.
usdt = USDT(pid=pid)
usdt.enable_probe(probe="command__set", fn_name="trace_entry")  # FIXME use fully specified version, port this to python
bpf = BPF(text=bpf_text, usdt_contexts=[usdt])

start = monotonic()  # FIXME would prefer monotonic_ns, if 3.7+

# Main display loop. The try/finally restores the terminal attributes saved
# at startup on every exit path, including unexpected exceptions.
try:
    while 1:
        try:
            # The first frame is drawn immediately; later frames wait for a
            # keypress or the poll timeout.
            if not first_loop:
                readKey(interval)
            else:
                first_loop = False
        except KeyboardInterrupt:
            exiting = 1

        # header
        if clear:
            print("\033c", end="")
        print("%-30s %8s %8s %8s %8s %8s" % ("MEMCACHED KEY", "CALLS",
                                              "OBJSIZE", "REQ/S",
                                              "BW(kbps)", "TOTAL"))
        keyhits = bpf.get_table("keyhits")

        # Seconds since tracing started. Kept in its own variable so the
        # configured `interval` is not overwritten.
        elapsed = monotonic() - start
        fixed_map = reconcile_keys(keyhits)
        for v in fixed_map.values():
            # Rates are averages over the whole run; report 0 rather than
            # dividing by a zero elapsed time.
            v['cps'] = v['count'] / elapsed if elapsed > 0 else 0.0
            v['bandwidth'] = (v['totalbytes'] / 1000) / elapsed if elapsed > 0 else 0.0

        # Materialise the result so it is still complete when the dump
        # command pickles it later.
        sorted_output = list(sort_output(fixed_map))

        line = 0
        for k, v in sorted_output[:max(maxrows, 0)]:
            print("%-30s %8d %8d %8f %8f %8d" % (k, v['count'], v['bytecount'],
                                                  v['cps'], v['bandwidth'],
                                                  v['totalbytes']))
            line += 1

        # Pad to maxrows so the footer stays on the same screen line.
        print((maxrows - line) * "\r\n")
        sys.stdout.write("[Curr: %s/%s Opt: %s:%s|%s:%s|%s:%s|%s:%s|%s:%s]" %
                         (sort_mode,
                          "Asc" if sort_ascending else "Dsc",
                          'C', sort_modes['C'],
                          'S', sort_modes['S'],
                          'R', sort_modes['R'],
                          'B', sort_modes['B'],
                          'N', sort_modes['N']
                          ))
        sys.stdout.write("[%s:%s %s:%s %s:%s]" % (
            'T', commands['T'],
            'D', commands['D'],
            'Q', commands['Q']
        ))
        print("\033[%d;%dH" % (0, 0))

        # Stop on quit / Ctrl-C, or once `count` frames have been drawn.
        countdown -= 1
        if exiting or countdown <= 0:
            print("\033c", end="")
            exit()
finally:
    termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# @lint-avoid-python-3-compatibility-imports | |
# | |
# mctop Memcached key operation analysis tool | |
# For Linux, uses BCC, eBPF. | |
# | |
# USAGE: mctop.py -p PID | |
# | |
# This uses in-kernel eBPF maps to trace and analyze key access rates and | |
# objects. This can help to spot hot keys, and tune memcached usage for | |
# performance. | |
# | |
# Copyright 2019 Shopify, Inc. | |
# Licensed under the Apache License, Version 2.0 (the "License") | |
# | |
# 17-Nov-2019 Dale Hamel Created this. | |
# Inspired by the ruby tool of the same name by Marcus Barczak in 2012, | |
# see https://codeascraft.com/2012/12/13/mctop-a-tool-for-analyzing-memcache-get-traffic/ | |
# see also https://github.com/tumblr/memkeys | |
from __future__ import print_function | |
from time import sleep, strftime, monotonic | |
from bcc import BPF, USDT, utils | |
from subprocess import call | |
import argparse | |
import sys | |
import select | |
import tty | |
import termios | |
import pickle | |
# FIXME better help
# arguments
examples = """examples:
./mctop -p PID # memcached usage top, 1 second refresh
"""
parser = argparse.ArgumentParser(
    description="Memcached top key analysis",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-p", "--pid", type=int, help="process id to attach to")
parser.add_argument("-o", "--output", action="store",
    help="save map data to pickle file when the dump command is issued")  # FIXME make this JSON
parser.add_argument("-C", "--noclear", action="store_true",
    help="don't clear the screen")
# type=int lets argparse reject non-numeric values with a usage message
# instead of an uncaught ValueError from int() further down.
parser.add_argument("-r", "--maxrows", type=int, default=20,
    help="maximum rows to print, default 20")
parser.add_argument("interval", nargs="?", type=int, default=1,
    help="output interval, in seconds")
parser.add_argument("count", nargs="?", type=int, default=99999999,
    help="number of outputs")
parser.add_argument("--ebpf", action="store_true",
    help=argparse.SUPPRESS)

# FIXME clean this up
args = parser.parse_args()
# A target process is needed for everything except --ebpf, which only prints
# the BPF program text and exits.
if args.pid is None and not args.ebpf:
    parser.error("-p/--pid is required")

interval = args.interval
countdown = args.count
maxrows = args.maxrows
clear = not args.noclear
outfile = args.output
pid = args.pid

# Terminal attributes as they were at startup; restored when the tool exits.
old_settings = termios.tcgetattr(sys.stdin)

# Interactive state, changed from the keyboard handler.
sort_mode = "C"
sort_ascending = True
exiting = 0
first_loop = True

# Keys that select the sort column, and the label shown in the footer.
sort_modes = {
    "C": "calls",  # total calls to key
    "S": "size",   # latest size of key
    "R": "req/s",  # requests per second to this key
    "B": "bw",     # total bytes accesses on this key
    "N": "ts"      # timestamp of the latest access
}

# Keys that trigger a command, and the label shown in the footer.
commands = {
    "T": "toggle",  # sorting by ascending / descending order
    "D": "dump",    # clear eBPF maps and dump to disk (if set)
    "Q": "quit"     # exit mctop
}
# FIXME have helper to generate per type? | |
# load BPF program | |
# Embedded C source handed to BCC as text. trace_entry() reads USDT arguments
# 2, 3 and 4 into keystr (pointer), keysize and bytecount, always copies
# sizeof(keyhit.keystr) (MAX_STRING_LENGTH) bytes from the key pointer into a
# keyhit_t, and updates that key's value_t entry in the `keyhits` hash: call
# count, latest byte count, running byte total, key size and a
# bpf_ktime_get_ns() timestamp. Because the copy has a fixed length, the
# stored key can hold bytes past the real key; keysize is recorded alongside.
# NOTE(review): memcached_op_t is declared but not referenced in this program.
bpf_text = """ | |
#include <uapi/linux/ptrace.h> | |
#include <bcc/proto.h> | |
#define MAX_STRING_LENGTH 80 | |
// Must match python definitions | |
typedef enum {START, END, GET, ADD, SET, REPLACE, PREPEND, APPEND, | |
TOUCH, CAS, INCR, DECR, DELETE} memcached_op_t; | |
struct keyhit_t { | |
char keystr[MAX_STRING_LENGTH]; | |
}; | |
struct value_t { | |
u64 count; | |
u64 bytecount; | |
u64 totalbytes; | |
u64 keysize; | |
u64 timestamp; | |
}; | |
BPF_HASH(keyhits, struct keyhit_t, struct value_t); | |
int trace_entry(struct pt_regs *ctx) { | |
u64 keystr = 0; | |
int32_t bytecount = 0; // type is -4@%eax in stap notes, which is int32 | |
uint8_t keysize = 0; // type is 1@%cl, which should be uint8 | |
struct keyhit_t keyhit = {0}; | |
struct value_t *valp, zero = {}; | |
bpf_usdt_readarg(2, ctx, &keystr); | |
bpf_usdt_readarg(3, ctx, &keysize); | |
bpf_usdt_readarg(4, ctx, &bytecount); | |
// see https://github.com/memcached/memcached/issues/576 | |
// ideally per https://github.com/iovisor/bcc/issues/1260 we should be able to | |
// read just the size we need, but this doesn't seem possible and throws a | |
// verifier error | |
bpf_probe_read(&keyhit.keystr, sizeof(keyhit.keystr), (void *)keystr); | |
valp = keyhits.lookup_or_init(&keyhit, &zero); | |
valp->count++; | |
valp->bytecount = bytecount; | |
valp->keysize = keysize; | |
valp->totalbytes += bytecount; | |
valp->timestamp = bpf_ktime_get_ns(); | |
return 0; | |
} | |
""" | |
# Since it is possible that we read the keys incorrectly, we need to fix the | |
# hash keys and combine their values intelligently here, producing a new hash | |
# see https://github.com/memcached/memcached/issues/576 | |
# A possible solution may be in flagging to the verifier that the size given | |
# by a usdt argument is less than the buffer size, | |
# see https://github.com/iovisor/bcc/issues/1260#issuecomment-406365168 | |
def reconcile_keys(bpf_map):
    """Merge raw map entries whose keys are equal once cut to `keysize` bytes.

    Each map key's `keystr` is sliced to the `keysize` recorded in its value
    and decoded as UTF-8 (undecodable bytes are replaced). Entries that end up
    with the same name are combined: `count` and `totalbytes` are summed,
    while `bytecount` and `timestamp` come from the entry with the newest
    timestamp.

    Returns a dict mapping the decoded key to a dict with the fields
    "count", "bytecount", "totalbytes" and "timestamp".
    """
    merged = {}
    for raw_key, stats in bpf_map.items():
        name = raw_key.keystr[:stats.keysize].decode('utf-8', 'replace')
        entry = merged.get(name)
        if entry is None:
            # First time this name is seen: take the values as they are.
            merged[name] = {
                "count": stats.count,
                "bytecount": stats.bytecount,
                "totalbytes": stats.totalbytes,
                "timestamp": stats.timestamp,
            }
            continue
        # Collision: totals add up ...
        entry['count'] += stats.count
        entry['totalbytes'] += stats.totalbytes
        # ... and the most recent access supplies size and timestamp.
        if stats.timestamp > entry['timestamp']:
            entry['bytecount'] = stats.bytecount
            entry['timestamp'] = stats.timestamp
    return merged
def sort_output(unsorted_map):
    """Return the entries of `unsorted_map` as a sorted list of (key, stats).

    The sort column is selected by the module-level `sort_mode`:
    "C" -> 'count', "S" -> 'bytecount', "R" -> 'cps' (requests per second),
    "B" -> 'bandwidth', "N" -> 'timestamp'. An unrecognised mode leaves the
    entries in the map's own order.

    A list is always returned (not a one-shot iterator), so the result can be
    iterated more than once.

    NOTE(review): sorted() orders from smallest to largest and the result is
    reversed when `sort_ascending` is true, so the "ascending" setting puts
    the largest values first. This is kept as it was; confirm whether the flag
    or its label should be flipped.
    """
    global sort_mode
    global sort_ascending

    sort_fields = {
        "C": 'count',
        "S": 'bytecount',
        "R": 'cps',
        "B": 'bandwidth',
        "N": 'timestamp',
    }
    field = sort_fields.get(sort_mode)
    if field is None:
        output = list(unsorted_map.items())
    else:
        output = sorted(unsorted_map.items(), key=lambda x: x[1][field])

    if sort_ascending:
        # In-place reverse keeps the same tie order as reversed(sorted(...)).
        output.reverse()
    return output
# Put stdin into cbreak mode and poll it with select() so single keypresses can be read
def readKey(interval):
    """Wait up to 5 seconds for one keypress and apply it.

    stdin is switched to cbreak mode and polled with select(). Keys are
    matched case-insensitively:
      t            toggle `sort_ascending`
      c/s/r/b/n    set `sort_mode` to the matching upper-case letter
      d            pickle `sorted_output` to /tmp/<outfile>.mcdump when an
                   output name was given, then clear the `keyhits` table
      q            set `exiting`
    Any other key, or no key before the timeout, changes nothing.

    The `interval` argument is currently unused; the poll timeout is fixed at
    5 seconds. NOTE(review): confirm whether the timeout should follow the
    refresh interval.
    """
    global sort_mode
    global sort_ascending
    global exiting
    global outfile
    global bpf
    global sorted_output

    tty.setcbreak(sys.stdin.fileno())
    readable, _, _ = select.select([sys.stdin], [], [], 5)
    if not readable:
        return

    key = sys.stdin.read(1).lower()
    if key == 't':
        sort_ascending = not sort_ascending
    elif key in ('c', 's', 'r', 'b', 'n'):
        sort_mode = key.upper()
    elif key == 'd':
        keyhits = bpf.get_table("keyhits")
        # Only write a dump when an output name was supplied; without this
        # check the file would be named /tmp/None.mcdump.
        if outfile is not None:
            # The with-block closes (and flushes) the file; list() makes sure
            # a plain list is written even if sorted_output is an iterator.
            with open('/tmp/%s.mcdump' % outfile, 'wb') as out:
                pickle.dump(list(sorted_output), out)
        keyhits.clear()
    elif key == 'q':
        exiting = 1
# --ebpf only prints the BPF program text and exits.
if args.ebpf:
    print(bpf_text)
    exit()

# Attach trace_entry to the command__set USDT probe of the target process.
usdt = USDT(pid=pid)
usdt.enable_probe(probe="command__set", fn_name="trace_entry")  # FIXME use fully specified version, port this to python
bpf = BPF(text=bpf_text, usdt_contexts=[usdt])

start = monotonic()  # FIXME would prefer monotonic_ns, if 3.7+

# Main display loop. The try/finally restores the terminal attributes saved
# at startup on every exit path, including unexpected exceptions.
try:
    while 1:
        try:
            # The first frame is drawn immediately; later frames wait for a
            # keypress or the poll timeout.
            if not first_loop:
                readKey(interval)
            else:
                first_loop = False
        except KeyboardInterrupt:
            exiting = 1

        # header
        if clear:
            print("\033c", end="")
        print("%-30s %8s %8s %8s %8s %8s" % ("MEMCACHED KEY", "CALLS",
                                              "OBJSIZE", "REQ/S",
                                              "BW(kbps)", "TOTAL"))
        keyhits = bpf.get_table("keyhits")

        # Seconds since tracing started. Kept in its own variable so the
        # configured `interval` is not overwritten.
        elapsed = monotonic() - start
        fixed_map = reconcile_keys(keyhits)
        for v in fixed_map.values():
            # Rates are averages over the whole run; report 0 rather than
            # dividing by a zero elapsed time.
            v['cps'] = v['count'] / elapsed if elapsed > 0 else 0.0
            v['bandwidth'] = (v['totalbytes'] / 1000) / elapsed if elapsed > 0 else 0.0

        # Materialise the result so it is still complete when the dump
        # command pickles it later.
        sorted_output = list(sort_output(fixed_map))

        line = 0
        for k, v in sorted_output[:max(maxrows, 0)]:
            print("%-30s %8d %8d %8f %8f %8d" % (k, v['count'], v['bytecount'],
                                                  v['cps'], v['bandwidth'],
                                                  v['totalbytes']))
            line += 1

        # Pad to maxrows so the footer stays on the same screen line.
        print((maxrows - line) * "\r\n")
        sys.stdout.write("[Curr: %s/%s Opt: %s:%s|%s:%s|%s:%s|%s:%s|%s:%s]" %
                         (sort_mode,
                          "Asc" if sort_ascending else "Dsc",
                          'C', sort_modes['C'],
                          'S', sort_modes['S'],
                          'R', sort_modes['R'],
                          'B', sort_modes['B'],
                          'N', sort_modes['N']
                          ))
        sys.stdout.write("[%s:%s %s:%s %s:%s]" % (
            'T', commands['T'],
            'D', commands['D'],
            'Q', commands['Q']
        ))
        print("\033[%d;%dH" % (0, 0))

        # Stop on quit / Ctrl-C, or once `count` frames have been drawn.
        countdown -= 1
        if exiting or countdown <= 0:
            print("\033c", end="")
            exit()
finally:
    termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# To generate load for tracing, after starting docker image | |
memtier_benchmark --server localhost --port 11211 -P memcache_text --key-pattern=G:G |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
bpf: Failed to load program: Permission denied | |
0: (b7) r3 = 0 | |
1: (7b) *(u64 *)(r10 -8) = r3 | |
last_idx 1 first_idx 0 | |
regs=8 stack=0 before 0: (b7) r3 = 0 | |
2: (7b) *(u64 *)(r10 -16) = r3 | |
3: (7b) *(u64 *)(r10 -24) = r3 | |
4: (7b) *(u64 *)(r10 -32) = r3 | |
5: (7b) *(u64 *)(r10 -40) = r3 | |
6: (7b) *(u64 *)(r10 -48) = r3 | |
7: (7b) *(u64 *)(r10 -56) = r3 | |
8: (7b) *(u64 *)(r10 -64) = r3 | |
9: (7b) *(u64 *)(r10 -72) = r3 | |
10: (7b) *(u64 *)(r10 -80) = r3 | |
11: (7b) *(u64 *)(r10 -88) = r3 | |
12: (7b) *(u64 *)(r10 -96) = r3 | |
13: (7b) *(u64 *)(r10 -104) = r3 | |
14: (7b) *(u64 *)(r10 -112) = r3 | |
15: (7b) *(u64 *)(r10 -120) = r3 | |
16: (79) r2 = *(u64 *)(r1 +128) | |
17: (18) r4 = 0x55c2c9ad1ace | |
19: (1d) if r2 == r4 goto pc+5 | |
R1=ctx(id=0,off=0,imm=0) R2_w=inv(id=0) R3_w=invP0 R4_w=inv94295095581390 R10=fp0 fp-8_w=00000000 fp-16_w=00000000 fp-24_w=00000000 fp-32_w=00000000 fp-40_w=00000000 fp-48_w=00000000 fp-56_w=00000000 fp-64_w=00000000 fp-72_w=00000000 fp-80_w=00000000 fp-88_w=00000000 fp-96_w=00000000 fp-104_w=00000000 fp-112_w=00000000 fp-120_w=00000000 | |
20: (18) r4 = 0x55c2c9ad0a66 | |
22: (5d) if r2 != r4 goto pc+4 | |
R1=ctx(id=0,off=0,imm=0) R2_w=inv94295095577190 R3_w=invP0 R4_w=inv94295095577190 R10=fp0 fp-8_w=00000000 fp-16_w=00000000 fp-24_w=00000000 fp-32_w=00000000 fp-40_w=00000000 fp-48_w=00000000 fp-56_w=00000000 fp-64_w=00000000 fp-72_w=00000000 fp-80_w=00000000 fp-88_w=00000000 fp-96_w=00000000 fp-104_w=00000000 fp-112_w=00000000 fp-120_w=00000000 | |
23: (79) r3 = *(u64 *)(r1 +104) | |
24: (05) goto pc+1 | |
26: (79) r2 = *(u64 *)(r1 +128) | |
27: (18) r4 = 0x55c2c9ad1ace | |
29: (1d) if r2 == r4 goto pc+6 | |
R1=ctx(id=0,off=0,imm=0) R2_w=inv(id=0) R3=inv(id=0) R4_w=inv94295095581390 R10=fp0 fp-8=00000000 fp-16=00000000 fp-24=00000000 fp-32=00000000 fp-40=00000000 fp-48=00000000 fp-56=00000000 fp-64=00000000 fp-72=00000000 fp-80=00000000 fp-88=00000000 fp-96=00000000 fp-104=00000000 fp-112=00000000 fp-120=00000000 | |
30: (b7) r7 = 0 | |
31: (18) r4 = 0x55c2c9ad0a66 | |
33: (5d) if r2 != r4 goto pc+3 | |
R1=ctx(id=0,off=0,imm=0) R2_w=inv94295095577190 R3=inv(id=0) R4_w=inv94295095577190 R7_w=inv0 R10=fp0 fp-8=00000000 fp-16=00000000 fp-24=00000000 fp-32=00000000 fp-40=00000000 fp-48=00000000 fp-56=00000000 fp-64=00000000 fp-72=00000000 fp-80=00000000 fp-88=00000000 fp-96=00000000 fp-104=00000000 fp-112=00000000 fp-120=00000000 | |
34: (79) r7 = *(u64 *)(r1 +88) | |
35: (05) goto pc+1 | |
37: (79) r2 = *(u64 *)(r1 +128) | |
38: (18) r4 = 0x55c2c9ad1ace | |
40: (1d) if r2 == r4 goto pc+6 | |
R1=ctx(id=0,off=0,imm=0) R2_w=inv(id=0) R3=inv(id=0) R4_w=inv94295095581390 R7=inv(id=0) R10=fp0 fp-8=00000000 fp-16=00000000 fp-24=00000000 fp-32=00000000 fp-40=00000000 fp-48=00000000 fp-56=00000000 fp-64=00000000 fp-72=00000000 fp-80=00000000 fp-88=00000000 fp-96=00000000 fp-104=00000000 fp-112=00000000 fp-120=00000000 | |
41: (b7) r8 = 0 | |
42: (18) r4 = 0x55c2c9ad0a66 | |
44: (5d) if r2 != r4 goto pc+3 | |
R1=ctx(id=0,off=0,imm=0) R2_w=inv94295095577190 R3=inv(id=0) R4_w=inv94295095577190 R7=inv(id=0) R8_w=inv0 R10=fp0 fp-8=00000000 fp-16=00000000 fp-24=00000000 fp-32=00000000 fp-40=00000000 fp-48=00000000 fp-56=00000000 fp-64=00000000 fp-72=00000000 fp-80=00000000 fp-88=00000000 fp-96=00000000 fp-104=00000000 fp-112=00000000 fp-120=00000000 | |
45: (79) r8 = *(u64 *)(r1 +80) | |
46: (05) goto pc+1 | |
48: (bf) r1 = r7 | |
49: (57) r1 &= 255 | |
50: (b7) r4 = 80 | |
51: (bf) r2 = r7 | |
52: (2d) if r4 > r1 goto pc+1 | |
R1_w=inv(id=0,umin_value=80,umax_value=255,var_off=(0x0; 0xff)) R2_w=inv(id=0) R3=inv(id=0) R4_w=inv80 R7=inv(id=0) R8=inv(id=0) R10=fp0 fp-8=00000000 fp-16=00000000 fp-24=00000000 fp-32=00000000 fp-40=00000000 fp-48=00000000 fp-56=00000000 fp-64=00000000 fp-72=00000000 fp-80=00000000 fp-88=00000000 fp-96=00000000 fp-104=00000000 fp-112=00000000 fp-120=00000000 | |
53: (b7) r2 = 80 | |
54: (57) r2 &= 255 | |
55: (bf) r6 = r10 | |
56: (07) r6 += -80 | |
57: (bf) r1 = r6 | |
58: (85) call bpf_probe_read#4 | |
last_idx 58 first_idx 48 | |
regs=4 stack=0 before 57: (bf) r1 = r6 | |
regs=4 stack=0 before 56: (07) r6 += -80 | |
regs=4 stack=0 before 55: (bf) r6 = r10 | |
regs=4 stack=0 before 54: (57) r2 &= 255 | |
regs=4 stack=0 before 53: (b7) r2 = 80 | |
59: (18) r1 = 0xffff9664db8bf400 | |
61: (bf) r2 = r6 | |
62: (85) call bpf_map_lookup_elem#1 | |
63: (bf) r6 = r0 | |
64: (55) if r6 != 0x0 goto pc+15 | |
R0_w=inv0 R6_w=inv0 R7=inv(id=0) R8=inv(id=0) R10=fp0 fp-8=mmmmmmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=00000000 fp-96=00000000 fp-104=00000000 fp-112=00000000 fp-120=00000000 | |
65: (18) r1 = 0xffff9664db8bf400 | |
67: (bf) r6 = r10 | |
68: (07) r6 += -80 | |
69: (bf) r3 = r10 | |
70: (07) r3 += -120 | |
71: (bf) r2 = r6 | |
72: (b7) r4 = 1 | |
73: (85) call bpf_map_update_elem#2 | |
74: (18) r1 = 0xffff9664db8bf400 | |
76: (bf) r2 = r6 | |
77: (85) call bpf_map_lookup_elem#1 | |
78: (bf) r6 = r0 | |
79: (15) if r6 == 0x0 goto pc+13 | |
R0_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R6_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R7=inv(id=0) R8=inv(id=0) R10=fp0 fp-8=mmmmmmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=mmmmmmmm fp-96=mmmmmmmm fp-104=mmmmmmmm fp-112=mmmmmmmm fp-120=mmmmmmmm | |
80: (57) r7 &= 255 | |
81: (7b) *(u64 *)(r6 +24) = r7 | |
R0_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R6_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R7_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R8=inv(id=0) R10=fp0 fp-8=mmmmmmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=mmmmmmmm fp-96=mmmmmmmm fp-104=mmmmmmmm fp-112=mmmmmmmm fp-120=mmmmmmmm | |
82: (79) r1 = *(u64 *)(r6 +0) | |
R0_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R6_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R7_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R8=inv(id=0) R10=fp0 fp-8=mmmmmmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=mmmmmmmm fp-96=mmmmmmmm fp-104=mmmmmmmm fp-112=mmmmmmmm fp-120=mmmmmmmm | |
83: (07) r1 += 1 | |
84: (7b) *(u64 *)(r6 +0) = r1 | |
R0_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R1_w=inv(id=0) R6_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R7_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R8=inv(id=0) R10=fp0 fp-8=mmmmmmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=mmmmmmmm fp-96=mmmmmmmm fp-104=mmmmmmmm fp-112=mmmmmmmm fp-120=mmmmmmmm | |
85: (67) r8 <<= 32 | |
86: (c7) r8 s>>= 32 | |
87: (7b) *(u64 *)(r6 +8) = r8 | |
R0_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R1_w=inv(id=0) R6_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R7_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R8_w=inv(id=0,smin_value=-2147483648,smax_value=2147483647) R10=fp0 fp-8=mmmmmmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=mmmmmmmm fp-96=mmmmmmmm fp-104=mmmmmmmm fp-112=mmmmmmmm fp-120=mmmmmmmm | |
88: (79) r1 = *(u64 *)(r6 +16) | |
R0_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R1_w=inv(id=0) R6_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R7_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R8_w=inv(id=0,smin_value=-2147483648,smax_value=2147483647) R10=fp0 fp-8=mmmmmmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=mmmmmmmm fp-96=mmmmmmmm fp-104=mmmmmmmm fp-112=mmmmmmmm fp-120=mmmmmmmm | |
89: (0f) r1 += r8 | |
90: (7b) *(u64 *)(r6 +16) = r1 | |
R0_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R1_w=inv(id=0) R6_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R7_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R8_w=inv(id=0,smin_value=-2147483648,smax_value=2147483647) R10=fp0 fp-8=mmmmmmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=mmmmmmmm fp-96=mmmmmmmm fp-104=mmmmmmmm fp-112=mmmmmmmm fp-120=mmmmmmmm | |
91: (85) call bpf_ktime_get_ns#5 | |
92: (7b) *(u64 *)(r6 +32) = r0 | |
R0=inv(id=0) R6=map_value(id=0,off=0,ks=80,vs=40,imm=0) R7=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R8=inv(id=0,smin_value=-2147483648,smax_value=2147483647) R10=fp0 fp-8=mmmmmmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=mmmmmmmm fp-96=mmmmmmmm fp-104=mmmmmmmm fp-112=mmmmmmmm fp-120=mmmmmmmm | |
93: (b7) r0 = 0 | |
94: (95) exit | |
from 79 to 93: R0_w=inv0 R6_w=inv0 R7=inv(id=0) R8=inv(id=0) R10=fp0 fp-8=mmmmmmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=mmmmmmmm fp-96=mmmmmmmm fp-104=mmmmmmmm fp-112=mmmmmmmm fp-120=mmmmmmmm | |
93: (b7) r0 = 0 | |
94: (95) exit | |
from 64 to 80: R0_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R6_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R7=inv(id=0) R8=inv(id=0) R10=fp0 fp-8=mmmmmmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=00000000 fp-96=00000000 fp-104=00000000 fp-112=00000000 fp-120=00000000 | |
80: (57) r7 &= 255 | |
81: (7b) *(u64 *)(r6 +24) = r7 | |
R0_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R6_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R7_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R8=inv(id=0) R10=fp0 fp-8=mmmmmmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=00000000 fp-96=00000000 fp-104=00000000 fp-112=00000000 fp-120=00000000 | |
82: (79) r1 = *(u64 *)(r6 +0) | |
R0_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R6_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R7_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R8=inv(id=0) R10=fp0 fp-8=mmmmmmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=00000000 fp-96=00000000 fp-104=00000000 fp-112=00000000 fp-120=00000000 | |
83: (07) r1 += 1 | |
84: (7b) *(u64 *)(r6 +0) = r1 | |
R0_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R1_w=inv(id=0) R6_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R7_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R8=inv(id=0) R10=fp0 fp-8=mmmmmmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=00000000 fp-96=00000000 fp-104=00000000 fp-112=00000000 fp-120=00000000 | |
85: (67) r8 <<= 32 | |
86: (c7) r8 s>>= 32 | |
87: (7b) *(u64 *)(r6 +8) = r8 | |
R0_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R1_w=inv(id=0) R6_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R7_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R8_w=inv(id=0,smin_value=-2147483648,smax_value=2147483647) R10=fp0 fp-8=mmmmmmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=00000000 fp-96=00000000 fp-104=00000000 fp-112=00000000 fp-120=00000000 | |
88: (79) r1 = *(u64 *)(r6 +16) | |
R0_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R1_w=inv(id=0) R6_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R7_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R8_w=inv(id=0,smin_value=-2147483648,smax_value=2147483647) R10=fp0 fp-8=mmmmmmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=00000000 fp-96=00000000 fp-104=00000000 fp-112=00000000 fp-120=00000000 | |
89: (0f) r1 += r8 | |
90: (7b) *(u64 *)(r6 +16) = r1 | |
R0_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R1_w=inv(id=0) R6_w=map_value(id=0,off=0,ks=80,vs=40,imm=0) R7_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R8_w=inv(id=0,smin_value=-2147483648,smax_value=2147483647) R10=fp0 fp-8=mmmmmmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm fp-56=mmmmmmmm fp-64=mmmmmmmm fp-72=mmmmmmmm fp-80=mmmmmmmm fp-88=00000000 fp-96=00000000 fp-104=00000000 fp-112=00000000 fp-120=00000000 | |
91: (85) call bpf_ktime_get_ns#5 | |
92: safe | |
from 52 to 54: R1=inv(id=0,umax_value=79,var_off=(0x0; 0x7f)) R2=inv(id=0) R3=inv(id=0) R4=inv80 R7=inv(id=0) R8=inv(id=0) R10=fp0 fp-8=00000000 fp-16=00000000 fp-24=00000000 fp-32=00000000 fp-40=00000000 fp-48=00000000 fp-56=00000000 fp-64=00000000 fp-72=00000000 fp-80=00000000 fp-88=00000000 fp-96=00000000 fp-104=00000000 fp-112=00000000 fp-120=00000000 | |
54: (57) r2 &= 255 | |
55: (bf) r6 = r10 | |
56: (07) r6 += -80 | |
57: (bf) r1 = r6 | |
58: (85) call bpf_probe_read#4 | |
invalid stack type R1 off=-80 access_size=255 | |
processed 103 insns (limit 1000000) max_states_per_insn 0 total_states 7 peak_states 7 mark_read 4 | |
Traceback (most recent call last): | |
File "tools/mctop.py", line 237, in <module> | |
bpf = BPF(text=bpf_text, usdt_contexts=[usdt]) | |
File "/usr/lib64/python3.6/site-packages/bcc/__init__.py", line 339, in __init__ | |
usdt_context.attach_uprobes(self) | |
File "/usr/lib64/python3.6/site-packages/bcc/usdt.py", line 194, in attach_uprobes | |
addr=addr, pid=pid) | |
File "/usr/lib64/python3.6/site-packages/bcc/__init__.py", line 1008, in attach_uprobe | |
fn = self.load_func(fn_name, BPF.KPROBE) | |
File "/usr/lib64/python3.6/site-packages/bcc/__init__.py", line 384, in load_func | |
(func_name, errstr)) | |
Exception: Failed to load BPF program b'trace_entry': Permission denied |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Disassembly of section .bpf.fn.trace_entry: | |
trace_entry: | |
; int trace_entry(struct pt_regs *ctx) { // Line 83 | |
0: b7 03 00 00 00 00 00 00 r3 = 0 | |
; struct keyhit_t keyhit = {0}; // Line 88 | |
1: 73 3a fe ff 00 00 00 00 *(u8 *)(r10 - 2) = r3 | |
2: 6b 3a fc ff 00 00 00 00 *(u16 *)(r10 - 4) = r3 | |
3: 63 3a f8 ff 00 00 00 00 *(u32 *)(r10 - 8) = r3 | |
4: 7b 3a f0 ff 00 00 00 00 *(u64 *)(r10 - 16) = r3 | |
5: 7b 3a e8 ff 00 00 00 00 *(u64 *)(r10 - 24) = r3 | |
6: 7b 3a e0 ff 00 00 00 00 *(u64 *)(r10 - 32) = r3 | |
7: 7b 3a d8 ff 00 00 00 00 *(u64 *)(r10 - 40) = r3 | |
8: 7b 3a d0 ff 00 00 00 00 *(u64 *)(r10 - 48) = r3 | |
9: 7b 3a c8 ff 00 00 00 00 *(u64 *)(r10 - 56) = r3 | |
10: 7b 3a c0 ff 00 00 00 00 *(u64 *)(r10 - 64) = r3 | |
11: 7b 3a b8 ff 00 00 00 00 *(u64 *)(r10 - 72) = r3 | |
12: 7b 3a b0 ff 00 00 00 00 *(u64 *)(r10 - 80) = r3 | |
13: 7b 3a a8 ff 00 00 00 00 *(u64 *)(r10 - 88) = r3 | |
14: 7b 3a a0 ff 00 00 00 00 *(u64 *)(r10 - 96) = r3 | |
15: 7b 3a 98 ff 00 00 00 00 *(u64 *)(r10 - 104) = r3 | |
16: 7b 3a 90 ff 00 00 00 00 *(u64 *)(r10 - 112) = r3 | |
17: 7b 3a 88 ff 00 00 00 00 *(u64 *)(r10 - 120) = r3 | |
18: 7b 3a 80 ff 00 00 00 00 *(u64 *)(r10 - 128) = r3 | |
19: 7b 3a 78 ff 00 00 00 00 *(u64 *)(r10 - 136) = r3 | |
20: 7b 3a 70 ff 00 00 00 00 *(u64 *)(r10 - 144) = r3 | |
21: 7b 3a 68 ff 00 00 00 00 *(u64 *)(r10 - 152) = r3 | |
22: 7b 3a 60 ff 00 00 00 00 *(u64 *)(r10 - 160) = r3 | |
23: 7b 3a 58 ff 00 00 00 00 *(u64 *)(r10 - 168) = r3 | |
24: 7b 3a 50 ff 00 00 00 00 *(u64 *)(r10 - 176) = r3 | |
25: 7b 3a 48 ff 00 00 00 00 *(u64 *)(r10 - 184) = r3 | |
26: 7b 3a 40 ff 00 00 00 00 *(u64 *)(r10 - 192) = r3 | |
27: 7b 3a 38 ff 00 00 00 00 *(u64 *)(r10 - 200) = r3 | |
28: 7b 3a 30 ff 00 00 00 00 *(u64 *)(r10 - 208) = r3 | |
29: 7b 3a 28 ff 00 00 00 00 *(u64 *)(r10 - 216) = r3 | |
30: 7b 3a 20 ff 00 00 00 00 *(u64 *)(r10 - 224) = r3 | |
31: 7b 3a 18 ff 00 00 00 00 *(u64 *)(r10 - 232) = r3 | |
32: 7b 3a 10 ff 00 00 00 00 *(u64 *)(r10 - 240) = r3 | |
33: 7b 3a 08 ff 00 00 00 00 *(u64 *)(r10 - 248) = r3 | |
34: 7b 3a 00 ff 00 00 00 00 *(u64 *)(r10 - 256) = r3 | |
; struct value_t *valp, zero = {}; // Line 89 | |
35: 7b 3a f8 fe 00 00 00 00 *(u64 *)(r10 - 264) = r3 | |
36: 7b 3a f0 fe 00 00 00 00 *(u64 *)(r10 - 272) = r3 | |
37: 7b 3a e8 fe 00 00 00 00 *(u64 *)(r10 - 280) = r3 | |
38: 7b 3a e0 fe 00 00 00 00 *(u64 *)(r10 - 288) = r3 | |
39: 7b 3a d8 fe 00 00 00 00 *(u64 *)(r10 - 296) = r3 | |
; switch(PT_REGS_IP(ctx)) { // Line 24 | |
40: 79 12 80 00 00 00 00 00 r2 = *(u64 *)(r1 + 128) | |
41: 18 04 00 00 ce 1a d3 1a 00 00 00 00 44 56 00 00 r4 = 94850507807438 ll | |
43: 1d 42 05 00 00 00 00 00 if r2 == r4 goto +5 | |
44: 18 04 00 00 66 0a d3 1a 00 00 00 00 44 56 00 00 r4 = 94850507803238 ll | |
46: 5d 42 04 00 00 00 00 00 if r2 != r4 goto +4 | |
; case 0x56441ad30a66ULL: *((uint64_t *)dest) = ctx->si; __asm__ __volatile__("": : :"memory"); return 0; // Line 25 | |
47: 79 13 68 00 00 00 00 00 r3 = *(u64 *)(r1 + 104) | |
48: 05 00 01 00 00 00 00 00 goto +1 | |
; case 0x56441ad31aceULL: *((uint64_t *)dest) = ctx->si; __asm__ __volatile__("": : :"memory"); return 0; // Line 26 | |
49: 79 13 68 00 00 00 00 00 r3 = *(u64 *)(r1 + 104) | |
50: 79 12 80 00 00 00 00 00 r2 = *(u64 *)(r1 + 128) | |
51: 18 04 00 00 ce 1a d3 1a 00 00 00 00 44 56 00 00 r4 = 94850507807438 ll | |
; switch(PT_REGS_IP(ctx)) { // Line 33 | |
53: 1d 42 06 00 00 00 00 00 if r2 == r4 goto +6 | |
54: b7 06 00 00 00 00 00 00 r6 = 0 | |
55: 18 04 00 00 66 0a d3 1a 00 00 00 00 44 56 00 00 r4 = 94850507803238 ll | |
57: 5d 42 03 00 00 00 00 00 if r2 != r4 goto +3 | |
; case 0x56441ad30a66ULL: *((uint8_t *)dest) = ctx->cx; __asm__ __volatile__("": : :"memory"); return 0; // Line 34 | |
58: 71 16 58 00 00 00 00 00 r6 = *(u8 *)(r1 + 88) | |
59: 05 00 01 00 00 00 00 00 goto +1 | |
; case 0x56441ad31aceULL: *((uint8_t *)dest) = ctx->cx; __asm__ __volatile__("": : :"memory"); return 0; // Line 35 | |
60: 71 16 58 00 00 00 00 00 r6 = *(u8 *)(r1 + 88) | |
61: 79 12 80 00 00 00 00 00 r2 = *(u64 *)(r1 + 128) | |
; switch(PT_REGS_IP(ctx)) { // Line 42 | |
62: 18 04 00 00 ce 1a d3 1a 00 00 00 00 44 56 00 00 r4 = 94850507807438 ll | |
64: 1d 42 06 00 00 00 00 00 if r2 == r4 goto +6 | |
65: b7 08 00 00 00 00 00 00 r8 = 0 | |
66: 18 04 00 00 66 0a d3 1a 00 00 00 00 44 56 00 00 r4 = 94850507803238 ll | |
68: 5d 42 03 00 00 00 00 00 if r2 != r4 goto +3 | |
; case 0x56441ad30a66ULL: *((int32_t *)dest) = ctx->ax; __asm__ __volatile__("": : :"memory"); return 0; // Line 43 | |
69: 79 18 50 00 00 00 00 00 r8 = *(u64 *)(r1 + 80) | |
70: 05 00 01 00 00 00 00 00 goto +1 | |
; case 0x56441ad31aceULL: *((int32_t *)dest) = ctx->ax; __asm__ __volatile__("": : :"memory"); return 0; // Line 44 | |
71: 79 18 50 00 00 00 00 00 r8 = *(u64 *)(r1 + 80) | |
72: bf a7 00 00 00 00 00 00 r7 = r10 | |
; bpf_probe_read(&keyhit.keystr, probe_read_size, (void *)keystr); // Line 102 | |
73: 07 07 00 00 00 ff ff ff r7 += -256 | |
74: bf 71 00 00 00 00 00 00 r1 = r7 | |
75: bf 62 00 00 00 00 00 00 r2 = r6 | |
76: 85 00 00 00 04 00 00 00 call 4 | |
; valp = ({typeof(keyhits.leaf) *leaf = bpf_map_lookup_elem_(bpf_pseudo_fd(1, -1), &keyhit); if (!leaf) { bpf_map_update_elem_(bpf_pseudo_fd(1, -1), &keyhit, &zero, BPF_NOEXIST); leaf = bpf_map_lookup_elem_(bpf_pseudo_fd(1, -1), &keyhit); if (!leaf) return 0;}leaf;}); // Line 104 | |
77: 18 11 00 00 ff ff ff ff 00 00 00 00 00 00 00 00 ld_pseudo r1, 1, 4294967295 | |
79: bf 72 00 00 00 00 00 00 r2 = r7 | |
80: 85 00 00 00 01 00 00 00 call 1 | |
81: bf 07 00 00 00 00 00 00 r7 = r0 | |
82: 55 07 0f 00 00 00 00 00 if r7 != 0 goto +15 | |
83: 18 11 00 00 ff ff ff ff 00 00 00 00 00 00 00 00 ld_pseudo r1, 1, 4294967295 | |
85: bf a7 00 00 00 00 00 00 r7 = r10 | |
86: 07 07 00 00 00 ff ff ff r7 += -256 | |
87: bf a3 00 00 00 00 00 00 r3 = r10 | |
88: 07 03 00 00 d8 fe ff ff r3 += -296 | |
89: bf 72 00 00 00 00 00 00 r2 = r7 | |
90: b7 04 00 00 01 00 00 00 r4 = 1 | |
91: 85 00 00 00 02 00 00 00 call 2 | |
92: 18 11 00 00 ff ff ff ff 00 00 00 00 00 00 00 00 ld_pseudo r1, 1, 4294967295 | |
94: bf 72 00 00 00 00 00 00 r2 = r7 | |
95: 85 00 00 00 01 00 00 00 call 1 | |
96: bf 07 00 00 00 00 00 00 r7 = r0 | |
97: 15 07 0c 00 00 00 00 00 if r7 == 0 goto +12 | |
; valp->keysize = keysize; // Line 107 | |
98: 7b 67 18 00 00 00 00 00 *(u64 *)(r7 + 24) = r6 | |
; valp->count++; // Line 105 | |
99: 79 71 00 00 00 00 00 00 r1 = *(u64 *)(r7 + 0) | |
100: 07 01 00 00 01 00 00 00 r1 += 1 | |
101: 7b 17 00 00 00 00 00 00 *(u64 *)(r7 + 0) = r1 | |
; valp->bytecount = bytecount; // Line 106 | |
102: 67 08 00 00 20 00 00 00 r8 <<= 32 | |
103: c7 08 00 00 20 00 00 00 r8 s>>= 32 | |
104: 7b 87 08 00 00 00 00 00 *(u64 *)(r7 + 8) = r8 | |
; valp->totalbytes += bytecount; // Line 108 | |
105: 79 71 10 00 00 00 00 00 r1 = *(u64 *)(r7 + 16) | |
106: 0f 81 00 00 00 00 00 00 r1 += r8 | |
107: 7b 17 10 00 00 00 00 00 *(u64 *)(r7 + 16) = r1 | |
; valp->timestamp = bpf_ktime_get_ns(); // Line 109 | |
108: 85 00 00 00 05 00 00 00 call 5 | |
109: 7b 07 20 00 00 00 00 00 *(u64 *)(r7 + 32) = r0 | |
; } // Line 113 | |
110: b7 00 00 00 00 00 00 00 r0 = 0 | |
111: 95 00 00 00 00 00 00 00 exit |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment