Skip to content

Instantly share code, notes, and snippets.

@tuxology
Created December 8, 2014 17:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save tuxology/68fbd813b6eb84fb9766 to your computer and use it in GitHub Desktop.
Save tuxology/68fbd813b6eb84fb9766 to your computer and use it in GitHub Desktop.
LTTng sched_switch eBPF filter
/*
* addons/lttng-sched-filter.c
*
* A filtered version of sched_switch
*
* Copyright (C) 2014 Suchakra Sharma <suchakrapani.sharma@polymtl.ca>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; only
* version 2.1 of the License.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/uaccess.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <trace/bpf_trace.h>
#include <asm/syscall.h>
#include <linux/interrupt.h>
#include <linux/time.h>
#include <uapi/linux/time.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
//#include <linux/kallsyms.h>
#include <linux/sched.h>
#include <linux/binfmts.h>
#include <linux/version.h>
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,9,0))
#include <linux/sched/rt.h>
#endif
#include "../wrapper/tracepoint.h"
#include "../instrumentation/events/lttng-module/addons.h"
/*
 * Build-time selection of the filtering strategy used by the sched_switch
 * probe handler. The handler tests these in the order NOFILT, SIMPLE, BPF and
 * compiles in the first one that is non-zero:
 *   NOFILT - fire the filtered tracepoint unconditionally
 *   SIMPLE - filter with a plain C comparison
 *   BPF    - filter by running the eBPF program
 */
#define BPF 1
#define SIMPLE 0
#define NOFILT 0
/*
 * Defines the sched_switch_filter tracepoint that the handler fires through
 * trace_sched_switch_filter() (presumably declared in addons.h - confirm).
 */
DEFINE_TRACE(sched_switch_filter);
/* Procfs stuff */
/* Size in bytes of the text buffer allocated for accum_time. */
#define MAX_LEN 16000000
/* Entry returned by proc_create() for "eBPFsched" in module init. */
static struct proc_dir_entry *proc_entry;
/*
 * NUL-terminated text log: module init allocates and zeroes it, the probe
 * handler appends one "<count>\t<nanoseconds>\n" line per call, and
 * ebpf_proc_show() prints it to the proc file.
 */
static char *accum_time;
/* NOTE(review): not referenced anywhere else in the visible source. */
u64 len = 0;
/*
 * seq_file show callback for the "eBPFsched" proc entry: prints the whole
 * accumulated timing log.
 *
 * @m: seq_file to print into
 * @v: unused iterator cookie
 *
 * Always returns 0.
 */
static int ebpf_proc_show(struct seq_file *m, void *v)
{
	/*
	 * The log is data, not a format string: pass it through "%s" so that
	 * no byte sequence in the buffer can ever be interpreted as a
	 * conversion specifier.
	 */
	seq_printf(m, "%s", accum_time);
	return 0;
}
/*
 * open() handler for the "eBPFsched" proc entry. Binds the file to
 * ebpf_proc_show() through single_open(), with no private data, and hands
 * back single_open()'s result unchanged.
 */
static int ebpf_proc_open(struct inode *inode, struct file *file)
{
	int status;

	status = single_open(file, ebpf_proc_show, NULL);
	return status;
}
/*
 * File operations for the "eBPFsched" proc entry: a single-record seq_file
 * whose open handler is ebpf_proc_open(); read, seek and release are the
 * stock seq_file helpers.
 */
static const struct file_operations ebpf_proc_fops = {
	.owner   = THIS_MODULE,
	.llseek  = seq_lseek,
	.read    = seq_read,
	.open    = ebpf_proc_open,
	.release = single_release,
};
/* Timing stuff */
/* Incremented once per probe-handler call; printed as the sample index. */
atomic_t count = ATOMIC_INIT(0);
/* Global definitions */
/*
 * The eBPF program: allocated by init_ebpf_prog(), executed by the probe
 * handler through run_bpf_filter(), released in the module exit function.
 */
struct bpf_prog *prog;
/* The actual eBPF prog instructions */
/*
 * Filter program run by the probe handler. It returns 1 in r0 only when the
 * first 8 bytes behind the two pointers stored in the context are equal AND
 * the third context word is 0; otherwise it returns 0.
 *
 * r1 is presumably the context pointer handed to BPF_PROG_RUN(). The offsets
 * 0, 8 and 16 assume arg1/arg2/arg3 are consecutive u64 fields at the start of
 * struct bpf_context - TODO confirm against its definition. The handler fills
 * them with a pointer to the copied prev->comm, a pointer to the "sshd"
 * reference buffer, and the value of prev->state.
 *
 * The jump offsets of 3 assume BPF_LD_IMM64 occupies two instruction slots
 * (confirm in linux/filter.h for the target kernel), so each jump skips one
 * "r0 = 0; exit" sequence.
 */
static struct bpf_insn insn_prog[] = {
BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 0), /* r2 = *(u64 *)(r1 + 0): pointer to the copied prev->comm */
BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_2, 0), /* r3 = first 8 bytes of the copied prev->comm */
BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_1, 8), /* r4 = *(u64 *)(r1 + 8): pointer to the reference name */
BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0), /* r5 = first 8 bytes of the reference name ("sshd", zero padded) */
BPF_JMP_REG(BPF_JEQ, BPF_REG_5, BPF_REG_3, 3), /* names equal: skip the FALSE exit below */
BPF_LD_IMM64(BPF_REG_0, 0), /* FALSE */
BPF_EXIT_INSN(),
BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 16), /* r3 = *(u64 *)(r1 + 16): the prev->state value itself */
BPF_LD_IMM64(BPF_REG_4, 0), /* r4 = 0 */
BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_4, 3), /* state == 0: skip the FALSE exit below */
BPF_LD_IMM64(BPF_REG_0, 0), /* FALSE */
BPF_EXIT_INSN(),
BPF_LD_IMM64(BPF_REG_0, 1), /* TRUE */
BPF_EXIT_INSN(),
};
/*
 * Turn a 64-bit integer back into a pointer. The value is narrowed to
 * unsigned long first, so any bits above the width of unsigned long are
 * discarded.
 */
static void *u64_to_ptr(__u64 val)
{
	unsigned long addr = (unsigned long) val;

	return (void *) addr;
}
static __u64 ptr_to_u64(void *ptr){
return (__u64) (unsigned long) ptr;
}
/*
 * Work-queue callback queued by bpf_map_put(): recovers the bpf_map that
 * embeds @work and releases it through the map's own map_free operation.
 */
void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *owner;

	owner = container_of(work, struct bpf_map, work);
	/* the actual teardown is specific to the map implementation */
	owner->ops->map_free(owner);
}
/*
 * Drop one reference on @map. When the count reaches zero the map is not
 * freed here; bpf_map_free_deferred() is scheduled on a work queue instead.
 */
void bpf_map_put(struct bpf_map *map)
{
	/* other holders remain: nothing more to do */
	if (!atomic_dec_and_test(&map->refcnt))
		return;

	INIT_WORK(&map->work, bpf_map_free_deferred);
	schedule_work(&map->work);
}
/*
 * Release every map reference recorded in @aux (used_map_cnt entries of
 * used_maps), then free the used_maps array itself.
 */
static void free_used_maps(struct bpf_prog_aux *aux)
{
	int idx = 0;

	while (idx < aux->used_map_cnt) {
		bpf_map_put(aux->used_maps[idx]);
		idx++;
	}

	kfree(aux->used_maps);
}
/*
 * Execute @prog1 on @ctx inside an RCU read-side critical section.
 *
 * Returns the program's result; the 64-bit value is narrowed to
 * unsigned int on return.
 */
unsigned int run_bpf_filter(struct bpf_prog *prog1, struct bpf_context *ctx)
{
	u64 verdict;

	rcu_read_lock();
	verdict = BPF_PROG_RUN(prog1, (void *) ctx);
	rcu_read_unlock();

	return verdict;
}
/* Initialize and prepare the eBPF prog */
/*
 * Allocates the global `prog`, copies insn_prog into it, marks it GPL
 * compatible with a reference count of 1, and lets the kernel pick the
 * runtime (interpreter or JIT) for it.
 *
 * The program is NOT passed through the eBPF verifier.
 *
 * Returns 0 on success, or -ENOMEM if the program could not be allocated.
 * The return type is unsigned, so the negative errno is only recovered when
 * the caller stores the result in a signed int (module init does).
 */
unsigned int init_ebpf_prog(void)
{
	/*
	 * Only referenced through attr.log_buf, which nothing consumes until
	 * the verifier is wired in. Kept static so it does not take 1 KiB of
	 * kernel stack.
	 */
	static char bpf_log_buf[1024];
	unsigned int insn_count = sizeof(insn_prog) / sizeof(struct bpf_insn);
	union bpf_attr attr = {
		.prog_type = BPF_PROG_TYPE_UNSPEC,
		.insns = ptr_to_u64((void *) insn_prog),
		.insn_cnt = insn_count,
		.license = ptr_to_u64((void *) "GPL"),
		.log_buf = ptr_to_u64(bpf_log_buf),
		.log_size = sizeof(bpf_log_buf),
		.log_level = 1,
	};

	prog = bpf_prog_alloc(bpf_prog_size(attr.insn_cnt), GFP_USER);
	if (!prog)
		return -ENOMEM;

	prog->jited = false;
	prog->orig_prog = NULL;
	prog->len = attr.insn_cnt;

	/*
	 * memcpy() returns its destination and cannot fail, so the reference
	 * count is set unconditionally rather than behind a test of its
	 * return value.
	 */
	memcpy(prog->insnsi, u64_to_ptr(attr.insns),
	       prog->len * sizeof(struct bpf_insn));
	atomic_set(&prog->aux->refcnt, 1);
	prog->aux->is_gpl_compatible = true;

	/*
	 * TODO eBPF verifier: run bpf_check(prog, &attr) here. An earlier
	 * attempt resolved the symbol at run time with
	 * kallsyms_lookup_name("bpf_check") and called it through a
	 * int (*)(struct bpf_prog *, union bpf_attr *) pointer.
	 */

	/* ready for JIT */
	bpf_prog_select_runtime(prog);
	printk("prog jited? : %d\n", prog->jited);
	return 0;
}
unsigned int filter_dev_probe_handler(void* __data, struct rq *rq, struct task_struct *prev, struct task_struct *next)
{
struct timespec begin, end, diff;
char comm[8] = {};
strcpy(comm, "sshd");
char pcomm[8] = {};
strcpy(pcomm, prev->comm);
struct bpf_context bctx = {};
bctx.arg1 = (u64) pcomm;
bctx.arg2 = (u64) comm;
bctx.arg3 = (u64) prev->state;
/* tick */
getrawmonotonic(&begin);
#if (NOFILT)
trace_sched_switch_filter(prev, next);
#elif (SIMPLE)
if ((memcmp(prev->comm, comm, 4) == 0) && (prev->state == 0))
{
trace_sched_switch_filter(prev, next);
}
#elif (BPF)
unsigned int ret = 0;
ret = run_bpf_filter(prog, &bctx);
if (ret == 1){
trace_sched_switch_filter(prev, next);
}
#endif
/* tock */
getrawmonotonic(&end);
diff = timespec_sub(end, begin);
atomic_inc(&count);
sprintf(accum_time + strlen(accum_time), "%d\t%lu\n", atomic_read(&count), diff.tv_nsec);
return 0;
}
/*
 * Module entry point. In order:
 *   1. prepare the eBPF program (BPF build only),
 *   2. allocate and zero the timing log,
 *   3. create the "eBPFsched" proc entry,
 *   4. attach filter_dev_probe_handler to the "sched_switch" tracepoint.
 *
 * The probe is attached last so that it never runs before the buffer it
 * writes to exists. Any failure undoes the steps already completed.
 *
 * Returns 0 on success or a negative errno.
 */
static int __init sched_switch_filter_init(void)
{
	int ret = 0;

#if (SIMPLE)
	printk("SIMPLE RUN\n");
#elif (BPF)
	printk("BPF RUN\n");
	/* Prepare eBPF prog */
	ret = init_ebpf_prog();
	if (ret)
		return ret;
#endif

	/* Init procfs entry */
	accum_time = (char *) vmalloc(MAX_LEN);
	if (!accum_time) {
		ret = -ENOMEM;
		goto err_prog;
	}
	memset(accum_time, 0, MAX_LEN);

	proc_entry = proc_create("eBPFsched", 0, NULL, &ebpf_proc_fops);
	if (proc_entry == NULL) {
		printk(KERN_INFO "eBPFsched could not be created\n");
		ret = -ENOMEM;
		goto err_buf;
	}
	printk(KERN_INFO "eBPFsched created.\n");

	(void) wrapper_lttng_fixup_sig(THIS_MODULE);
	ret = lttng_wrapper_tracepoint_probe_register("sched_switch",
			filter_dev_probe_handler, NULL);
	if (ret)
		goto err_proc;

	printk("sched_switch_filter loaded\n");
	return 0;

err_proc:
	remove_proc_entry("eBPFsched", NULL);
err_buf:
	vfree(accum_time);
	accum_time = NULL;
err_prog:
#if (!SIMPLE && BPF)
	/* Same teardown as the module exit path. */
	free_used_maps(prog->aux);
	bpf_prog_free(prog);
	prog = NULL;
#endif
	return ret;
}
/*
 * Module exit point. Detaches the probe FIRST and waits for in-flight probe
 * calls to finish, and only then releases the eBPF program, the proc entry
 * and the timing log - the handler uses all three, so freeing them while it
 * can still run would be a use-after-free.
 */
static void __exit sched_switch_filter_exit(void)
{
	int ret;

	ret = lttng_wrapper_tracepoint_probe_unregister("sched_switch",
			filter_dev_probe_handler, NULL);
	if (ret)
		printk(KERN_WARNING "sched_switch_filter: probe unregister failed (%d)\n", ret);
	/* Make sure no CPU is still inside the handler. */
	tracepoint_synchronize_unregister();

#if (BPF)
	/* prog stays NULL when the build never called init_ebpf_prog(). */
	if (prog) {
		free_used_maps(prog->aux);
		printk("Freed maps\n");
		bpf_prog_free(prog);
		prog = NULL;
		printk("Freed bpf prog\n");
	}
#endif

	/* Remove procfs entry before the buffer it prints goes away */
	remove_proc_entry("eBPFsched", NULL);
	printk(KERN_INFO "eBPFsched removed\n");
	vfree(accum_time);
	accum_time = NULL;

	printk("sched_switch_filter unloaded\n");
}
/* Register the load/unload entry points and the module metadata. */
module_init(sched_switch_filter_init);
module_exit(sched_switch_filter_exit);
MODULE_LICENSE("GPL and additional rights");
MODULE_AUTHOR("Suchakra Sharma <suchakrapani.sharma@polymtl.ca>");
MODULE_DESCRIPTION("LTTng filtered sched_switch");
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment