// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#define _GNU_SOURCE
#include <argp.h>
#include <unistd.h>
#include <stdint.h>
#include "bench.h"
#include "trigger_bench.skel.h"
#include "trace_helpers.h"
#define MAX_TRIG_BATCH_ITERS 1000
static struct {
__u32 batch_iters;
} args = {
.batch_iters = 100,
};
enum {
ARG_TRIG_BATCH_ITERS = 7000,
};
static const struct argp_option opts[] = {
{ "trig-batch-iters", ARG_TRIG_BATCH_ITERS, "BATCH_ITER_CNT", 0,
"Number of in-kernel iterations per one driver test run"},
{},
};
static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
long ret;
switch (key) {
case ARG_TRIG_BATCH_ITERS:
ret = strtol(arg, NULL, 10);
if (ret < 1 || ret > MAX_TRIG_BATCH_ITERS) {
fprintf(stderr, "invalid --trig-batch-iters value (should be between %d and %d)\n",
1, MAX_TRIG_BATCH_ITERS);
argp_usage(state);
}
args.batch_iters = ret;
break;
default:
return ARGP_ERR_UNKNOWN;
}
return 0;
}
const struct argp bench_trigger_batch_argp = {
.options = opts,
.parser = parse_arg,
};
/* adjust slot shift in inc_hits() if changing */
#define MAX_BUCKETS 256
#pragma GCC diagnostic ignored "-Wattributes"
/* BPF triggering benchmarks */
static struct trigger_ctx {
struct trigger_bench *skel;
bool usermode_counters;
int driver_prog_fd;
} ctx;
static struct counter base_hits[MAX_BUCKETS];
static __always_inline void inc_counter(struct counter *counters)
{
static __thread int tid = 0;
unsigned slot;
if (unlikely(tid == 0))
tid = syscall(SYS_gettid);
/* multiplicative hashing, it's fast */
slot = 2654435769U * tid;
slot >>= 24;
atomic_inc(&base_hits[slot].value); /* use highest byte as an index */
}
static long sum_and_reset_counters(struct counter *counters)
{
int i;
long sum = 0;
for (i = 0; i < MAX_BUCKETS; i++)
sum += atomic_swap(&counters[i].value, 0);
return sum;
}
static void trigger_validate(void)
{
if (env.consumer_cnt != 0) {
fprintf(stderr, "benchmark doesn't support consumer!\n");
exit(1);
}
}
static void *trigger_producer(void *input)
{
if (ctx.usermode_counters) {
while (true) {
(void)syscall(__NR_getpgid);
inc_counter(base_hits);
}
} else {
while (true)
(void)syscall(__NR_getpgid);
}
return NULL;
}
static void *trigger_producer_batch(void *input)
{
int fd = ctx.driver_prog_fd ?: bpf_program__fd(ctx.skel->progs.trigger_driver);
while (true)
bpf_prog_test_run_opts(fd, NULL);
return NULL;
}
static void trigger_measure(struct bench_res *res)
{
if (ctx.usermode_counters)
res->hits = sum_and_reset_counters(base_hits);
else
res->hits = sum_and_reset_counters(ctx.skel->bss->hits);
}
static void setup_ctx(void)
{
setup_libbpf();
ctx.skel = trigger_bench__open();
if (!ctx.skel) {
fprintf(stderr, "failed to open skeleton\n");
exit(1);
}
/* default "driver" BPF program */
bpf_program__set_autoload(ctx.skel->progs.trigger_driver, true);
ctx.skel->rodata->batch_iters = args.batch_iters;
}
static void load_ctx(void)
{
int err;
err = trigger_bench__load(ctx.skel);
if (err) {
fprintf(stderr, "failed to open skeleton\n");
exit(1);
}
}
static void attach_bpf(struct bpf_program *prog)
{
struct bpf_link *link;
link = bpf_program__attach(prog);
if (!link) {
fprintf(stderr, "failed to attach program!\n");
exit(1);
}
}
static void trigger_syscall_count_setup(void)
{
ctx.usermode_counters = true;
}
/* Batched, staying mostly in-kernel triggering setups */
static void trigger_kernel_count_setup(void)
{
setup_ctx();
bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
bpf_program__set_autoload(ctx.skel->progs.trigger_count, true);
load_ctx();
/* override driver program */
ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_count);
}
static void trigger_kprobe_setup(void)
{
setup_ctx();
bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe, true);
load_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_kprobe);
}
static void trigger_kretprobe_setup(void)
{
setup_ctx();
bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe, true);
load_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_kretprobe);
}
static void trigger_kprobe_multi_setup(void)
{
setup_ctx();
bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe_multi, true);
load_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_kprobe_multi);
}
static void trigger_kretprobe_multi_setup(void)
{
setup_ctx();
bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe_multi, true);
load_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_kretprobe_multi);
}
static void trigger_fentry_setup(void)
{
setup_ctx();
bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fentry, true);
load_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_fentry);
}
static void trigger_fexit_setup(void)
{
setup_ctx();
bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fexit, true);
load_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_fexit);
}
static void trigger_fmodret_setup(void)
{
setup_ctx();
bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);
bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fmodret, true);
load_ctx();
/* override driver program */
ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);
attach_bpf(ctx.skel->progs.bench_trigger_fmodret);
}
static void trigger_tp_setup(void)
{
setup_ctx();
bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);
bpf_program__set_autoload(ctx.skel->progs.bench_trigger_tp, true);
load_ctx();
/* override driver program */
ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);
attach_bpf(ctx.skel->progs.bench_trigger_tp);
}
static void trigger_rawtp_setup(void)
{
setup_ctx();
bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);
bpf_program__set_autoload(ctx.skel->progs.bench_trigger_rawtp, true);
load_ctx();
/* override driver program */
ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);
attach_bpf(ctx.skel->progs.bench_trigger_rawtp);
}
/* make sure call is not inlined and not avoided by compiler, so __weak and
* inline asm volatile in the body of the function
*
* There is a performance difference between uprobing at nop location vs other
* instructions. So use two different targets, one of which starts with nop
* and another doesn't.
*
* GCC doesn't generate stack setup preamble for these functions due to them
* having no input arguments and doing nothing in the body.
*/
__nocf_check __weak void uprobe_target_nop(void)
{
asm volatile ("nop");
}
__weak void opaque_noop_func(void)
{
}
__nocf_check __weak int uprobe_target_push(void)
{
/* overhead of function call is negligible compared to uprobe
* triggering, so this shouldn't affect benchmark results much
*/
opaque_noop_func();
return 1;
}
__nocf_check __weak void uprobe_target_ret(void)
{
asm volatile ("");
}
static void *uprobe_producer_count(void *input)
{
while (true) {
uprobe_target_nop();
inc_counter(base_hits);
}
return NULL;
}
static void *uprobe_producer_nop(void *input)
{
while (true)
uprobe_target_nop();
return NULL;
}
static void *uprobe_producer_push(void *input)
{
while (true)
uprobe_target_push();
return NULL;
}
static void *uprobe_producer_ret(void *input)
{
while (true)
uprobe_target_ret();
return NULL;
}
static void usetup(bool use_retprobe, bool use_multi, void *target_addr)
{
size_t uprobe_offset;
struct bpf_link *link;
int err;
setup_libbpf();
ctx.skel = trigger_bench__open();
if (!ctx.skel) {
fprintf(stderr, "failed to open skeleton\n");
exit(1);
}
if (use_multi)
bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe_multi, true);
else
bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe, true);
err = trigger_bench__load(ctx.skel);
if (err) {
fprintf(stderr, "failed to load skeleton\n");
exit(1);
}
uprobe_offset = get_uprobe_offset(target_addr);
if (use_multi) {
LIBBPF_OPTS(bpf_uprobe_multi_opts, opts,
.retprobe = use_retprobe,
.cnt = 1,
.offsets = &uprobe_offset,
);
link = bpf_program__attach_uprobe_multi(
ctx.skel->progs.bench_trigger_uprobe_multi,
-1 /* all PIDs */, "/proc/self/exe", NULL, &opts);
ctx.skel->links.bench_trigger_uprobe_multi = link;
} else {
link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe,
use_retprobe,
-1 /* all PIDs */,
"/proc/self/exe",
uprobe_offset);
ctx.skel->links.bench_trigger_uprobe = link;
}
if (!link) {
fprintf(stderr, "failed to attach %s!\n", use_multi ? "multi-uprobe" : "uprobe");
exit(1);
}
}
static void usermode_count_setup(void)
{
ctx.usermode_counters = true;
}
static void uprobe_nop_setup(void)
{
usetup(false, false /* !use_multi */, &uprobe_target_nop);
}
static void uretprobe_nop_setup(void)
{
usetup(true, false /* !use_multi */, &uprobe_target_nop);
}
static void uprobe_push_setup(void)
{
usetup(false, false /* !use_multi */, &uprobe_target_push);
}
static void uretprobe_push_setup(void)
{
usetup(true, false /* !use_multi */, &uprobe_target_push);
}
static void uprobe_ret_setup(void)
{
usetup(false, false /* !use_multi */, &uprobe_target_ret);
}
static void uretprobe_ret_setup(void)
{
usetup(true, false /* !use_multi */, &uprobe_target_ret);
}
static void uprobe_multi_nop_setup(void)
{
usetup(false, true /* use_multi */, &uprobe_target_nop);
}
static void uretprobe_multi_nop_setup(void)
{
usetup(true, true /* use_multi */, &uprobe_target_nop);
}
static void uprobe_multi_push_setup(void)
{
usetup(false, true /* use_multi */, &uprobe_target_push);
}
static void uretprobe_multi_push_setup(void)
{
usetup(true, true /* use_multi */, &uprobe_target_push);
}
static void uprobe_multi_ret_setup(void)
{
usetup(false, true /* use_multi */, &uprobe_target_ret);
}
static void uretprobe_multi_ret_setup(void)
{
usetup(true, true /* use_multi */, &uprobe_target_ret);
}
const struct bench bench_trig_syscall_count = {
.name = "trig-syscall-count",
.validate = trigger_validate,
.setup = trigger_syscall_count_setup,
.producer_thread = trigger_producer,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
};
/* batched (staying mostly in kernel) kprobe/fentry benchmarks */
#define BENCH_TRIG_KERNEL(KIND, NAME) \
const struct bench bench_trig_##KIND = { \
.name = "trig-" NAME, \
.setup = trigger_##KIND##_setup, \
.producer_thread = trigger_producer_batch, \
.measure = trigger_measure, \
.report_progress = hits_drops_report_progress, \
.report_final = hits_drops_report_final, \
.argp = &bench_trigger_batch_argp, \
}
BENCH_TRIG_KERNEL(kernel_count, "kernel-count");
BENCH_TRIG_KERNEL(kprobe, "kprobe");
BENCH_TRIG_KERNEL(kretprobe, "kretprobe");
BENCH_TRIG_KERNEL(kprobe_multi, "kprobe-multi");
BENCH_TRIG_KERNEL(kretprobe_multi, "kretprobe-multi");
BENCH_TRIG_KERNEL(fentry, "fentry");
BENCH_TRIG_KERNEL(fexit, "fexit");
BENCH_TRIG_KERNEL(fmodret, "fmodret");
BENCH_TRIG_KERNEL(tp, "tp");
BENCH_TRIG_KERNEL(rawtp, "rawtp");
/* uprobe benchmarks */
#define BENCH_TRIG_USERMODE(KIND, PRODUCER, NAME) \
const struct bench bench_trig_##KIND = { \
.name = "trig-" NAME, \
.validate = trigger_validate, \
.setup = KIND##_setup, \
.producer_thread = uprobe_producer_##PRODUCER, \
.measure = trigger_measure, \
.report_progress = hits_drops_report_progress, \
.report_final = hits_drops_report_final, \
}
BENCH_TRIG_USERMODE(usermode_count, count, "usermode-count");
BENCH_TRIG_USERMODE(uprobe_nop, nop, "uprobe-nop");
BENCH_TRIG_USERMODE(uprobe_push, push, "uprobe-push");
BENCH_TRIG_USERMODE(uprobe_ret, ret, "uprobe-ret");
BENCH_TRIG_USERMODE(uretprobe_nop, nop, "uretprobe-nop");
BENCH_TRIG_USERMODE(uretprobe_push, push, "uretprobe-push");
BENCH_TRIG_USERMODE(uretprobe_ret, ret, "uretprobe-ret");
BENCH_TRIG_USERMODE(uprobe_multi_nop, nop, "uprobe-multi-nop");
BENCH_TRIG_USERMODE(uprobe_multi_push, push, "uprobe-multi-push");
BENCH_TRIG_USERMODE(uprobe_multi_ret, ret, "uprobe-multi-ret");
BENCH_TRIG_USERMODE(uretprobe_multi_nop, nop, "uretprobe-multi-nop");
BENCH_TRIG_USERMODE(uretprobe_multi_push, push, "uretprobe-multi-push");
BENCH_TRIG_USERMODE(uretprobe_multi_ret, ret, "uretprobe-multi-ret");