linux/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c

// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */

#define _GNU_SOURCE
#include <linux/compiler.h>
#include <linux/ring_buffer.h>
#include <linux/build_bug.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/sysinfo.h>
#include <test_progs.h>
#include <uapi/linux/bpf.h>
#include <unistd.h>

#include "user_ringbuf_fail.skel.h"
#include "user_ringbuf_success.skel.h"

#include "../progs/test_user_ringbuf.h"

static const long c_sample_size = sizeof(struct sample) + BPF_RINGBUF_HDR_SZ;
static const long c_ringbuf_size = 1 << 12; /* 1 small page */
static const long c_max_entries = c_ringbuf_size / c_sample_size;

static void drain_current_samples(void)
{
	syscall(__NR_getpgid);
}

static int write_samples(struct user_ring_buffer *ringbuf, uint32_t num_samples)
{
	int i, err = 0;

	/* Write some number of samples to the ring buffer. */
	for (i = 0; i < num_samples; i++) {
		struct sample *entry;
		int read;

		entry = user_ring_buffer__reserve(ringbuf, sizeof(*entry));
		if (!entry) {
			err = -errno;
			goto done;
		}

		entry->pid = getpid();
		entry->seq = i;
		entry->value = i * i;

		read = snprintf(entry->comm, sizeof(entry->comm), "%u", i);
		if (read <= 0) {
			/* Assert on the error path to avoid spamming logs with
			 * mostly success messages.
			 */
			ASSERT_GT(read, 0, "snprintf_comm");
			err = read;
			user_ring_buffer__discard(ringbuf, entry);
			goto done;
		}

		user_ring_buffer__submit(ringbuf, entry);
	}

done:
	drain_current_samples();

	return err;
}

static struct user_ringbuf_success *open_load_ringbuf_skel(void)
{
	struct user_ringbuf_success *skel;
	int err;

	skel = user_ringbuf_success__open();
	if (!ASSERT_OK_PTR(skel, "skel_open"))
		return NULL;

	err = bpf_map__set_max_entries(skel->maps.user_ringbuf, c_ringbuf_size);
	if (!ASSERT_OK(err, "set_max_entries"))
		goto cleanup;

	err = bpf_map__set_max_entries(skel->maps.kernel_ringbuf, c_ringbuf_size);
	if (!ASSERT_OK(err, "set_max_entries"))
		goto cleanup;

	err = user_ringbuf_success__load(skel);
	if (!ASSERT_OK(err, "skel_load"))
		goto cleanup;

	return skel;

cleanup:
	user_ringbuf_success__destroy(skel);
	return NULL;
}

static void test_user_ringbuf_mappings(void)
{
	int err, rb_fd;
	int page_size = getpagesize();
	void *mmap_ptr;
	struct user_ringbuf_success *skel;

	skel = open_load_ringbuf_skel();
	if (!skel)
		return;

	rb_fd = bpf_map__fd(skel->maps.user_ringbuf);
	/* cons_pos can be mapped R/O, can't add +X with mprotect. */
	mmap_ptr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, rb_fd, 0);
	ASSERT_OK_PTR(mmap_ptr, "ro_cons_pos");
	ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_WRITE), "write_cons_pos_protect");
	ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_cons_pos_protect");
	ASSERT_ERR_PTR(mremap(mmap_ptr, 0, 4 * page_size, MREMAP_MAYMOVE), "wr_prod_pos");
	err = -errno;
	ASSERT_ERR(err, "wr_prod_pos_err");
	ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_ro_cons");

	/* prod_pos can be mapped RW, can't add +X with mprotect. */
	mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
			rb_fd, page_size);
	ASSERT_OK_PTR(mmap_ptr, "rw_prod_pos");
	ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_prod_pos_protect");
	err = -errno;
	ASSERT_ERR(err, "wr_prod_pos_err");
	ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw_prod");

	/* data pages can be mapped RW, can't add +X with mprotect. */
	mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd,
			2 * page_size);
	ASSERT_OK_PTR(mmap_ptr, "rw_data");
	ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_data_protect");
	err = -errno;
	ASSERT_ERR(err, "exec_data_err");
	ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw_data");

	user_ringbuf_success__destroy(skel);
}

static int load_skel_create_ringbufs(struct user_ringbuf_success **skel_out,
				     struct ring_buffer **kern_ringbuf_out,
				     ring_buffer_sample_fn callback,
				     struct user_ring_buffer **user_ringbuf_out)
{
	struct user_ringbuf_success *skel;
	struct ring_buffer *kern_ringbuf = NULL;
	struct user_ring_buffer *user_ringbuf = NULL;
	int err = -ENOMEM, rb_fd;

	skel = open_load_ringbuf_skel();
	if (!skel)
		return err;

	/* only trigger BPF program for current process */
	skel->bss->pid = getpid();

	if (kern_ringbuf_out) {
		rb_fd = bpf_map__fd(skel->maps.kernel_ringbuf);
		kern_ringbuf = ring_buffer__new(rb_fd, callback, skel, NULL);
		if (!ASSERT_OK_PTR(kern_ringbuf, "kern_ringbuf_create"))
			goto cleanup;

		*kern_ringbuf_out = kern_ringbuf;
	}

	if (user_ringbuf_out) {
		rb_fd = bpf_map__fd(skel->maps.user_ringbuf);
		user_ringbuf = user_ring_buffer__new(rb_fd, NULL);
		if (!ASSERT_OK_PTR(user_ringbuf, "user_ringbuf_create"))
			goto cleanup;

		*user_ringbuf_out = user_ringbuf;
		ASSERT_EQ(skel->bss->read, 0, "no_reads_after_load");
	}

	err = user_ringbuf_success__attach(skel);
	if (!ASSERT_OK(err, "skel_attach"))
		goto cleanup;

	*skel_out = skel;
	return 0;

cleanup:
	if (kern_ringbuf_out)
		*kern_ringbuf_out = NULL;
	if (user_ringbuf_out)
		*user_ringbuf_out = NULL;
	ring_buffer__free(kern_ringbuf);
	user_ring_buffer__free(user_ringbuf);
	user_ringbuf_success__destroy(skel);
	return err;
}

static int load_skel_create_user_ringbuf(struct user_ringbuf_success **skel_out,
					 struct user_ring_buffer **ringbuf_out)
{
	return load_skel_create_ringbufs(skel_out, NULL, NULL, ringbuf_out);
}

static void manually_write_test_invalid_sample(struct user_ringbuf_success *skel,
					       __u32 size, __u64 producer_pos, int err)
{
	void *data_ptr;
	__u64 *producer_pos_ptr;
	int rb_fd, page_size = getpagesize();

	rb_fd = bpf_map__fd(skel->maps.user_ringbuf);

	ASSERT_EQ(skel->bss->read, 0, "num_samples_before_bad_sample");

	/* Map the producer_pos as RW. */
	producer_pos_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
				MAP_SHARED, rb_fd, page_size);
	ASSERT_OK_PTR(producer_pos_ptr, "producer_pos_ptr");

	/* Map the data pages as RW. */
	data_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, 2 * page_size);
	ASSERT_OK_PTR(data_ptr, "rw_data");

	memset(data_ptr, 0, BPF_RINGBUF_HDR_SZ);
	*(__u32 *)data_ptr = size;

	/* Synchronizes with smp_load_acquire() in __bpf_user_ringbuf_peek() in the kernel. */
	smp_store_release(producer_pos_ptr, producer_pos + BPF_RINGBUF_HDR_SZ);

	drain_current_samples();
	ASSERT_EQ(skel->bss->read, 0, "num_samples_after_bad_sample");
	ASSERT_EQ(skel->bss->err, err, "err_after_bad_sample");

	ASSERT_OK(munmap(producer_pos_ptr, page_size), "unmap_producer_pos");
	ASSERT_OK(munmap(data_ptr, page_size), "unmap_data_ptr");
}

static void test_user_ringbuf_post_misaligned(void)
{
	struct user_ringbuf_success *skel;
	struct user_ring_buffer *ringbuf;
	int err;
	__u32 size = (1 << 5) + 7;

	err = load_skel_create_user_ringbuf(&skel, &ringbuf);
	if (!ASSERT_OK(err, "misaligned_skel"))
		return;

	manually_write_test_invalid_sample(skel, size, size, -EINVAL);
	user_ring_buffer__free(ringbuf);
	user_ringbuf_success__destroy(skel);
}

static void test_user_ringbuf_post_producer_wrong_offset(void)
{
	struct user_ringbuf_success *skel;
	struct user_ring_buffer *ringbuf;
	int err;
	__u32 size = (1 << 5);

	err = load_skel_create_user_ringbuf(&skel, &ringbuf);
	if (!ASSERT_OK(err, "wrong_offset_skel"))
		return;

	manually_write_test_invalid_sample(skel, size, size - 8, -EINVAL);
	user_ring_buffer__free(ringbuf);
	user_ringbuf_success__destroy(skel);
}

static void test_user_ringbuf_post_larger_than_ringbuf_sz(void)
{
	struct user_ringbuf_success *skel;
	struct user_ring_buffer *ringbuf;
	int err;
	__u32 size = c_ringbuf_size;

	err = load_skel_create_user_ringbuf(&skel, &ringbuf);
	if (!ASSERT_OK(err, "huge_sample_skel"))
		return;

	manually_write_test_invalid_sample(skel, size, size, -E2BIG);
	user_ring_buffer__free(ringbuf);
	user_ringbuf_success__destroy(skel);
}

static void test_user_ringbuf_basic(void)
{
	struct user_ringbuf_success *skel;
	struct user_ring_buffer *ringbuf;
	int err;

	err = load_skel_create_user_ringbuf(&skel, &ringbuf);
	if (!ASSERT_OK(err, "ringbuf_basic_skel"))
		return;

	ASSERT_EQ(skel->bss->read, 0, "num_samples_read_before");

	err = write_samples(ringbuf, 2);
	if (!ASSERT_OK(err, "write_samples"))
		goto cleanup;

	ASSERT_EQ(skel->bss->read, 2, "num_samples_read_after");

cleanup:
	user_ring_buffer__free(ringbuf);
	user_ringbuf_success__destroy(skel);
}

static void test_user_ringbuf_sample_full_ring_buffer(void)
{
	struct user_ringbuf_success *skel;
	struct user_ring_buffer *ringbuf;
	int err;
	void *sample;

	err = load_skel_create_user_ringbuf(&skel, &ringbuf);
	if (!ASSERT_OK(err, "ringbuf_full_sample_skel"))
		return;

	sample = user_ring_buffer__reserve(ringbuf, c_ringbuf_size - BPF_RINGBUF_HDR_SZ);
	if (!ASSERT_OK_PTR(sample, "full_sample"))
		goto cleanup;

	user_ring_buffer__submit(ringbuf, sample);
	ASSERT_EQ(skel->bss->read, 0, "num_samples_read_before");
	drain_current_samples();
	ASSERT_EQ(skel->bss->read, 1, "num_samples_read_after");

cleanup:
	user_ring_buffer__free(ringbuf);
	user_ringbuf_success__destroy(skel);
}

static void test_user_ringbuf_post_alignment_autoadjust(void)
{
	struct user_ringbuf_success *skel;
	struct user_ring_buffer *ringbuf;
	struct sample *sample;
	int err;

	err = load_skel_create_user_ringbuf(&skel, &ringbuf);
	if (!ASSERT_OK(err, "ringbuf_align_autoadjust_skel"))
		return;

	/* libbpf should automatically round any sample up to an 8-byte alignment. */
	sample = user_ring_buffer__reserve(ringbuf, sizeof(*sample) + 1);
	ASSERT_OK_PTR(sample, "reserve_autoaligned");
	user_ring_buffer__submit(ringbuf, sample);

	ASSERT_EQ(skel->bss->read, 0, "num_samples_read_before");
	drain_current_samples();
	ASSERT_EQ(skel->bss->read, 1, "num_samples_read_after");

	user_ring_buffer__free(ringbuf);
	user_ringbuf_success__destroy(skel);
}

static void test_user_ringbuf_overfill(void)
{
	struct user_ringbuf_success *skel;
	struct user_ring_buffer *ringbuf;
	int err;

	err = load_skel_create_user_ringbuf(&skel, &ringbuf);
	if (err)
		return;

	err = write_samples(ringbuf, c_max_entries * 5);
	ASSERT_ERR(err, "write_samples");
	ASSERT_EQ(skel->bss->read, c_max_entries, "max_entries");

	user_ring_buffer__free(ringbuf);
	user_ringbuf_success__destroy(skel);
}

static void test_user_ringbuf_discards_properly_ignored(void)
{
	struct user_ringbuf_success *skel;
	struct user_ring_buffer *ringbuf;
	int err, num_discarded = 0;
	__u64 *token;

	err = load_skel_create_user_ringbuf(&skel, &ringbuf);
	if (err)
		return;

	ASSERT_EQ(skel->bss->read, 0, "num_samples_read_before");

	while (1) {
		/* Write samples until the buffer is full. */
		token = user_ring_buffer__reserve(ringbuf, sizeof(*token));
		if (!token)
			break;

		user_ring_buffer__discard(ringbuf, token);
		num_discarded++;
	}

	if (!ASSERT_GE(num_discarded, 0, "num_discarded"))
		goto cleanup;

	/* Should not read any samples, as they are all discarded. */
	ASSERT_EQ(skel->bss->read, 0, "num_pre_kick");
	drain_current_samples();
	ASSERT_EQ(skel->bss->read, 0, "num_post_kick");

	/* Now that the ring buffer has been drained, we should be able to
	 * reserve another token.
	 */
	token = user_ring_buffer__reserve(ringbuf, sizeof(*token));

	if (!ASSERT_OK_PTR(token, "new_token"))
		goto cleanup;

	user_ring_buffer__discard(ringbuf, token);
cleanup:
	user_ring_buffer__free(ringbuf);
	user_ringbuf_success__destroy(skel);
}

static void test_user_ringbuf_loop(void)
{
	struct user_ringbuf_success *skel;
	struct user_ring_buffer *ringbuf;
	uint32_t total_samples = 8192;
	uint32_t remaining_samples = total_samples;
	int err;

	BUILD_BUG_ON(total_samples <= c_max_entries);
	err = load_skel_create_user_ringbuf(&skel, &ringbuf);
	if (err)
		return;

	do  {
		uint32_t curr_samples;

		curr_samples = remaining_samples > c_max_entries
			? c_max_entries : remaining_samples;
		err = write_samples(ringbuf, curr_samples);
		if (err != 0) {
			/* Assert inside of if statement to avoid flooding logs
			 * on the success path.
			 */
			ASSERT_OK(err, "write_samples");
			goto cleanup;
		}

		remaining_samples -= curr_samples;
		ASSERT_EQ(skel->bss->read, total_samples - remaining_samples,
			  "current_batched_entries");
	} while (remaining_samples > 0);
	ASSERT_EQ(skel->bss->read, total_samples, "total_batched_entries");

cleanup:
	user_ring_buffer__free(ringbuf);
	user_ringbuf_success__destroy(skel);
}

static int send_test_message(struct user_ring_buffer *ringbuf,
			     enum test_msg_op op, s64 operand_64,
			     s32 operand_32)
{
	struct test_msg *msg;

	msg = user_ring_buffer__reserve(ringbuf, sizeof(*msg));
	if (!msg) {
		/* Assert on the error path to avoid spamming logs with mostly
		 * success messages.
		 */
		ASSERT_OK_PTR(msg, "reserve_msg");
		return -ENOMEM;
	}

	msg->msg_op = op;

	switch (op) {
	case TEST_MSG_OP_INC64:
	case TEST_MSG_OP_MUL64:
		msg->operand_64 = operand_64;
		break;
	case TEST_MSG_OP_INC32:
	case TEST_MSG_OP_MUL32:
		msg->operand_32 = operand_32;
		break;
	default:
		PRINT_FAIL("Invalid operand %d\n", op);
		user_ring_buffer__discard(ringbuf, msg);
		return -EINVAL;
	}

	user_ring_buffer__submit(ringbuf, msg);

	return 0;
}

static void kick_kernel_read_messages(void)
{
	syscall(__NR_prctl);
}

static int handle_kernel_msg(void *ctx, void *data, size_t len)
{
	struct user_ringbuf_success *skel = ctx;
	struct test_msg *msg = data;

	switch (msg->msg_op) {
	case TEST_MSG_OP_INC64:
		skel->bss->user_mutated += msg->operand_64;
		return 0;
	case TEST_MSG_OP_INC32:
		skel->bss->user_mutated += msg->operand_32;
		return 0;
	case TEST_MSG_OP_MUL64:
		skel->bss->user_mutated *= msg->operand_64;
		return 0;
	case TEST_MSG_OP_MUL32:
		skel->bss->user_mutated *= msg->operand_32;
		return 0;
	default:
		fprintf(stderr, "Invalid operand %d\n", msg->msg_op);
		return -EINVAL;
	}
}

static void drain_kernel_messages_buffer(struct ring_buffer *kern_ringbuf,
					 struct user_ringbuf_success *skel)
{
	int cnt;

	cnt = ring_buffer__consume(kern_ringbuf);
	ASSERT_EQ(cnt, 8, "consume_kern_ringbuf");
	ASSERT_OK(skel->bss->err, "consume_kern_ringbuf_err");
}

static void test_user_ringbuf_msg_protocol(void)
{
	struct user_ringbuf_success *skel;
	struct user_ring_buffer *user_ringbuf;
	struct ring_buffer *kern_ringbuf;
	int err, i;
	__u64 expected_kern = 0;

	err = load_skel_create_ringbufs(&skel, &kern_ringbuf, handle_kernel_msg, &user_ringbuf);
	if (!ASSERT_OK(err, "create_ringbufs"))
		return;

	for (i = 0; i < 64; i++) {
		enum test_msg_op op = i % TEST_MSG_OP_NUM_OPS;
		__u64 operand_64 = TEST_OP_64;
		__u32 operand_32 = TEST_OP_32;

		err = send_test_message(user_ringbuf, op, operand_64, operand_32);
		if (err) {
			/* Only assert on a failure to avoid spamming success logs. */
			ASSERT_OK(err, "send_test_message");
			goto cleanup;
		}

		switch (op) {
		case TEST_MSG_OP_INC64:
			expected_kern += operand_64;
			break;
		case TEST_MSG_OP_INC32:
			expected_kern += operand_32;
			break;
		case TEST_MSG_OP_MUL64:
			expected_kern *= operand_64;
			break;
		case TEST_MSG_OP_MUL32:
			expected_kern *= operand_32;
			break;
		default:
			PRINT_FAIL("Unexpected op %d\n", op);
			goto cleanup;
		}

		if (i % 8 == 0) {
			kick_kernel_read_messages();
			ASSERT_EQ(skel->bss->kern_mutated, expected_kern, "expected_kern");
			ASSERT_EQ(skel->bss->err, 0, "bpf_prog_err");
			drain_kernel_messages_buffer(kern_ringbuf, skel);
		}
	}

cleanup:
	ring_buffer__free(kern_ringbuf);
	user_ring_buffer__free(user_ringbuf);
	user_ringbuf_success__destroy(skel);
}

static void *kick_kernel_cb(void *arg)
{
	/* Kick the kernel, causing it to drain the ring buffer and then wake
	 * up the test thread waiting on epoll.
	 */
	syscall(__NR_prlimit64);

	return NULL;
}

static int spawn_kick_thread_for_poll(void)
{
	pthread_t thread;

	return pthread_create(&thread, NULL, kick_kernel_cb, NULL);
}

static void test_user_ringbuf_blocking_reserve(void)
{
	struct user_ringbuf_success *skel;
	struct user_ring_buffer *ringbuf;
	int err, num_written = 0;
	__u64 *token;

	err = load_skel_create_user_ringbuf(&skel, &ringbuf);
	if (err)
		return;

	ASSERT_EQ(skel->bss->read, 0, "num_samples_read_before");

	while (1) {
		/* Write samples until the buffer is full. */
		token = user_ring_buffer__reserve(ringbuf, sizeof(*token));
		if (!token)
			break;

		*token = 0xdeadbeef;

		user_ring_buffer__submit(ringbuf, token);
		num_written++;
	}

	if (!ASSERT_GE(num_written, 0, "num_written"))
		goto cleanup;

	/* Should not have read any samples until the kernel is kicked. */
	ASSERT_EQ(skel->bss->read, 0, "num_pre_kick");

	/* We correctly time out after 1 second, without a sample. */
	token = user_ring_buffer__reserve_blocking(ringbuf, sizeof(*token), 1000);
	if (!ASSERT_EQ(token, NULL, "pre_kick_timeout_token"))
		goto cleanup;

	err = spawn_kick_thread_for_poll();
	if (!ASSERT_EQ(err, 0, "deferred_kick_thread\n"))
		goto cleanup;

	/* After spawning another thread that asynchronously kicks the kernel to
	 * drain the messages, we're able to block and successfully get a
	 * sample once we receive an event notification.
	 */
	token = user_ring_buffer__reserve_blocking(ringbuf, sizeof(*token), 10000);

	if (!ASSERT_OK_PTR(token, "block_token"))
		goto cleanup;

	ASSERT_GT(skel->bss->read, 0, "num_post_kill");
	ASSERT_LE(skel->bss->read, num_written, "num_post_kill");
	ASSERT_EQ(skel->bss->err, 0, "err_post_poll");
	user_ring_buffer__discard(ringbuf, token);

cleanup:
	user_ring_buffer__free(ringbuf);
	user_ringbuf_success__destroy(skel);
}

#define SUCCESS_TEST(_func) { _func, #_func }

static struct {
	void (*test_callback)(void);
	const char *test_name;
} success_tests[] = {
	SUCCESS_TEST(test_user_ringbuf_mappings),
	SUCCESS_TEST(test_user_ringbuf_post_misaligned),
	SUCCESS_TEST(test_user_ringbuf_post_producer_wrong_offset),
	SUCCESS_TEST(test_user_ringbuf_post_larger_than_ringbuf_sz),
	SUCCESS_TEST(test_user_ringbuf_basic),
	SUCCESS_TEST(test_user_ringbuf_sample_full_ring_buffer),
	SUCCESS_TEST(test_user_ringbuf_post_alignment_autoadjust),
	SUCCESS_TEST(test_user_ringbuf_overfill),
	SUCCESS_TEST(test_user_ringbuf_discards_properly_ignored),
	SUCCESS_TEST(test_user_ringbuf_loop),
	SUCCESS_TEST(test_user_ringbuf_msg_protocol),
	SUCCESS_TEST(test_user_ringbuf_blocking_reserve),
};

void test_user_ringbuf(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(success_tests); i++) {
		if (!test__start_subtest(success_tests[i].test_name))
			continue;

		success_tests[i].test_callback();
	}

	RUN_TESTS(user_ringbuf_fail);
}