// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
#include <vmlinux.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include "bpf_misc.h"
#include "bpf_experimental.h"
extern void bpf_rcu_read_lock(void) __ksym;
extern void bpf_rcu_read_unlock(void) __ksym;
struct node_data {
long key;
long list_data;
struct bpf_rb_node r;
struct bpf_list_node l;
struct bpf_refcount ref;
};
struct map_value {
struct node_data __kptr *node;
};
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__type(key, int);
__type(value, struct map_value);
__uint(max_entries, 2);
} stashed_nodes SEC(".maps");
struct node_acquire {
long key;
long data;
struct bpf_rb_node node;
struct bpf_refcount refcount;
};
#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8)))
private(A) struct bpf_spin_lock lock;
private(A) struct bpf_rb_root root __contains(node_data, r);
private(A) struct bpf_list_head head __contains(node_data, l);
private(B) struct bpf_spin_lock alock;
private(B) struct bpf_rb_root aroot __contains(node_acquire, node);
private(C) struct bpf_spin_lock block;
private(C) struct bpf_rb_root broot __contains(node_data, r);
static bool less(struct bpf_rb_node *node_a, const struct bpf_rb_node *node_b)
{
struct node_data *a;
struct node_data *b;
a = container_of(node_a, struct node_data, r);
b = container_of(node_b, struct node_data, r);
return a->key < b->key;
}
static bool less_a(struct bpf_rb_node *a, const struct bpf_rb_node *b)
{
struct node_acquire *node_a;
struct node_acquire *node_b;
node_a = container_of(a, struct node_acquire, node);
node_b = container_of(b, struct node_acquire, node);
return node_a->key < node_b->key;
}
static long __insert_in_tree_and_list(struct bpf_list_head *head,
struct bpf_rb_root *root,
struct bpf_spin_lock *lock)
{
struct node_data *n, *m;
n = bpf_obj_new(typeof(*n));
if (!n)
return -1;
m = bpf_refcount_acquire(n);
m->key = 123;
m->list_data = 456;
bpf_spin_lock(lock);
if (bpf_rbtree_add(root, &n->r, less)) {
/* Failure to insert - unexpected */
bpf_spin_unlock(lock);
bpf_obj_drop(m);
return -2;
}
bpf_spin_unlock(lock);
bpf_spin_lock(lock);
if (bpf_list_push_front(head, &m->l)) {
/* Failure to insert - unexpected */
bpf_spin_unlock(lock);
return -3;
}
bpf_spin_unlock(lock);
return 0;
}
static long __stash_map_insert_tree(int idx, int val, struct bpf_rb_root *root,
struct bpf_spin_lock *lock)
{
struct map_value *mapval;
struct node_data *n, *m;
mapval = bpf_map_lookup_elem(&stashed_nodes, &idx);
if (!mapval)
return -1;
n = bpf_obj_new(typeof(*n));
if (!n)
return -2;
n->key = val;
m = bpf_refcount_acquire(n);
n = bpf_kptr_xchg(&mapval->node, n);
if (n) {
bpf_obj_drop(n);
bpf_obj_drop(m);
return -3;
}
bpf_spin_lock(lock);
if (bpf_rbtree_add(root, &m->r, less)) {
/* Failure to insert - unexpected */
bpf_spin_unlock(lock);
return -4;
}
bpf_spin_unlock(lock);
return 0;
}
static long __read_from_tree(struct bpf_rb_root *root,
struct bpf_spin_lock *lock,
bool remove_from_tree)
{
struct bpf_rb_node *rb;
struct node_data *n;
long res = -99;
bpf_spin_lock(lock);
rb = bpf_rbtree_first(root);
if (!rb) {
bpf_spin_unlock(lock);
return -1;
}
n = container_of(rb, struct node_data, r);
res = n->key;
if (!remove_from_tree) {
bpf_spin_unlock(lock);
return res;
}
rb = bpf_rbtree_remove(root, rb);
bpf_spin_unlock(lock);
if (!rb)
return -2;
n = container_of(rb, struct node_data, r);
bpf_obj_drop(n);
return res;
}
static long __read_from_list(struct bpf_list_head *head,
struct bpf_spin_lock *lock,
bool remove_from_list)
{
struct bpf_list_node *l;
struct node_data *n;
long res = -99;
bpf_spin_lock(lock);
l = bpf_list_pop_front(head);
if (!l) {
bpf_spin_unlock(lock);
return -1;
}
n = container_of(l, struct node_data, l);
res = n->list_data;
if (!remove_from_list) {
if (bpf_list_push_back(head, &n->l)) {
bpf_spin_unlock(lock);
return -2;
}
}
bpf_spin_unlock(lock);
if (remove_from_list)
bpf_obj_drop(n);
return res;
}
static long __read_from_unstash(int idx)
{
struct node_data *n = NULL;
struct map_value *mapval;
long val = -99;
mapval = bpf_map_lookup_elem(&stashed_nodes, &idx);
if (!mapval)
return -1;
n = bpf_kptr_xchg(&mapval->node, n);
if (!n)
return -2;
val = n->key;
bpf_obj_drop(n);
return val;
}
#define INSERT_READ_BOTH(rem_tree, rem_list, desc) \
SEC("tc") \
__description(desc) \
__success __retval(579) \
long insert_and_remove_tree_##rem_tree##_list_##rem_list(void *ctx) \
{ \
long err, tree_data, list_data; \
\
err = __insert_in_tree_and_list(&head, &root, &lock); \
if (err) \
return err; \
\
err = __read_from_tree(&root, &lock, rem_tree); \
if (err < 0) \
return err; \
else \
tree_data = err; \
\
err = __read_from_list(&head, &lock, rem_list); \
if (err < 0) \
return err; \
else \
list_data = err; \
\
return tree_data + list_data; \
}
/* After successful insert of struct node_data into both collections:
* - it should have refcount = 2
* - removing / not removing the node_data from a collection after
* reading should have no effect on ability to read / remove from
* the other collection
*/
INSERT_READ_BOTH(true, true, "insert_read_both: remove from tree + list");
INSERT_READ_BOTH(false, false, "insert_read_both: remove from neither");
INSERT_READ_BOTH(true, false, "insert_read_both: remove from tree");
INSERT_READ_BOTH(false, true, "insert_read_both: remove from list");
#undef INSERT_READ_BOTH
#define INSERT_READ_BOTH(rem_tree, rem_list, desc) \
SEC("tc") \
__description(desc) \
__success __retval(579) \
long insert_and_remove_lf_tree_##rem_tree##_list_##rem_list(void *ctx) \
{ \
long err, tree_data, list_data; \
\
err = __insert_in_tree_and_list(&head, &root, &lock); \
if (err) \
return err; \
\
err = __read_from_list(&head, &lock, rem_list); \
if (err < 0) \
return err; \
else \
list_data = err; \
\
err = __read_from_tree(&root, &lock, rem_tree); \
if (err < 0) \
return err; \
else \
tree_data = err; \
\
return tree_data + list_data; \
}
/* Similar to insert_read_both, but list data is read and possibly removed
* first
*
* Results should be no different than reading and possibly removing rbtree
* node first
*/
INSERT_READ_BOTH(true, true, "insert_read_both_list_first: remove from tree + list");
INSERT_READ_BOTH(false, false, "insert_read_both_list_first: remove from neither");
INSERT_READ_BOTH(true, false, "insert_read_both_list_first: remove from tree");
INSERT_READ_BOTH(false, true, "insert_read_both_list_first: remove from list");
#define INSERT_DOUBLE_READ_AND_DEL(read_fn, read_root, desc) \
SEC("tc") \
__description(desc) \
__success __retval(-1) \
long insert_double_##read_fn##_and_del_##read_root(void *ctx) \
{ \
long err, list_data; \
\
err = __insert_in_tree_and_list(&head, &root, &lock); \
if (err) \
return err; \
\
err = read_fn(&read_root, &lock, true); \
if (err < 0) \
return err; \
else \
list_data = err; \
\
err = read_fn(&read_root, &lock, true); \
if (err < 0) \
return err; \
\
return err + list_data; \
}
/* Insert into both tree and list, then try reading-and-removing from either twice
*
* The second read-and-remove should fail on read step since the node has
* already been removed
*/
INSERT_DOUBLE_READ_AND_DEL(__read_from_tree, root, "insert_double_del: 2x read-and-del from tree");
INSERT_DOUBLE_READ_AND_DEL(__read_from_list, head, "insert_double_del: 2x read-and-del from list");
#define INSERT_STASH_READ(rem_tree, desc) \
SEC("tc") \
__description(desc) \
__success __retval(84) \
long insert_rbtree_and_stash__del_tree_##rem_tree(void *ctx) \
{ \
long err, tree_data, map_data; \
\
err = __stash_map_insert_tree(0, 42, &root, &lock); \
if (err) \
return err; \
\
err = __read_from_tree(&root, &lock, rem_tree); \
if (err < 0) \
return err; \
else \
tree_data = err; \
\
err = __read_from_unstash(0); \
if (err < 0) \
return err; \
else \
map_data = err; \
\
return tree_data + map_data; \
}
/* Stash a refcounted node in map_val, insert same node into tree, then try
* reading data from tree then unstashed map_val, possibly removing from tree
*
* Removing from tree should have no effect on map_val kptr validity
*/
INSERT_STASH_READ(true, "insert_stash_read: remove from tree");
INSERT_STASH_READ(false, "insert_stash_read: don't remove from tree");
SEC("tc")
__success
long rbtree_refcounted_node_ref_escapes(void *ctx)
{
struct node_acquire *n, *m;
n = bpf_obj_new(typeof(*n));
if (!n)
return 1;
bpf_spin_lock(&alock);
bpf_rbtree_add(&aroot, &n->node, less_a);
m = bpf_refcount_acquire(n);
bpf_spin_unlock(&alock);
if (!m)
return 2;
m->key = 2;
bpf_obj_drop(m);
return 0;
}
SEC("tc")
__success
long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx)
{
struct node_acquire *n, *m;
n = bpf_obj_new(typeof(*n));
if (!n)
return 1;
m = bpf_refcount_acquire(n);
m->key = 2;
bpf_spin_lock(&alock);
bpf_rbtree_add(&aroot, &n->node, less_a);
bpf_spin_unlock(&alock);
bpf_obj_drop(m);
return 0;
}
static long __stash_map_empty_xchg(struct node_data *n, int idx)
{
struct map_value *mapval = bpf_map_lookup_elem(&stashed_nodes, &idx);
if (!mapval) {
bpf_obj_drop(n);
return 1;
}
n = bpf_kptr_xchg(&mapval->node, n);
if (n) {
bpf_obj_drop(n);
return 2;
}
return 0;
}
SEC("tc")
long rbtree_wrong_owner_remove_fail_a1(void *ctx)
{
struct node_data *n, *m;
n = bpf_obj_new(typeof(*n));
if (!n)
return 1;
m = bpf_refcount_acquire(n);
if (__stash_map_empty_xchg(n, 0)) {
bpf_obj_drop(m);
return 2;
}
if (__stash_map_empty_xchg(m, 1))
return 3;
return 0;
}
SEC("tc")
long rbtree_wrong_owner_remove_fail_b(void *ctx)
{
struct map_value *mapval;
struct node_data *n;
int idx = 0;
mapval = bpf_map_lookup_elem(&stashed_nodes, &idx);
if (!mapval)
return 1;
n = bpf_kptr_xchg(&mapval->node, NULL);
if (!n)
return 2;
bpf_spin_lock(&block);
bpf_rbtree_add(&broot, &n->r, less);
bpf_spin_unlock(&block);
return 0;
}
SEC("tc")
long rbtree_wrong_owner_remove_fail_a2(void *ctx)
{
struct map_value *mapval;
struct bpf_rb_node *res;
struct node_data *m;
int idx = 1;
mapval = bpf_map_lookup_elem(&stashed_nodes, &idx);
if (!mapval)
return 1;
m = bpf_kptr_xchg(&mapval->node, NULL);
if (!m)
return 2;
bpf_spin_lock(&lock);
/* make m non-owning ref */
bpf_list_push_back(&head, &m->l);
res = bpf_rbtree_remove(&root, &m->r);
bpf_spin_unlock(&lock);
if (res) {
bpf_obj_drop(container_of(res, struct node_data, r));
return 3;
}
return 0;
}
SEC("?fentry.s/bpf_testmod_test_read")
__success
int BPF_PROG(rbtree_sleepable_rcu,
struct file *file, struct kobject *kobj,
struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len)
{
struct bpf_rb_node *rb;
struct node_data *n, *m = NULL;
n = bpf_obj_new(typeof(*n));
if (!n)
return 0;
bpf_rcu_read_lock();
bpf_spin_lock(&lock);
bpf_rbtree_add(&root, &n->r, less);
rb = bpf_rbtree_first(&root);
if (!rb)
goto err_out;
rb = bpf_rbtree_remove(&root, rb);
if (!rb)
goto err_out;
m = container_of(rb, struct node_data, r);
err_out:
bpf_spin_unlock(&lock);
bpf_rcu_read_unlock();
if (m)
bpf_obj_drop(m);
return 0;
}
SEC("?fentry.s/bpf_testmod_test_read")
__success
int BPF_PROG(rbtree_sleepable_rcu_no_explicit_rcu_lock,
struct file *file, struct kobject *kobj,
struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len)
{
struct bpf_rb_node *rb;
struct node_data *n, *m = NULL;
n = bpf_obj_new(typeof(*n));
if (!n)
return 0;
/* No explicit bpf_rcu_read_lock */
bpf_spin_lock(&lock);
bpf_rbtree_add(&root, &n->r, less);
rb = bpf_rbtree_first(&root);
if (!rb)
goto err_out;
rb = bpf_rbtree_remove(&root, rb);
if (!rb)
goto err_out;
m = container_of(rb, struct node_data, r);
err_out:
bpf_spin_unlock(&lock);
/* No explicit bpf_rcu_read_unlock */
if (m)
bpf_obj_drop(m);
return 0;
}
char _license[] SEC("license") = "GPL";