linux/tools/lib/bpf/btf_relocate.c

// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
/* Copyright (c) 2024, Oracle and/or its affiliates. */

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

#ifdef __KERNEL__
#include <linux/bpf.h>
#include <linux/bsearch.h>
#include <linux/btf.h>
#include <linux/sort.h>
#include <linux/string.h>
#include <linux/bpf_verifier.h>

#define btf_type_by_id				(struct btf_type *)btf_type_by_id
#define btf__type_cnt				btf_nr_types
#define btf__base_btf				btf_base_btf
#define btf__name_by_offset			btf_name_by_offset
#define btf__str_by_offset			btf_str_by_offset
#define btf_kflag				btf_type_kflag

#define calloc(nmemb, sz)			kvcalloc(nmemb, sz, GFP_KERNEL | __GFP_NOWARN)
#define free(ptr)				kvfree(ptr)
#define qsort(base, num, sz, cmp)		sort(base, num, sz, cmp, NULL)

#else

#include "btf.h"
#include "bpf.h"
#include "libbpf.h"
#include "libbpf_internal.h"

#endif /* __KERNEL__ */

struct btf;

struct btf_relocate {
	struct btf *btf;
	const struct btf *base_btf;
	const struct btf *dist_base_btf;
	unsigned int nr_base_types;
	unsigned int nr_split_types;
	unsigned int nr_dist_base_types;
	int dist_str_len;
	int base_str_len;
	__u32 *id_map;
	__u32 *str_map;
};

/* Set temporarily in relocation id_map if distilled base struct/union is
 * embedded in a split BTF struct/union; in such a case, size information must
 * match between distilled base BTF and base BTF representation of type.
 */
#define BTF_IS_EMBEDDED ((__u32)-1)

/* <name, size, id> triple used in sorting/searching distilled base BTF. */
struct btf_name_info {
	const char *name;
	/* set when search requires a size match */
	bool needs_size: 1;
	unsigned int size: 31;
	__u32 id;
};

static int btf_relocate_rewrite_type_id(struct btf_relocate *r, __u32 i)
{
	struct btf_type *t = btf_type_by_id(r->btf, i);
	struct btf_field_iter it;
	__u32 *id;
	int err;

	err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS);
	if (err)
		return err;

	while ((id = btf_field_iter_next(&it)))
		*id = r->id_map[*id];
	return 0;
}

/* Simple string comparison used for sorting within BTF, since all distilled
 * types are named.  If strings match, and size is non-zero for both elements
 * fall back to using size for ordering.
 */
static int cmp_btf_name_size(const void *n1, const void *n2)
{
	const struct btf_name_info *ni1 = n1;
	const struct btf_name_info *ni2 = n2;
	int name_diff = strcmp(ni1->name, ni2->name);

	if (!name_diff && ni1->needs_size && ni2->needs_size)
		return ni2->size - ni1->size;
	return name_diff;
}

/* Binary search with a small twist; find leftmost element that matches
 * so that we can then iterate through all exact matches.  So for example
 * searching { "a", "bb", "bb", "c" }  we would always match on the
 * leftmost "bb".
 */
static struct btf_name_info *search_btf_name_size(struct btf_name_info *key,
						  struct btf_name_info *vals,
						  int nelems)
{
	struct btf_name_info *ret = NULL;
	int high = nelems - 1;
	int low = 0;

	while (low <= high) {
		int mid = (low + high)/2;
		struct btf_name_info *val = &vals[mid];
		int diff = cmp_btf_name_size(key, val);

		if (diff == 0)
			ret = val;
		/* even if found, keep searching for leftmost match */
		if (diff <= 0)
			high = mid - 1;
		else
			low = mid + 1;
	}
	return ret;
}

/* If a member of a split BTF struct/union refers to a base BTF
 * struct/union, mark that struct/union id temporarily in the id_map
 * with BTF_IS_EMBEDDED.  Members can be const/restrict/volatile/typedef
 * reference types, but if a pointer is encountered, the type is no longer
 * considered embedded.
 */
static int btf_mark_embedded_composite_type_ids(struct btf_relocate *r, __u32 i)
{
	struct btf_type *t = btf_type_by_id(r->btf, i);
	struct btf_field_iter it;
	__u32 *id;
	int err;

	if (!btf_is_composite(t))
		return 0;

	err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS);
	if (err)
		return err;

	while ((id = btf_field_iter_next(&it))) {
		__u32 next_id = *id;

		while (next_id) {
			t = btf_type_by_id(r->btf, next_id);
			switch (btf_kind(t)) {
			case BTF_KIND_CONST:
			case BTF_KIND_RESTRICT:
			case BTF_KIND_VOLATILE:
			case BTF_KIND_TYPEDEF:
			case BTF_KIND_TYPE_TAG:
				next_id = t->type;
				break;
			case BTF_KIND_ARRAY: {
				struct btf_array *a = btf_array(t);

				next_id = a->type;
				break;
			}
			case BTF_KIND_STRUCT:
			case BTF_KIND_UNION:
				if (next_id < r->nr_dist_base_types)
					r->id_map[next_id] = BTF_IS_EMBEDDED;
				next_id = 0;
				break;
			default:
				next_id = 0;
				break;
			}
		}
	}

	return 0;
}

/* Build a map from distilled base BTF ids to base BTF ids. To do so, iterate
 * through base BTF looking up distilled type (using binary search) equivalents.
 */
static int btf_relocate_map_distilled_base(struct btf_relocate *r)
{
	struct btf_name_info *info, *info_end;
	struct btf_type *base_t, *dist_t;
	__u8 *base_name_cnt = NULL;
	int err = 0;
	__u32 id;

	/* generate a sort index array of name/type ids sorted by name for
	 * distilled base BTF to speed name-based lookups.
	 */
	info = calloc(r->nr_dist_base_types, sizeof(*info));
	if (!info) {
		err = -ENOMEM;
		goto done;
	}
	info_end = info + r->nr_dist_base_types;
	for (id = 0; id < r->nr_dist_base_types; id++) {
		dist_t = btf_type_by_id(r->dist_base_btf, id);
		info[id].name = btf__name_by_offset(r->dist_base_btf, dist_t->name_off);
		info[id].id = id;
		info[id].size = dist_t->size;
		info[id].needs_size = true;
	}
	qsort(info, r->nr_dist_base_types, sizeof(*info), cmp_btf_name_size);

	/* Mark distilled base struct/union members of split BTF structs/unions
	 * in id_map with BTF_IS_EMBEDDED; this signals that these types
	 * need to match both name and size, otherwise embedding the base
	 * struct/union in the split type is invalid.
	 */
	for (id = r->nr_dist_base_types; id < r->nr_split_types; id++) {
		err = btf_mark_embedded_composite_type_ids(r, id);
		if (err)
			goto done;
	}

	/* Collect name counts for composite types in base BTF.  If multiple
	 * instances of a struct/union of the same name exist, we need to use
	 * size to determine which to map to since name alone is ambiguous.
	 */
	base_name_cnt = calloc(r->base_str_len, sizeof(*base_name_cnt));
	if (!base_name_cnt) {
		err = -ENOMEM;
		goto done;
	}
	for (id = 1; id < r->nr_base_types; id++) {
		base_t = btf_type_by_id(r->base_btf, id);
		if (!btf_is_composite(base_t) || !base_t->name_off)
			continue;
		if (base_name_cnt[base_t->name_off] < 255)
			base_name_cnt[base_t->name_off]++;
	}

	/* Now search base BTF for matching distilled base BTF types. */
	for (id = 1; id < r->nr_base_types; id++) {
		struct btf_name_info *dist_info, base_info = {};
		int dist_kind, base_kind;

		base_t = btf_type_by_id(r->base_btf, id);
		/* distilled base consists of named types only. */
		if (!base_t->name_off)
			continue;
		base_kind = btf_kind(base_t);
		base_info.id = id;
		base_info.name = btf__name_by_offset(r->base_btf, base_t->name_off);
		switch (base_kind) {
		case BTF_KIND_INT:
		case BTF_KIND_FLOAT:
		case BTF_KIND_ENUM:
		case BTF_KIND_ENUM64:
			/* These types should match both name and size */
			base_info.needs_size = true;
			base_info.size = base_t->size;
			break;
		case BTF_KIND_FWD:
			/* No size considerations for fwds. */
			break;
		case BTF_KIND_STRUCT:
		case BTF_KIND_UNION:
			/* Size only needs to be used for struct/union if there
			 * are multiple types in base BTF with the same name.
			 * If there are multiple _distilled_ types with the same
			 * name (a very unlikely scenario), that doesn't matter
			 * unless corresponding _base_ types to match them are
			 * missing.
			 */
			base_info.needs_size = base_name_cnt[base_t->name_off] > 1;
			base_info.size = base_t->size;
			break;
		default:
			continue;
		}
		/* iterate over all matching distilled base types */
		for (dist_info = search_btf_name_size(&base_info, info, r->nr_dist_base_types);
		     dist_info != NULL && dist_info < info_end &&
		     cmp_btf_name_size(&base_info, dist_info) == 0;
		     dist_info++) {
			if (!dist_info->id || dist_info->id >= r->nr_dist_base_types) {
				pr_warn("base BTF id [%d] maps to invalid distilled base BTF id [%d]\n",
					id, dist_info->id);
				err = -EINVAL;
				goto done;
			}
			dist_t = btf_type_by_id(r->dist_base_btf, dist_info->id);
			dist_kind = btf_kind(dist_t);

			/* Validate that the found distilled type is compatible.
			 * Do not error out on mismatch as another match may
			 * occur for an identically-named type.
			 */
			switch (dist_kind) {
			case BTF_KIND_FWD:
				switch (base_kind) {
				case BTF_KIND_FWD:
					if (btf_kflag(dist_t) != btf_kflag(base_t))
						continue;
					break;
				case BTF_KIND_STRUCT:
					if (btf_kflag(base_t))
						continue;
					break;
				case BTF_KIND_UNION:
					if (!btf_kflag(base_t))
						continue;
					break;
				default:
					continue;
				}
				break;
			case BTF_KIND_INT:
				if (dist_kind != base_kind ||
				    btf_int_encoding(base_t) != btf_int_encoding(dist_t))
					continue;
				break;
			case BTF_KIND_FLOAT:
				if (dist_kind != base_kind)
					continue;
				break;
			case BTF_KIND_ENUM:
				/* ENUM and ENUM64 are encoded as sized ENUM in
				 * distilled base BTF.
				 */
				if (base_kind != dist_kind && base_kind != BTF_KIND_ENUM64)
					continue;
				break;
			case BTF_KIND_STRUCT:
			case BTF_KIND_UNION:
				/* size verification is required for embedded
				 * struct/unions.
				 */
				if (r->id_map[dist_info->id] == BTF_IS_EMBEDDED &&
				    base_t->size != dist_t->size)
					continue;
				break;
			default:
				continue;
			}
			if (r->id_map[dist_info->id] &&
			    r->id_map[dist_info->id] != BTF_IS_EMBEDDED) {
				/* we already have a match; this tells us that
				 * multiple base types of the same name
				 * have the same size, since for cases where
				 * multiple types have the same name we match
				 * on name and size.  In this case, we have
				 * no way of determining which to relocate
				 * to in base BTF, so error out.
				 */
				pr_warn("distilled base BTF type '%s' [%u], size %u has multiple candidates of the same size (ids [%u, %u]) in base BTF\n",
					base_info.name, dist_info->id,
					base_t->size, id, r->id_map[dist_info->id]);
				err = -EINVAL;
				goto done;
			}
			/* map id and name */
			r->id_map[dist_info->id] = id;
			r->str_map[dist_t->name_off] = base_t->name_off;
		}
	}
	/* ensure all distilled BTF ids now have a mapping... */
	for (id = 1; id < r->nr_dist_base_types; id++) {
		const char *name;

		if (r->id_map[id] && r->id_map[id] != BTF_IS_EMBEDDED)
			continue;
		dist_t = btf_type_by_id(r->dist_base_btf, id);
		name = btf__name_by_offset(r->dist_base_btf, dist_t->name_off);
		pr_warn("distilled base BTF type '%s' [%d] is not mapped to base BTF id\n",
			name, id);
		err = -EINVAL;
		break;
	}
done:
	free(base_name_cnt);
	free(info);
	return err;
}

/* distilled base should only have named int/float/enum/fwd/struct/union types. */
static int btf_relocate_validate_distilled_base(struct btf_relocate *r)
{
	unsigned int i;

	for (i = 1; i < r->nr_dist_base_types; i++) {
		struct btf_type *t = btf_type_by_id(r->dist_base_btf, i);
		int kind = btf_kind(t);

		switch (kind) {
		case BTF_KIND_INT:
		case BTF_KIND_FLOAT:
		case BTF_KIND_ENUM:
		case BTF_KIND_STRUCT:
		case BTF_KIND_UNION:
		case BTF_KIND_FWD:
			if (t->name_off)
				break;
			pr_warn("type [%d], kind [%d] is invalid for distilled base BTF; it is anonymous\n",
				i, kind);
			return -EINVAL;
		default:
			pr_warn("type [%d] in distilled based BTF has unexpected kind [%d]\n",
				i, kind);
			return -EINVAL;
		}
	}
	return 0;
}

static int btf_relocate_rewrite_strs(struct btf_relocate *r, __u32 i)
{
	struct btf_type *t = btf_type_by_id(r->btf, i);
	struct btf_field_iter it;
	__u32 *str_off;
	int off, err;

	err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_STRS);
	if (err)
		return err;

	while ((str_off = btf_field_iter_next(&it))) {
		if (!*str_off)
			continue;
		if (*str_off >= r->dist_str_len) {
			*str_off += r->base_str_len - r->dist_str_len;
		} else {
			off = r->str_map[*str_off];
			if (!off) {
				pr_warn("string '%s' [offset %u] is not mapped to base BTF",
					btf__str_by_offset(r->btf, off), *str_off);
				return -ENOENT;
			}
			*str_off = off;
		}
	}
	return 0;
}

/* If successful, output of relocation is updated BTF with base BTF pointing
 * at base_btf, and type ids, strings adjusted accordingly.
 */
int btf_relocate(struct btf *btf, const struct btf *base_btf, __u32 **id_map)
{
	unsigned int nr_types = btf__type_cnt(btf);
	const struct btf_header *dist_base_hdr;
	const struct btf_header *base_hdr;
	struct btf_relocate r = {};
	int err = 0;
	__u32 id, i;

	r.dist_base_btf = btf__base_btf(btf);
	if (!base_btf || r.dist_base_btf == base_btf)
		return -EINVAL;

	r.nr_dist_base_types = btf__type_cnt(r.dist_base_btf);
	r.nr_base_types = btf__type_cnt(base_btf);
	r.nr_split_types = nr_types - r.nr_dist_base_types;
	r.btf = btf;
	r.base_btf = base_btf;

	r.id_map = calloc(nr_types, sizeof(*r.id_map));
	r.str_map = calloc(btf_header(r.dist_base_btf)->str_len, sizeof(*r.str_map));
	dist_base_hdr = btf_header(r.dist_base_btf);
	base_hdr = btf_header(r.base_btf);
	r.dist_str_len = dist_base_hdr->str_len;
	r.base_str_len = base_hdr->str_len;
	if (!r.id_map || !r.str_map) {
		err = -ENOMEM;
		goto err_out;
	}

	err = btf_relocate_validate_distilled_base(&r);
	if (err)
		goto err_out;

	/* Split BTF ids need to be adjusted as base and distilled base
	 * have different numbers of types, changing the start id of split
	 * BTF.
	 */
	for (id = r.nr_dist_base_types; id < nr_types; id++)
		r.id_map[id] = id + r.nr_base_types - r.nr_dist_base_types;

	/* Build a map from distilled base ids to actual base BTF ids; it is used
	 * to update split BTF id references.  Also build a str_map mapping from
	 * distilled base BTF names to base BTF names.
	 */
	err = btf_relocate_map_distilled_base(&r);
	if (err)
		goto err_out;

	/* Next, rewrite type ids in split BTF, replacing split ids with updated
	 * ids based on number of types in base BTF, and base ids with
	 * relocated ids from base_btf.
	 */
	for (i = 0, id = r.nr_dist_base_types; i < r.nr_split_types; i++, id++) {
		err = btf_relocate_rewrite_type_id(&r, id);
		if (err)
			goto err_out;
	}
	/* String offsets now need to be updated using the str_map. */
	for (i = 0; i < r.nr_split_types; i++) {
		err = btf_relocate_rewrite_strs(&r, i + r.nr_dist_base_types);
		if (err)
			goto err_out;
	}
	/* Finally reset base BTF to be base_btf */
	btf_set_base_btf(btf, base_btf);

	if (id_map) {
		*id_map = r.id_map;
		r.id_map = NULL;
	}
err_out:
	free(r.id_map);
	free(r.str_map);
	return err;
}