cgroup.c | Explore in Territory

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Functions to manage eBPF programs attached to cgroups
 *
 * Copyright (c) 2016 Daniel Mack
 */

#include <linux/kernel.h>
#include <linux/atomic.h>
#include <linux/cgroup.h>
#include <linux/filter.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/string.h>
#include <linux/bpf.h>
#include <linux/bpf-cgroup.h>
#include <linux/bpf_lsm.h>
#include <linux/bpf_verifier.h>
#include <net/sock.h>
#include <net/bpf_sk_storage.h>

#include "../cgroup/cgroup-internal.h"

DEFINE_STATIC_KEY_ARRAY_FALSE(cgroup_bpf_enabled_key, MAX_CGROUP_BPF_ATTACH_TYPE);
EXPORT_SYMBOL(…);

/* __always_inline is necessary to prevent indirect call through run_prog
 * function pointer.
 */
static __always_inline int
bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp,
		      enum cgroup_bpf_attach_type atype,
		      const void *ctx, bpf_prog_run_fn run_prog,
		      int retval, u32 *ret_flags)
{ … }

unsigned int __cgroup_bpf_run_lsm_sock(const void *ctx,
				       const struct bpf_insn *insn)
{ … }

unsigned int __cgroup_bpf_run_lsm_socket(const void *ctx,
					 const struct bpf_insn *insn)
{ … }

unsigned int __cgroup_bpf_run_lsm_current(const void *ctx,
					  const struct bpf_insn *insn)
{ … }

#ifdef CONFIG_BPF_LSM
struct cgroup_lsm_atype { … };

static struct cgroup_lsm_atype cgroup_lsm_atype[CGROUP_LSM_NUM];

static enum cgroup_bpf_attach_type
bpf_cgroup_atype_find(enum bpf_attach_type attach_type, u32 attach_btf_id)
{ … }

void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype)
{ … }

void bpf_cgroup_atype_put(int cgroup_atype)
{ … }
#else
static enum cgroup_bpf_attach_type
bpf_cgroup_atype_find(enum bpf_attach_type attach_type, u32 attach_btf_id)
{
	if (attach_type != BPF_LSM_CGROUP)
		return to_cgroup_bpf_attach_type(attach_type);
	return -EOPNOTSUPP;
}
#endif /* CONFIG_BPF_LSM */

void cgroup_bpf_offline(struct cgroup *cgrp)
{ … }

static void bpf_cgroup_storages_free(struct bpf_cgroup_storage *storages[])
{ … }

static int bpf_cgroup_storages_alloc(struct bpf_cgroup_storage *storages[],
				     struct bpf_cgroup_storage *new_storages[],
				     enum bpf_attach_type type,
				     struct bpf_prog *prog,
				     struct cgroup *cgrp)
{ … }

static void bpf_cgroup_storages_assign(struct bpf_cgroup_storage *dst[],
				       struct bpf_cgroup_storage *src[])
{ … }

static void bpf_cgroup_storages_link(struct bpf_cgroup_storage *storages[],
				     struct cgroup *cgrp,
				     enum bpf_attach_type attach_type)
{ … }

/* Called when bpf_cgroup_link is auto-detached from dying cgroup.
 * It drops cgroup and bpf_prog refcounts, and marks bpf_link as defunct. It
 * doesn't free link memory, which will eventually be done by bpf_link's
 * release() callback, when its last FD is closed.
 */
static void bpf_cgroup_link_auto_detach(struct bpf_cgroup_link *link)
{ … }

/**
 * cgroup_bpf_release() - put references of all bpf programs and
 *                        release all cgroup bpf data
 * @work: work structure embedded into the cgroup to modify
 */
static void cgroup_bpf_release(struct work_struct *work)
{ … }

/**
 * cgroup_bpf_release_fn() - callback used to schedule releasing
 *                           of bpf cgroup data
 * @ref: percpu ref counter structure
 */
static void cgroup_bpf_release_fn(struct percpu_ref *ref)
{ … }

/* Get underlying bpf_prog of bpf_prog_list entry, regardless if it's through
 * link or direct prog.
 */
static struct bpf_prog *prog_list_prog(struct bpf_prog_list *pl)
{ … }

/* count number of elements in the list.
 * it's slow but the list cannot be long
 */
static u32 prog_list_length(struct hlist_head *head)
{ … }

/* if parent has non-overridable prog attached,
 * disallow attaching new programs to the descendent cgroup.
 * if parent has overridable or multi-prog, allow attaching
 */
static bool hierarchy_allows_attach(struct cgroup *cgrp,
				    enum cgroup_bpf_attach_type atype)
{ … }

/* compute a chain of effective programs for a given cgroup:
 * start from the list of programs in this cgroup and add
 * all parent programs.
 * Note that parent's F_ALLOW_OVERRIDE-type program is yielding
 * to programs in this cgroup
 */
static int compute_effective_progs(struct cgroup *cgrp,
				   enum cgroup_bpf_attach_type atype,
				   struct bpf_prog_array **array)
{ … }

static void activate_effective_progs(struct cgroup *cgrp,
				     enum cgroup_bpf_attach_type atype,
				     struct bpf_prog_array *old_array)
{ … }

/**
 * cgroup_bpf_inherit() - inherit effective programs from parent
 * @cgrp: the cgroup to modify
 */
int cgroup_bpf_inherit(struct cgroup *cgrp)
{ … }

static int update_effective_progs(struct cgroup *cgrp,
				  enum cgroup_bpf_attach_type atype)
{ … }

#define BPF_CGROUP_MAX_PROGS …

static struct bpf_prog_list *find_attach_entry(struct hlist_head *progs,
					       struct bpf_prog *prog,
					       struct bpf_cgroup_link *link,
					       struct bpf_prog *replace_prog,
					       bool allow_multi)
{ … }

/**
 * __cgroup_bpf_attach() - Attach the program or the link to a cgroup, and
 *                         propagate the change to descendants
 * @cgrp: The cgroup which descendants to traverse
 * @prog: A program to attach
 * @link: A link to attach
 * @replace_prog: Previously attached program to replace if BPF_F_REPLACE is set
 * @type: Type of attach operation
 * @flags: Option flags
 *
 * Exactly one of @prog or @link can be non-null.
 * Must be called with cgroup_mutex held.
 */
static int __cgroup_bpf_attach(struct cgroup *cgrp,
			       struct bpf_prog *prog, struct bpf_prog *replace_prog,
			       struct bpf_cgroup_link *link,
			       enum bpf_attach_type type, u32 flags)
{ … }

static int cgroup_bpf_attach(struct cgroup *cgrp,
			     struct bpf_prog *prog, struct bpf_prog *replace_prog,
			     struct bpf_cgroup_link *link,
			     enum bpf_attach_type type,
			     u32 flags)
{ … }

/* Swap updated BPF program for given link in effective program arrays across
 * all descendant cgroups. This function is guaranteed to succeed.
 */
static void replace_effective_prog(struct cgroup *cgrp,
				   enum cgroup_bpf_attach_type atype,
				   struct bpf_cgroup_link *link)
{ … }

/**
 * __cgroup_bpf_replace() - Replace link's program and propagate the change
 *                          to descendants
 * @cgrp: The cgroup which descendants to traverse
 * @link: A link for which to replace BPF program
 * @new_prog: &struct bpf_prog for the target BPF program with its refcnt
 *            incremented
 *
 * Must be called with cgroup_mutex held.
 */
static int __cgroup_bpf_replace(struct cgroup *cgrp,
				struct bpf_cgroup_link *link,
				struct bpf_prog *new_prog)
{ … }

static int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *new_prog,
			      struct bpf_prog *old_prog)
{ … }

static struct bpf_prog_list *find_detach_entry(struct hlist_head *progs,
					       struct bpf_prog *prog,
					       struct bpf_cgroup_link *link,
					       bool allow_multi)
{ … }

/**
 * purge_effective_progs() - After compute_effective_progs fails to alloc new
 *                           cgrp->bpf.inactive table we can recover by
 *                           recomputing the array in place.
 *
 * @cgrp: The cgroup which descendants to travers
 * @prog: A program to detach or NULL
 * @link: A link to detach or NULL
 * @atype: Type of detach operation
 */
static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog,
				  struct bpf_cgroup_link *link,
				  enum cgroup_bpf_attach_type atype)
{ … }

/**
 * __cgroup_bpf_detach() - Detach the program or link from a cgroup, and
 *                         propagate the change to descendants
 * @cgrp: The cgroup which descendants to traverse
 * @prog: A program to detach or NULL
 * @link: A link to detach or NULL
 * @type: Type of detach operation
 *
 * At most one of @prog or @link can be non-NULL.
 * Must be called with cgroup_mutex held.
 */
static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
			       struct bpf_cgroup_link *link, enum bpf_attach_type type)
{ … }

static int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
			     enum bpf_attach_type type)
{ … }

/* Must be called with cgroup_mutex held to avoid races. */
static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
			      union bpf_attr __user *uattr)
{ … }

static int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
			    union bpf_attr __user *uattr)
{ … }

int cgroup_bpf_prog_attach(const union bpf_attr *attr,
			   enum bpf_prog_type ptype, struct bpf_prog *prog)
{ … }

int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
{ … }

static void bpf_cgroup_link_release(struct bpf_link *link)
{ … }

static void bpf_cgroup_link_dealloc(struct bpf_link *link)
{ … }

static int bpf_cgroup_link_detach(struct bpf_link *link)
{ … }

static void bpf_cgroup_link_show_fdinfo(const struct bpf_link *link,
					struct seq_file *seq)
{ … }

static int bpf_cgroup_link_fill_link_info(const struct bpf_link *link,
					  struct bpf_link_info *info)
{ … }

static const struct bpf_link_ops bpf_cgroup_link_lops = …;

int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{ … }

int cgroup_bpf_prog_query(const union bpf_attr *attr,
			  union bpf_attr __user *uattr)
{ … }

/**
 * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
 * @sk: The socket sending or receiving traffic
 * @skb: The skb that is being sent or received
 * @atype: The type of program to be executed
 *
 * If no socket is passed, or the socket is not of type INET or INET6,
 * this function does nothing and returns 0.
 *
 * The program type passed in via @type must be suitable for network
 * filtering. No further check is performed to assert that.
 *
 * For egress packets, this function can return:
 *   NET_XMIT_SUCCESS    (0)	- continue with packet output
 *   NET_XMIT_DROP       (1)	- drop packet and notify TCP to call cwr
 *   NET_XMIT_CN         (2)	- continue with packet output and notify TCP
 *				  to call cwr
 *   -err			- drop packet
 *
 * For ingress packets, this function will return -EPERM if any
 * attached program was found and if it returned != 1 during execution.
 * Otherwise 0 is returned.
 */
int __cgroup_bpf_run_filter_skb(struct sock *sk,
				struct sk_buff *skb,
				enum cgroup_bpf_attach_type atype)
{ … }
EXPORT_SYMBOL(…);

/**
 * __cgroup_bpf_run_filter_sk() - Run a program on a sock
 * @sk: sock structure to manipulate
 * @atype: The type of program to be executed
 *
 * socket is passed is expected to be of type INET or INET6.
 *
 * The program type passed in via @type must be suitable for sock
 * filtering. No further check is performed to assert that.
 *
 * This function will return %-EPERM if any if an attached program was found
 * and if it returned != 1 during execution. In all other cases, 0 is returned.
 */
int __cgroup_bpf_run_filter_sk(struct sock *sk,
			       enum cgroup_bpf_attach_type atype)
{ … }
EXPORT_SYMBOL(…);

/**
 * __cgroup_bpf_run_filter_sock_addr() - Run a program on a sock and
 *                                       provided by user sockaddr
 * @sk: sock struct that will use sockaddr
 * @uaddr: sockaddr struct provided by user
 * @uaddrlen: Pointer to the size of the sockaddr struct provided by user. It is
 *            read-only for AF_INET[6] uaddr but can be modified for AF_UNIX
 *            uaddr.
 * @atype: The type of program to be executed
 * @t_ctx: Pointer to attach type specific context
 * @flags: Pointer to u32 which contains higher bits of BPF program
 *         return value (OR'ed together).
 *
 * socket is expected to be of type INET, INET6 or UNIX.
 *
 * This function will return %-EPERM if an attached program is found and
 * returned value != 1 during execution. In all other cases, 0 is returned.
 */
int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
				      struct sockaddr *uaddr,
				      int *uaddrlen,
				      enum cgroup_bpf_attach_type atype,
				      void *t_ctx,
				      u32 *flags)
{ … }
EXPORT_SYMBOL(…);

/**
 * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock
 * @sk: socket to get cgroup from
 * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains
 * sk with connection information (IP addresses, etc.) May not contain
 * cgroup info if it is a req sock.
 * @atype: The type of program to be executed
 *
 * socket passed is expected to be of type INET or INET6.
 *
 * The program type passed in via @type must be suitable for sock_ops
 * filtering. No further check is performed to assert that.
 *
 * This function will return %-EPERM if any if an attached program was found
 * and if it returned != 1 during execution. In all other cases, 0 is returned.
 */
int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
				     struct bpf_sock_ops_kern *sock_ops,
				     enum cgroup_bpf_attach_type atype)
{ … }
EXPORT_SYMBOL(…);

int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
				      short access, enum cgroup_bpf_attach_type atype)
{ … }

BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
{ … }

const struct bpf_func_proto bpf_get_local_storage_proto = …;

BPF_CALL_0(bpf_get_retval)
{ … }

const struct bpf_func_proto bpf_get_retval_proto = …;

BPF_CALL_1(bpf_set_retval, int, retval)
{ … }

const struct bpf_func_proto bpf_set_retval_proto = …;

static const struct bpf_func_proto *
cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{ … }

static bool cgroup_dev_is_valid_access(int off, int size,
				       enum bpf_access_type type,
				       const struct bpf_prog *prog,
				       struct bpf_insn_access_aux *info)
{ … }

const struct bpf_prog_ops cg_dev_prog_ops = …;

const struct bpf_verifier_ops cg_dev_verifier_ops = …;

/**
 * __cgroup_bpf_run_filter_sysctl - Run a program on sysctl
 *
 * @head: sysctl table header
 * @table: sysctl table
 * @write: sysctl is being read (= 0) or written (= 1)
 * @buf: pointer to buffer (in and out)
 * @pcount: value-result argument: value is size of buffer pointed to by @buf,
 *	result is size of @new_buf if program set new value, initial value
 *	otherwise
 * @ppos: value-result argument: value is position at which read from or write
 *	to sysctl is happening, result is new position if program overrode it,
 *	initial value otherwise
 * @atype: type of program to be executed
 *
 * Program is run when sysctl is being accessed, either read or written, and
 * can allow or deny such access.
 *
 * This function will return %-EPERM if an attached program is found and
 * returned value != 1 during execution. In all other cases 0 is returned.
 */
int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
				   struct ctl_table *table, int write,
				   char **buf, size_t *pcount, loff_t *ppos,
				   enum cgroup_bpf_attach_type atype)
{ … }

#ifdef CONFIG_NET
static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen,
			     struct bpf_sockopt_buf *buf)
{ … }

static void sockopt_free_buf(struct bpf_sockopt_kern *ctx,
			     struct bpf_sockopt_buf *buf)
{ … }

static bool sockopt_buf_allocated(struct bpf_sockopt_kern *ctx,
				  struct bpf_sockopt_buf *buf)
{ … }

int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
				       int *optname, sockptr_t optval,
				       int *optlen, char **kernel_optval)
{ … }

int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
				       int optname, sockptr_t optval,
				       sockptr_t optlen, int max_optlen,
				       int retval)
{ … }

int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
					    int optname, void *optval,
					    int *optlen, int retval)
{ … }
#endif

static ssize_t sysctl_cpy_dir(const struct ctl_dir *dir, char **bufp,
			      size_t *lenp)
{ … }

BPF_CALL_4(bpf_sysctl_get_name, struct bpf_sysctl_kern *, ctx, char *, buf,
	   size_t, buf_len, u64, flags)
{ … }

static const struct bpf_func_proto bpf_sysctl_get_name_proto = …;

static int copy_sysctl_value(char *dst, size_t dst_len, char *src,
			     size_t src_len)
{ … }

BPF_CALL_3(bpf_sysctl_get_current_value, struct bpf_sysctl_kern *, ctx,
	   char *, buf, size_t, buf_len)
{ … }

static const struct bpf_func_proto bpf_sysctl_get_current_value_proto = …;

BPF_CALL_3(bpf_sysctl_get_new_value, struct bpf_sysctl_kern *, ctx, char *, buf,
	   size_t, buf_len)
{ … }

static const struct bpf_func_proto bpf_sysctl_get_new_value_proto = …;

BPF_CALL_3(bpf_sysctl_set_new_value, struct bpf_sysctl_kern *, ctx,
	   const char *, buf, size_t, buf_len)
{ … }

static const struct bpf_func_proto bpf_sysctl_set_new_value_proto = …;

static const struct bpf_func_proto *
sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{ … }

static bool sysctl_is_valid_access(int off, int size, enum bpf_access_type type,
				   const struct bpf_prog *prog,
				   struct bpf_insn_access_aux *info)
{ … }

static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
				     const struct bpf_insn *si,
				     struct bpf_insn *insn_buf,
				     struct bpf_prog *prog, u32 *target_size)
{ … }

const struct bpf_verifier_ops cg_sysctl_verifier_ops = …;

const struct bpf_prog_ops cg_sysctl_prog_ops = …;

#ifdef CONFIG_NET
BPF_CALL_1(bpf_get_netns_cookie_sockopt, struct bpf_sockopt_kern *, ctx)
{ … }

static const struct bpf_func_proto bpf_get_netns_cookie_sockopt_proto = …;
#endif

static const struct bpf_func_proto *
cg_sockopt_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{ … }

static bool cg_sockopt_is_valid_access(int off, int size,
				       enum bpf_access_type type,
				       const struct bpf_prog *prog,
				       struct bpf_insn_access_aux *info)
{ … }

#define CG_SOCKOPT_READ_FIELD(F) …

#define CG_SOCKOPT_WRITE_FIELD(F) …

static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type,
					 const struct bpf_insn *si,
					 struct bpf_insn *insn_buf,
					 struct bpf_prog *prog,
					 u32 *target_size)
{ … }

static int cg_sockopt_get_prologue(struct bpf_insn *insn_buf,
				   bool direct_write,
				   const struct bpf_prog *prog)
{ … }

const struct bpf_verifier_ops cg_sockopt_verifier_ops = …;

const struct bpf_prog_ops cg_sockopt_prog_ops = …;

/* Common helpers for cgroup hooks. */
const struct bpf_func_proto *
cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{ … }

/* Common helpers for cgroup hooks with valid process context. */
const struct bpf_func_proto *
cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{ … }
linux/kernel/bpf/cgroup.c