linux/kernel/seccomp.c

// SPDX-License-Identifier: GPL-2.0
/*
 * linux/kernel/seccomp.c
 *
 * Copyright 2004-2005  Andrea Arcangeli <[email protected]>
 *
 * Copyright (C) 2012 Google, Inc.
 * Will Drewry <[email protected]>
 *
 * This defines a simple but solid secure-computing facility.
 *
 * Mode 1 uses a fixed list of allowed system calls.
 * Mode 2 allows user-defined system call filters in the form
 *        of Berkeley Packet Filters/Linux Socket Filters.
 */
#define pr_fmt(fmt)

#include <linux/refcount.h>
#include <linux/audit.h>
#include <linux/compat.h>
#include <linux/coredump.h>
#include <linux/kmemleak.h>
#include <linux/nospec.h>
#include <linux/prctl.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/seccomp.h>
#include <linux/slab.h>
#include <linux/syscalls.h>
#include <linux/sysctl.h>

/* Not exposed in headers: strictly internal use only. */
#define SECCOMP_MODE_DEAD

#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
#include <asm/syscall.h>
#endif

#ifdef CONFIG_SECCOMP_FILTER
#include <linux/file.h>
#include <linux/filter.h>
#include <linux/pid.h>
#include <linux/ptrace.h>
#include <linux/capability.h>
#include <linux/uaccess.h>
#include <linux/anon_inodes.h>
#include <linux/lockdep.h>

/*
 * When SECCOMP_IOCTL_NOTIF_ID_VALID was first introduced, it had the
 * wrong direction flag in the ioctl number. This is the broken one,
 * which the kernel needs to keep supporting until all userspaces stop
 * using the wrong command number.
 */
#define SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR

enum notify_state {};

struct seccomp_knotif {};

/**
 * struct seccomp_kaddfd - container for seccomp_addfd ioctl messages
 *
 * @file: A reference to the file to install in the other task
 * @fd: The fd number to install it at. If the fd number is -1, it means the
 *      installing process should allocate the fd as normal.
 * @flags: The flags for the new file descriptor. At the moment, only O_CLOEXEC
 *         is allowed.
 * @ioctl_flags: The flags used for the seccomp_addfd ioctl.
 * @setfd: whether or not SECCOMP_ADDFD_FLAG_SETFD was set during notify_addfd
 * @ret: The return value of the installing process. It is set to the fd num
 *       upon success (>= 0).
 * @completion: Indicates that the installing process has completed fd
 *              installation, or gone away (either due to successful
 *              reply, or signal)
 * @list: list_head for chaining seccomp_kaddfd together.
 *
 */
struct seccomp_kaddfd {};

/**
 * struct notification - container for seccomp userspace notifications. Since
 * most seccomp filters will not have notification listeners attached and this
 * structure is fairly large, we store the notification-specific stuff in a
 * separate structure.
 *
 * @requests: A semaphore that users of this notification can wait on for
 *            changes. Actual reads and writes are still controlled with
 *            filter->notify_lock.
 * @flags: A set of SECCOMP_USER_NOTIF_FD_* flags.
 * @next_id: The id of the next request.
 * @notifications: A list of struct seccomp_knotif elements.
 */

struct notification {};

#ifdef SECCOMP_ARCH_NATIVE
/**
 * struct action_cache - per-filter cache of seccomp actions per
 * arch/syscall pair
 *
 * @allow_native: A bitmap where each bit represents whether the
 *		  filter will always allow the syscall, for the
 *		  native architecture.
 * @allow_compat: A bitmap where each bit represents whether the
 *		  filter will always allow the syscall, for the
 *		  compat architecture.
 */
struct action_cache {};
#else
struct action_cache { };

static inline bool seccomp_cache_check_allow(const struct seccomp_filter *sfilter,
					     const struct seccomp_data *sd)
{
	return false;
}

static inline void seccomp_cache_prepare(struct seccomp_filter *sfilter)
{
}
#endif /* SECCOMP_ARCH_NATIVE */

/**
 * struct seccomp_filter - container for seccomp BPF programs
 *
 * @refs: Reference count to manage the object lifetime.
 *	  A filter's reference count is incremented for each directly
 *	  attached task, once for the dependent filter, and if
 *	  requested for the user notifier. When @refs reaches zero,
 *	  the filter can be freed.
 * @users: A filter's @users count is incremented for each directly
 *         attached task (filter installation, fork(), thread_sync),
 *	   and once for the dependent filter (tracked in filter->prev).
 *	   When it reaches zero it indicates that no direct or indirect
 *	   users of that filter exist. No new tasks can get associated with
 *	   this filter after reaching 0. The @users count is always smaller
 *	   or equal to @refs. Hence, reaching 0 for @users does not mean
 *	   the filter can be freed.
 * @cache: cache of arch/syscall mappings to actions
 * @log: true if all actions except for SECCOMP_RET_ALLOW should be logged
 * @wait_killable_recv: Put notifying process in killable state once the
 *			notification is received by the userspace listener.
 * @prev: points to a previously installed, or inherited, filter
 * @prog: the BPF program to evaluate
 * @notif: the struct that holds all notification related information
 * @notify_lock: A lock for all notification-related accesses.
 * @wqh: A wait queue for poll if a notifier is in use.
 *
 * seccomp_filter objects are organized in a tree linked via the @prev
 * pointer.  For any task, it appears to be a singly-linked list starting
 * with current->seccomp.filter, the most recently attached or inherited filter.
 * However, multiple filters may share a @prev node, by way of fork(), which
 * results in a unidirectional tree existing in memory.  This is similar to
 * how namespaces work.
 *
 * seccomp_filter objects should never be modified after being attached
 * to a task_struct (other than @refs).
 */
struct seccomp_filter {};

/* Limit any path through the tree to 256KB worth of instructions. */
#define MAX_INSNS_PER_PATH

/*
 * Endianness is explicitly ignored and left for BPF program authors to manage
 * as per the specific architecture.
 */
static void populate_seccomp_data(struct seccomp_data *sd)
{}

/**
 *	seccomp_check_filter - verify seccomp filter code
 *	@filter: filter to verify
 *	@flen: length of filter
 *
 * Takes a previously checked filter (by bpf_check_classic) and
 * redirects all filter code that loads struct sk_buff data
 * and related data through seccomp_bpf_load.  It also
 * enforces length and alignment checking of those loads.
 *
 * Returns 0 if the rule set is legal or -EINVAL if not.
 */
static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
{}

#ifdef SECCOMP_ARCH_NATIVE
static inline bool seccomp_cache_check_allow_bitmap(const void *bitmap,
						    size_t bitmap_size,
						    int syscall_nr)
{}

/**
 * seccomp_cache_check_allow - lookup seccomp cache
 * @sfilter: The seccomp filter
 * @sd: The seccomp data to lookup the cache with
 *
 * Returns true if the seccomp_data is cached and allowed.
 */
static inline bool seccomp_cache_check_allow(const struct seccomp_filter *sfilter,
					     const struct seccomp_data *sd)
{}
#endif /* SECCOMP_ARCH_NATIVE */

#define ACTION_ONLY(ret)
/**
 * seccomp_run_filters - evaluates all seccomp filters against @sd
 * @sd: optional seccomp data to be passed to filters
 * @match: stores struct seccomp_filter that resulted in the return value,
 *         unless filter returned SECCOMP_RET_ALLOW, in which case it will
 *         be unchanged.
 *
 * Returns valid seccomp BPF response codes.
 */
static u32 seccomp_run_filters(const struct seccomp_data *sd,
			       struct seccomp_filter **match)
{}
#endif /* CONFIG_SECCOMP_FILTER */

static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
{}

void __weak arch_seccomp_spec_mitigate(struct task_struct *task) {}

static inline void seccomp_assign_mode(struct task_struct *task,
				       unsigned long seccomp_mode,
				       unsigned long flags)
{}

#ifdef CONFIG_SECCOMP_FILTER
/* Returns 1 if the parent is an ancestor of the child. */
static int is_ancestor(struct seccomp_filter *parent,
		       struct seccomp_filter *child)
{}

/**
 * seccomp_can_sync_threads: checks if all threads can be synchronized
 *
 * Expects sighand and cred_guard_mutex locks to be held.
 *
 * Returns 0 on success, -ve on error, or the pid of a thread which was
 * either not in the correct seccomp mode or did not have an ancestral
 * seccomp filter.
 */
static inline pid_t seccomp_can_sync_threads(void)
{}

static inline void seccomp_filter_free(struct seccomp_filter *filter)
{}

static void __seccomp_filter_orphan(struct seccomp_filter *orig)
{}

static void __put_seccomp_filter(struct seccomp_filter *orig)
{}

static void __seccomp_filter_release(struct seccomp_filter *orig)
{}

/**
 * seccomp_filter_release - Detach the task from its filter tree,
 *			    drop its reference count, and notify
 *			    about unused filters
 *
 * @tsk: task the filter should be released from.
 *
 * This function should only be called when the task is exiting as
 * it detaches it from its filter tree. PF_EXITING has to be set
 * for the task.
 */
void seccomp_filter_release(struct task_struct *tsk)
{}

/**
 * seccomp_sync_threads: sets all threads to use current's filter
 *
 * @flags: SECCOMP_FILTER_FLAG_* flags to set during sync.
 *
 * Expects sighand and cred_guard_mutex locks to be held, and for
 * seccomp_can_sync_threads() to have returned success already
 * without dropping the locks.
 *
 */
static inline void seccomp_sync_threads(unsigned long flags)
{}

/**
 * seccomp_prepare_filter: Prepares a seccomp filter for use.
 * @fprog: BPF program to install
 *
 * Returns filter on success or an ERR_PTR on failure.
 */
static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
{}

/**
 * seccomp_prepare_user_filter - prepares a user-supplied sock_fprog
 * @user_filter: pointer to the user data containing a sock_fprog.
 *
 * Returns 0 on success and non-zero otherwise.
 */
static struct seccomp_filter *
seccomp_prepare_user_filter(const char __user *user_filter)
{}

#ifdef SECCOMP_ARCH_NATIVE
/**
 * seccomp_is_const_allow - check if filter is constant allow with given data
 * @fprog: The BPF programs
 * @sd: The seccomp data to check against, only syscall number and arch
 *      number are considered constant.
 */
static bool seccomp_is_const_allow(struct sock_fprog_kern *fprog,
				   struct seccomp_data *sd)
{}

static void seccomp_cache_prepare_bitmap(struct seccomp_filter *sfilter,
					 void *bitmap, const void *bitmap_prev,
					 size_t bitmap_size, int arch)
{}

/**
 * seccomp_cache_prepare - emulate the filter to find cacheable syscalls
 * @sfilter: The seccomp filter
 *
 * Returns 0 if successful or -errno if error occurred.
 */
static void seccomp_cache_prepare(struct seccomp_filter *sfilter)
{}
#endif /* SECCOMP_ARCH_NATIVE */

/**
 * seccomp_attach_filter: validate and attach filter
 * @flags:  flags to change filter behavior
 * @filter: seccomp filter to add to the current process
 *
 * Caller must be holding current->sighand->siglock lock.
 *
 * Returns 0 on success, -ve on error, or
 *   - in TSYNC mode: the pid of a thread which was either not in the correct
 *     seccomp mode or did not have an ancestral seccomp filter
 *   - in NEW_LISTENER mode: the fd of the new listener
 */
static long seccomp_attach_filter(unsigned int flags,
				  struct seccomp_filter *filter)
{}

static void __get_seccomp_filter(struct seccomp_filter *filter)
{}

/* get_seccomp_filter - increments the reference count of the filter on @tsk */
void get_seccomp_filter(struct task_struct *tsk)
{}

#endif	/* CONFIG_SECCOMP_FILTER */

/* For use with seccomp_actions_logged */
#define SECCOMP_LOG_KILL_PROCESS
#define SECCOMP_LOG_KILL_THREAD
#define SECCOMP_LOG_TRAP
#define SECCOMP_LOG_ERRNO
#define SECCOMP_LOG_TRACE
#define SECCOMP_LOG_LOG
#define SECCOMP_LOG_ALLOW
#define SECCOMP_LOG_USER_NOTIF

static u32 seccomp_actions_logged =;

static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
			       bool requested)
{}

/*
 * Secure computing mode 1 allows only read/write/exit/sigreturn.
 * To be fully secure this must be combined with rlimit
 * to limit the stack allocations too.
 */
static const int mode1_syscalls[] =;

static void __secure_computing_strict(int this_syscall)
{}

#ifndef CONFIG_HAVE_ARCH_SECCOMP_FILTER
void secure_computing_strict(int this_syscall)
{
	int mode = current->seccomp.mode;

	if (IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) &&
	    unlikely(current->ptrace & PT_SUSPEND_SECCOMP))
		return;

	if (mode == SECCOMP_MODE_DISABLED)
		return;
	else if (mode == SECCOMP_MODE_STRICT)
		__secure_computing_strict(this_syscall);
	else
		BUG();
}
#else

#ifdef CONFIG_SECCOMP_FILTER
static u64 seccomp_next_notify_id(struct seccomp_filter *filter)
{}

static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd, struct seccomp_knotif *n)
{}

static bool should_sleep_killable(struct seccomp_filter *match,
				  struct seccomp_knotif *n)
{}

static int seccomp_do_user_notification(int this_syscall,
					struct seccomp_filter *match,
					const struct seccomp_data *sd)
{}

static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
			    const bool recheck_after_trace)
{}
#else
static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
			    const bool recheck_after_trace)
{
	BUG();

	return -1;
}
#endif

int __secure_computing(const struct seccomp_data *sd)
{}
#endif /* CONFIG_HAVE_ARCH_SECCOMP_FILTER */

long prctl_get_seccomp(void)
{}

/**
 * seccomp_set_mode_strict: internal function for setting strict seccomp
 *
 * Once current->seccomp.mode is non-zero, it may not be changed.
 *
 * Returns 0 on success or -EINVAL on failure.
 */
static long seccomp_set_mode_strict(void)
{}

#ifdef CONFIG_SECCOMP_FILTER
static void seccomp_notify_free(struct seccomp_filter *filter)
{}

static void seccomp_notify_detach(struct seccomp_filter *filter)
{}

static int seccomp_notify_release(struct inode *inode, struct file *file)
{}

/* must be called with notif_lock held */
static inline struct seccomp_knotif *
find_notification(struct seccomp_filter *filter, u64 id)
{}

static int recv_wake_function(wait_queue_entry_t *wait, unsigned int mode, int sync,
				  void *key)
{}

static int recv_wait_event(struct seccomp_filter *filter)
{}

static long seccomp_notify_recv(struct seccomp_filter *filter,
				void __user *buf)
{}

static long seccomp_notify_send(struct seccomp_filter *filter,
				void __user *buf)
{}

static long seccomp_notify_id_valid(struct seccomp_filter *filter,
				    void __user *buf)
{}

static long seccomp_notify_set_flags(struct seccomp_filter *filter,
				    unsigned long flags)
{}

static long seccomp_notify_addfd(struct seccomp_filter *filter,
				 struct seccomp_notif_addfd __user *uaddfd,
				 unsigned int size)
{}

static long seccomp_notify_ioctl(struct file *file, unsigned int cmd,
				 unsigned long arg)
{}

static __poll_t seccomp_notify_poll(struct file *file,
				    struct poll_table_struct *poll_tab)
{}

static const struct file_operations seccomp_notify_ops =;

static struct file *init_listener(struct seccomp_filter *filter)
{}

/*
 * Does @new_child have a listener while an ancestor also has a listener?
 * If so, we'll want to reject this filter.
 * This only has to be tested for the current process, even in the TSYNC case,
 * because TSYNC installs @child with the same parent on all threads.
 * Note that @new_child is not hooked up to its parent at this point yet, so
 * we use current->seccomp.filter.
 */
static bool has_duplicate_listener(struct seccomp_filter *new_child)
{}

/**
 * seccomp_set_mode_filter: internal function for setting seccomp filter
 * @flags:  flags to change filter behavior
 * @filter: struct sock_fprog containing filter
 *
 * This function may be called repeatedly to install additional filters.
 * Every filter successfully installed will be evaluated (in reverse order)
 * for each system call the task makes.
 *
 * Once current->seccomp.mode is non-zero, it may not be changed.
 *
 * Returns 0 on success or -EINVAL on failure.
 */
static long seccomp_set_mode_filter(unsigned int flags,
				    const char __user *filter)
{}
#else
static inline long seccomp_set_mode_filter(unsigned int flags,
					   const char __user *filter)
{
	return -EINVAL;
}
#endif

static long seccomp_get_action_avail(const char __user *uaction)
{}

static long seccomp_get_notif_sizes(void __user *usizes)
{}

/* Common entry point for both prctl and syscall. */
static long do_seccomp(unsigned int op, unsigned int flags,
		       void __user *uargs)
{}

SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags,
			 void __user *, uargs)
{}

/**
 * prctl_set_seccomp: configures current->seccomp.mode
 * @seccomp_mode: requested mode to use
 * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER
 *
 * Returns 0 on success or -EINVAL on failure.
 */
long prctl_set_seccomp(unsigned long seccomp_mode, void __user *filter)
{}

#if defined(CONFIG_SECCOMP_FILTER) && defined(CONFIG_CHECKPOINT_RESTORE)
static struct seccomp_filter *get_nth_filter(struct task_struct *task,
					     unsigned long filter_off)
{}

long seccomp_get_filter(struct task_struct *task, unsigned long filter_off,
			void __user *data)
{}

long seccomp_get_metadata(struct task_struct *task,
			  unsigned long size, void __user *data)
{}
#endif

#ifdef CONFIG_SYSCTL

/* Human readable action names for friendly sysctl interaction */
#define SECCOMP_RET_KILL_PROCESS_NAME
#define SECCOMP_RET_KILL_THREAD_NAME
#define SECCOMP_RET_TRAP_NAME
#define SECCOMP_RET_ERRNO_NAME
#define SECCOMP_RET_USER_NOTIF_NAME
#define SECCOMP_RET_TRACE_NAME
#define SECCOMP_RET_LOG_NAME
#define SECCOMP_RET_ALLOW_NAME

static const char seccomp_actions_avail[] =;

struct seccomp_log_name {};

static const struct seccomp_log_name seccomp_log_names[] =;

static bool seccomp_names_from_actions_logged(char *names, size_t size,
					      u32 actions_logged,
					      const char *sep)
{}

static bool seccomp_action_logged_from_name(u32 *action_logged,
					    const char *name)
{}

static bool seccomp_actions_logged_from_names(u32 *actions_logged, char *names)
{}

static int read_actions_logged(const struct ctl_table *ro_table, void *buffer,
			       size_t *lenp, loff_t *ppos)
{}

static int write_actions_logged(const struct ctl_table *ro_table, void *buffer,
				size_t *lenp, loff_t *ppos, u32 *actions_logged)
{}

static void audit_actions_logged(u32 actions_logged, u32 old_actions_logged,
				 int ret)
{}

static int seccomp_actions_logged_handler(const struct ctl_table *ro_table, int write,
					  void *buffer, size_t *lenp,
					  loff_t *ppos)
{}

static struct ctl_table seccomp_sysctl_table[] =;

static int __init seccomp_sysctl_init(void)
{}

device_initcall()

#endif /* CONFIG_SYSCTL */

#ifdef CONFIG_SECCOMP_CACHE_DEBUG
/* Currently CONFIG_SECCOMP_CACHE_DEBUG implies SECCOMP_ARCH_NATIVE */
static void proc_pid_seccomp_cache_arch(struct seq_file *m, const char *name,
					const void *bitmap, size_t bitmap_size)
{}

int proc_pid_seccomp_cache(struct seq_file *m, struct pid_namespace *ns,
			   struct pid *pid, struct task_struct *task)
{}
#endif /* CONFIG_SECCOMP_CACHE_DEBUG */