auditsc.c | Explore in Territory

// SPDX-License-Identifier: GPL-2.0-or-later
/* auditsc.c -- System-call auditing support
 * Handles all system-call specific auditing features.
 *
 * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
 * Copyright 2005 Hewlett-Packard Development Company, L.P.
 * Copyright (C) 2005, 2006 IBM Corporation
 * All Rights Reserved.
 *
 * Written by Rickard E. (Rik) Faith <[email protected]>
 *
 * Many of the ideas implemented here are from Stephen C. Tweedie,
 * especially the idea of avoiding a copy by using getname.
 *
 * The method for actual interception of syscall entry and exit (not in
 * this file -- see entry.S) is based on a GPL'd patch written by
 * [email protected] and Copyright 2003 SuSE Linux AG.
 *
 * POSIX message queue support added by George Wilson <[email protected]>,
 * 2006.
 *
 * The support of additional filter rules compares (>, <, >=, <=) was
 * added by Dustin Kirkland <[email protected]>, 2005.
 *
 * Modified by Amy Griffis <[email protected]> to collect additional
 * filesystem information.
 *
 * Subject and object context labeling support added by <[email protected]>
 * and <[email protected]> for LSPP certification compliance.
 */

#define pr_fmt(fmt) …

#include <linux/init.h>
#include <asm/types.h>
#include <linux/atomic.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/mm.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/mount.h>
#include <linux/socket.h>
#include <linux/mqueue.h>
#include <linux/audit.h>
#include <linux/personality.h>
#include <linux/time.h>
#include <linux/netlink.h>
#include <linux/compiler.h>
#include <asm/unistd.h>
#include <linux/security.h>
#include <linux/list.h>
#include <linux/binfmts.h>
#include <linux/highmem.h>
#include <linux/syscalls.h>
#include <asm/syscall.h>
#include <linux/capability.h>
#include <linux/fs_struct.h>
#include <linux/compat.h>
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/fsnotify_backend.h>
#include <uapi/linux/limits.h>
#include <uapi/linux/netfilter/nf_tables.h>
#include <uapi/linux/openat2.h> // struct open_how
#include <uapi/linux/fanotify.h>

#include "audit.h"

/* flags stating the success for a syscall */
#define AUDITSC_INVALID …
#define AUDITSC_SUCCESS …
#define AUDITSC_FAILURE …

/* no execve audit message should be longer than this (userspace limits),
 * see the note near the top of audit_log_execve_info() about this value */
#define MAX_EXECVE_AUDIT_LEN …

/* max length to print of cmdline/proctitle value during audit */
#define MAX_PROCTITLE_AUDIT_LEN …

/* number of audit rules */
int audit_n_rules;

/* determines whether we collect data for signals sent */
int audit_signals;

struct audit_aux_data { … };

/* Number of target pids per aux struct. */
#define AUDIT_AUX_PIDS …

struct audit_aux_data_pids { … };

struct audit_aux_data_bprm_fcaps { … };

struct audit_tree_refs { … };

struct audit_nfcfgop_tab { … };

static const struct audit_nfcfgop_tab audit_nfcfgs[] = …;

static int audit_match_perm(struct audit_context *ctx, int mask)
{ … }

static int audit_match_filetype(struct audit_context *ctx, int val)
{ … }

/*
 * We keep a linked list of fixed-sized (31 pointer) arrays of audit_chunk *;
 * ->first_trees points to its beginning, ->trees - to the current end of data.
 * ->tree_count is the number of free entries in array pointed to by ->trees.
 * Original condition is (NULL, NULL, 0); as soon as it grows we never revert to NULL,
 * "empty" becomes (p, p, 31) afterwards.  We don't shrink the list (and seriously,
 * it's going to remain 1-element for almost any setup) until we free context itself.
 * References in it _are_ dropped - at the same time we free/drop aux stuff.
 */

static void audit_set_auditable(struct audit_context *ctx)
{ … }

static int put_tree_ref(struct audit_context *ctx, struct audit_chunk *chunk)
{ … }

static int grow_tree_refs(struct audit_context *ctx)
{ … }

static void unroll_tree_refs(struct audit_context *ctx,
		      struct audit_tree_refs *p, int count)
{ … }

static void free_tree_refs(struct audit_context *ctx)
{ … }

static int match_tree_refs(struct audit_context *ctx, struct audit_tree *tree)
{ … }

static int audit_compare_uid(kuid_t uid,
			     struct audit_names *name,
			     struct audit_field *f,
			     struct audit_context *ctx)
{ … }

static int audit_compare_gid(kgid_t gid,
			     struct audit_names *name,
			     struct audit_field *f,
			     struct audit_context *ctx)
{ … }

static int audit_field_compare(struct task_struct *tsk,
			       const struct cred *cred,
			       struct audit_field *f,
			       struct audit_context *ctx,
			       struct audit_names *name)
{ … }

/* Determine if any context name data matches a rule's watch data */
/* Compare a task_struct with an audit_rule.  Return 1 on match, 0
 * otherwise.
 *
 * If task_creation is true, this is an explicit indication that we are
 * filtering a task rule at task creation time.  This and tsk == current are
 * the only situations where tsk->cred may be accessed without an rcu read lock.
 */
static int audit_filter_rules(struct task_struct *tsk,
			      struct audit_krule *rule,
			      struct audit_context *ctx,
			      struct audit_names *name,
			      enum audit_state *state,
			      bool task_creation)
{ … }

/* At process creation time, we can determine if system-call auditing is
 * completely disabled for this task.  Since we only have the task
 * structure at this point, we can only check uid and gid.
 */
static enum audit_state audit_filter_task(struct task_struct *tsk, char **key)
{ … }

static int audit_in_mask(const struct audit_krule *rule, unsigned long val)
{ … }

/**
 * __audit_filter_op - common filter helper for operations (syscall/uring/etc)
 * @tsk: associated task
 * @ctx: audit context
 * @list: audit filter list
 * @name: audit_name (can be NULL)
 * @op: current syscall/uring_op
 *
 * Run the audit filters specified in @list against @tsk using @ctx,
 * @name, and @op, as necessary; the caller is responsible for ensuring
 * that the call is made while the RCU read lock is held. The @name
 * parameter can be NULL, but all others must be specified.
 * Returns 1/true if the filter finds a match, 0/false if none are found.
 */
static int __audit_filter_op(struct task_struct *tsk,
			   struct audit_context *ctx,
			   struct list_head *list,
			   struct audit_names *name,
			   unsigned long op)
{ … }

/**
 * audit_filter_uring - apply filters to an io_uring operation
 * @tsk: associated task
 * @ctx: audit context
 */
static void audit_filter_uring(struct task_struct *tsk,
			       struct audit_context *ctx)
{ … }

/* At syscall exit time, this filter is called if the audit_state is
 * high enough that auditing can take place, but not so high that we
 * already know we have to write an audit record
 * (i.e., the state is AUDIT_STATE_BUILD).
 */
static void audit_filter_syscall(struct task_struct *tsk,
				 struct audit_context *ctx)
{ … }

/*
 * Given an audit_name check the inode hash table to see if they match.
 * Called holding the rcu read lock to protect the use of audit_inode_hash
 */
static int audit_filter_inode_name(struct task_struct *tsk,
				   struct audit_names *n,
				   struct audit_context *ctx)
{ … }

/* At syscall exit time, this filter is called if any audit_names have been
 * collected during syscall processing.  We only check rules in sublists at hash
 * buckets applicable to the inode numbers in audit_names.
 * Regarding audit_state, same rules apply as for audit_filter_syscall().
 */
void audit_filter_inodes(struct task_struct *tsk, struct audit_context *ctx)
{ … }

static inline void audit_proctitle_free(struct audit_context *context)
{ … }

static inline void audit_free_module(struct audit_context *context)
{ … }
static inline void audit_free_names(struct audit_context *context)
{ … }

static inline void audit_free_aux(struct audit_context *context)
{ … }

/**
 * audit_reset_context - reset an audit_context structure
 * @ctx: the audit_context to reset
 *
 * All fields in the audit_context will be reset to an initial state, all
 * references held by fields will be dropped, and private memory will be
 * released.  When this function returns the audit_context will be suitable
 * for reuse, so long as the passed context is not NULL or a dummy context.
 */
static void audit_reset_context(struct audit_context *ctx)
{ … }

static inline struct audit_context *audit_alloc_context(enum audit_state state)
{ … }

/**
 * audit_alloc - allocate an audit context block for a task
 * @tsk: task
 *
 * Filter on the task information and allocate a per-task audit context
 * if necessary.  Doing so turns on system call auditing for the
 * specified task.  This is called from copy_process, so no lock is
 * needed.
 */
int audit_alloc(struct task_struct *tsk)
{ … }

static inline void audit_free_context(struct audit_context *context)
{ … }

static int audit_log_pid_context(struct audit_context *context, pid_t pid,
				 kuid_t auid, kuid_t uid, unsigned int sessionid,
				 u32 sid, char *comm)
{ … }

static void audit_log_execve_info(struct audit_context *context,
				  struct audit_buffer **ab)
{ … }

static void audit_log_cap(struct audit_buffer *ab, char *prefix,
			  kernel_cap_t *cap)
{ … }

static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name)
{ … }

static void audit_log_time(struct audit_context *context, struct audit_buffer **ab)
{ … }

static void show_special(struct audit_context *context, int *call_panic)
{ … }

static inline int audit_proctitle_rtrim(char *proctitle, int len)
{ … }

/*
 * audit_log_name - produce AUDIT_PATH record from struct audit_names
 * @context: audit_context for the task
 * @n: audit_names structure with reportable details
 * @path: optional path to report instead of audit_names->name
 * @record_num: record number to report when handling a list of names
 * @call_panic: optional pointer to int that will be updated if secid fails
 */
static void audit_log_name(struct audit_context *context, struct audit_names *n,
		    const struct path *path, int record_num, int *call_panic)
{ … }

static void audit_log_proctitle(void)
{ … }

/**
 * audit_log_uring - generate an AUDIT_URINGOP record
 * @ctx: the audit context
 */
static void audit_log_uring(struct audit_context *ctx)
{ … }

static void audit_log_exit(void)
{ … }

/**
 * __audit_free - free a per-task audit context
 * @tsk: task whose audit context block to free
 *
 * Called from copy_process, do_exit, and the io_uring code
 */
void __audit_free(struct task_struct *tsk)
{ … }

/**
 * audit_return_fixup - fixup the return codes in the audit_context
 * @ctx: the audit_context
 * @success: true/false value to indicate if the operation succeeded or not
 * @code: operation return code
 *
 * We need to fixup the return code in the audit logs if the actual return
 * codes are later going to be fixed by the arch specific signal handlers.
 */
static void audit_return_fixup(struct audit_context *ctx,
			       int success, long code)
{ … }

/**
 * __audit_uring_entry - prepare the kernel task's audit context for io_uring
 * @op: the io_uring opcode
 *
 * This is similar to audit_syscall_entry() but is intended for use by io_uring
 * operations.  This function should only ever be called from
 * audit_uring_entry() as we rely on the audit context checking present in that
 * function.
 */
void __audit_uring_entry(u8 op)
{ … }

/**
 * __audit_uring_exit - wrap up the kernel task's audit context after io_uring
 * @success: true/false value to indicate if the operation succeeded or not
 * @code: operation return code
 *
 * This is similar to audit_syscall_exit() but is intended for use by io_uring
 * operations.  This function should only ever be called from
 * audit_uring_exit() as we rely on the audit context checking present in that
 * function.
 */
void __audit_uring_exit(int success, long code)
{ … }

/**
 * __audit_syscall_entry - fill in an audit record at syscall entry
 * @major: major syscall type (function)
 * @a1: additional syscall register 1
 * @a2: additional syscall register 2
 * @a3: additional syscall register 3
 * @a4: additional syscall register 4
 *
 * Fill in audit context at syscall entry.  This only happens if the
 * audit context was created when the task was created and the state or
 * filters demand the audit context be built.  If the state from the
 * per-task filter or from the per-syscall filter is AUDIT_STATE_RECORD,
 * then the record will be written at syscall exit time (otherwise, it
 * will only be written if another part of the kernel requests that it
 * be written).
 */
void __audit_syscall_entry(int major, unsigned long a1, unsigned long a2,
			   unsigned long a3, unsigned long a4)
{ … }

/**
 * __audit_syscall_exit - deallocate audit context after a system call
 * @success: success value of the syscall
 * @return_code: return value of the syscall
 *
 * Tear down after system call.  If the audit context has been marked as
 * auditable (either because of the AUDIT_STATE_RECORD state from
 * filtering, or because some other part of the kernel wrote an audit
 * message), then write out the syscall information.  In all cases,
 * free the names stored from getname().
 */
void __audit_syscall_exit(int success, long return_code)
{ … }

static inline void handle_one(const struct inode *inode)
{ … }

static void handle_path(const struct dentry *dentry)
{ … }

static struct audit_names *audit_alloc_name(struct audit_context *context,
						unsigned char type)
{ … }

/**
 * __audit_reusename - fill out filename with info from existing entry
 * @uptr: userland ptr to pathname
 *
 * Search the audit_names list for the current audit context. If there is an
 * existing entry with a matching "uptr" then return the filename
 * associated with that audit_name. If not, return NULL.
 */
struct filename *
__audit_reusename(const __user char *uptr)
{ … }

/**
 * __audit_getname - add a name to the list
 * @name: name to add
 *
 * Add a name to the list of audit names for this context.
 * Called from fs/namei.c:getname().
 */
void __audit_getname(struct filename *name)
{ … }

static inline int audit_copy_fcaps(struct audit_names *name,
				   const struct dentry *dentry)
{ … }

/* Copy inode data into an audit_names. */
static void audit_copy_inode(struct audit_names *name,
			     const struct dentry *dentry,
			     struct inode *inode, unsigned int flags)
{ … }

/**
 * __audit_inode - store the inode and device from a lookup
 * @name: name being audited
 * @dentry: dentry being audited
 * @flags: attributes for this particular entry
 */
void __audit_inode(struct filename *name, const struct dentry *dentry,
		   unsigned int flags)
{ … }

void __audit_file(const struct file *file)
{ … }

/**
 * __audit_inode_child - collect inode info for created/removed objects
 * @parent: inode of dentry parent
 * @dentry: dentry being audited
 * @type:   AUDIT_TYPE_* value that we're looking for
 *
 * For syscalls that create or remove filesystem objects, audit_inode
 * can only collect information for the filesystem object's parent.
 * This call updates the audit context with the child's information.
 * Syscalls that create a new filesystem object must be hooked after
 * the object is created.  Syscalls that remove a filesystem object
 * must be hooked prior, in order to capture the target inode during
 * unsuccessful attempts.
 */
void __audit_inode_child(struct inode *parent,
			 const struct dentry *dentry,
			 const unsigned char type)
{ … }
EXPORT_SYMBOL_GPL(…);

/**
 * auditsc_get_stamp - get local copies of audit_context values
 * @ctx: audit_context for the task
 * @t: timespec64 to store time recorded in the audit_context
 * @serial: serial value that is recorded in the audit_context
 *
 * Also sets the context as auditable.
 */
int auditsc_get_stamp(struct audit_context *ctx,
		       struct timespec64 *t, unsigned int *serial)
{ … }

/**
 * __audit_mq_open - record audit data for a POSIX MQ open
 * @oflag: open flag
 * @mode: mode bits
 * @attr: queue attributes
 *
 */
void __audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr)
{ … }

/**
 * __audit_mq_sendrecv - record audit data for a POSIX MQ timed send/receive
 * @mqdes: MQ descriptor
 * @msg_len: Message length
 * @msg_prio: Message priority
 * @abs_timeout: Message timeout in absolute time
 *
 */
void __audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio,
			const struct timespec64 *abs_timeout)
{ … }

/**
 * __audit_mq_notify - record audit data for a POSIX MQ notify
 * @mqdes: MQ descriptor
 * @notification: Notification event
 *
 */

void __audit_mq_notify(mqd_t mqdes, const struct sigevent *notification)
{ … }

/**
 * __audit_mq_getsetattr - record audit data for a POSIX MQ get/set attribute
 * @mqdes: MQ descriptor
 * @mqstat: MQ flags
 *
 */
void __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat)
{ … }

/**
 * __audit_ipc_obj - record audit data for ipc object
 * @ipcp: ipc permissions
 *
 */
void __audit_ipc_obj(struct kern_ipc_perm *ipcp)
{ … }

/**
 * __audit_ipc_set_perm - record audit data for new ipc permissions
 * @qbytes: msgq bytes
 * @uid: msgq user id
 * @gid: msgq group id
 * @mode: msgq mode (permissions)
 *
 * Called only after audit_ipc_obj().
 */
void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode)
{ … }

void __audit_bprm(struct linux_binprm *bprm)
{ … }


/**
 * __audit_socketcall - record audit data for sys_socketcall
 * @nargs: number of args, which should not be more than AUDITSC_ARGS.
 * @args: args array
 *
 */
int __audit_socketcall(int nargs, unsigned long *args)
{ … }

/**
 * __audit_fd_pair - record audit data for pipe and socketpair
 * @fd1: the first file descriptor
 * @fd2: the second file descriptor
 *
 */
void __audit_fd_pair(int fd1, int fd2)
{ … }

/**
 * __audit_sockaddr - record audit data for sys_bind, sys_connect, sys_sendto
 * @len: data length in user space
 * @a: data address in kernel space
 *
 * Returns 0 on success or when the context is NULL, or < 0 on error.
 */
int __audit_sockaddr(int len, void *a)
{ … }

void __audit_ptrace(struct task_struct *t)
{ … }

/**
 * audit_signal_info_syscall - record signal info for syscalls
 * @t: task being signaled
 *
 * If the audit subsystem is being terminated, record the task (pid)
 * and uid that is doing that.
 */
int audit_signal_info_syscall(struct task_struct *t)
{ … }

/**
 * __audit_log_bprm_fcaps - store information about a loading bprm and relevant fcaps
 * @bprm: pointer to the bprm being processed
 * @new: the proposed new credentials
 * @old: the old credentials
 *
 * Simply check if the proc already has the caps given by the file and, if
 * not, store the priv escalation info for later auditing at the end of the
 * syscall.
 *
 * -Eric
 */
int __audit_log_bprm_fcaps(struct linux_binprm *bprm,
			   const struct cred *new, const struct cred *old)
{ … }

/**
 * __audit_log_capset - store information about the arguments to the capset syscall
 * @new: the new credentials
 * @old: the old (current) credentials
 *
 * Record the arguments userspace sent to sys_capset for later printing by the
 * audit system if applicable
 */
void __audit_log_capset(const struct cred *new, const struct cred *old)
{ … }

void __audit_mmap_fd(int fd, int flags)
{ … }

void __audit_openat2_how(struct open_how *how)
{ … }

void __audit_log_kern_module(char *name)
{ … }

void __audit_fanotify(u32 response, struct fanotify_response_info_audit_rule *friar)
{ … }

void __audit_tk_injoffset(struct timespec64 offset)
{ … }

void __audit_ntp_log(const struct audit_ntp_data *ad)
{ … }

void __audit_log_nfcfg(const char *name, u8 af, unsigned int nentries,
		       enum audit_nfcfgop op, gfp_t gfp)
{ … }
EXPORT_SYMBOL_GPL(…);

static void audit_log_task(struct audit_buffer *ab)
{ … }

/**
 * audit_core_dumps - record information about processes that end abnormally
 * @signr: signal value
 *
 * If a process ends with a core dump, something fishy is going on and we
 * should record the event for investigation.
 */
void audit_core_dumps(long signr)
{ … }

/**
 * audit_seccomp - record information about a seccomp action
 * @syscall: syscall number
 * @signr: signal value
 * @code: the seccomp action
 *
 * Record the information associated with a seccomp action. Event filtering for
 * seccomp actions that are not to be logged is done in seccomp_log().
 * Therefore, this function forces auditing independent of the audit_enabled
 * and dummy context state because seccomp actions should be logged even when
 * audit is not in use.
 */
void audit_seccomp(unsigned long syscall, long signr, int code)
{ … }

void audit_seccomp_actions_logged(const char *names, const char *old_names,
				  int res)
{ … }

struct list_head *audit_killed_trees(void)
{ … }
linux/kernel/auditsc.c