// SPDX-License-Identifier: GPL-2.0-or-later /* Common capabilities, needed by capability.o. */ #include <linux/capability.h> #include <linux/audit.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/lsm_hooks.h> #include <linux/file.h> #include <linux/mm.h> #include <linux/mman.h> #include <linux/pagemap.h> #include <linux/swap.h> #include <linux/skbuff.h> #include <linux/netlink.h> #include <linux/ptrace.h> #include <linux/xattr.h> #include <linux/hugetlb.h> #include <linux/mount.h> #include <linux/sched.h> #include <linux/prctl.h> #include <linux/securebits.h> #include <linux/user_namespace.h> #include <linux/binfmts.h> #include <linux/personality.h> #include <linux/mnt_idmapping.h> #include <uapi/linux/lsm.h> /* * If a non-root user executes a setuid-root binary in * !secure(SECURE_NOROOT) mode, then we raise capabilities. * However if fE is also set, then the intent is for only * the file capabilities to be applied, and the setuid-root * bit is left on either to change the uid (plausible) or * to get full privilege on a kernel without file capabilities * support. So in that case we do not raise capabilities. * * Warn if that happens, once per boot. */ static void warn_setuid_and_fcaps_mixed(const char *fname) { … } /** * cap_capable - Determine whether a task has a particular effective capability * @cred: The credentials to use * @targ_ns: The user namespace in which we need the capability * @cap: The capability to check for * @opts: Bitmask of options defined in include/linux/security.h * * Determine whether the nominated task has the specified capability amongst * its effective set, returning 0 if it does, -ve if it does not. * * NOTE WELL: cap_has_capability() cannot be used like the kernel's capable() * and has_capability() functions. That is, it has the reverse semantics: * cap_has_capability() returns 0 when a task has a capability, but the * kernel's capable() and has_capability() returns 1 for this case. */ int cap_capable(const struct cred *cred, struct user_namespace *targ_ns, int cap, unsigned int opts) { … } /** * cap_settime - Determine whether the current process may set the system clock * @ts: The time to set * @tz: The timezone to set * * Determine whether the current process may set the system clock and timezone * information, returning 0 if permission granted, -ve if denied. */ int cap_settime(const struct timespec64 *ts, const struct timezone *tz) { … } /** * cap_ptrace_access_check - Determine whether the current process may access * another * @child: The process to be accessed * @mode: The mode of attachment. * * If we are in the same or an ancestor user_ns and have all the target * task's capabilities, then ptrace access is allowed. * If we have the ptrace capability to the target user_ns, then ptrace * access is allowed. * Else denied. * * Determine whether a process may access another, returning 0 if permission * granted, -ve if denied. */ int cap_ptrace_access_check(struct task_struct *child, unsigned int mode) { … } /** * cap_ptrace_traceme - Determine whether another process may trace the current * @parent: The task proposed to be the tracer * * If parent is in the same or an ancestor user_ns and has all current's * capabilities, then ptrace access is allowed. * If parent has the ptrace capability to current's user_ns, then ptrace * access is allowed. * Else denied. * * Determine whether the nominated task is permitted to trace the current * process, returning 0 if permission is granted, -ve if denied. */ int cap_ptrace_traceme(struct task_struct *parent) { … } /** * cap_capget - Retrieve a task's capability sets * @target: The task from which to retrieve the capability sets * @effective: The place to record the effective set * @inheritable: The place to record the inheritable set * @permitted: The place to record the permitted set * * This function retrieves the capabilities of the nominated task and returns * them to the caller. */ int cap_capget(const struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) { … } /* * Determine whether the inheritable capabilities are limited to the old * permitted set. Returns 1 if they are limited, 0 if they are not. */ static inline int cap_inh_is_capped(void) { … } /** * cap_capset - Validate and apply proposed changes to current's capabilities * @new: The proposed new credentials; alterations should be made here * @old: The current task's current credentials * @effective: A pointer to the proposed new effective capabilities set * @inheritable: A pointer to the proposed new inheritable capabilities set * @permitted: A pointer to the proposed new permitted capabilities set * * This function validates and applies a proposed mass change to the current * process's capability sets. The changes are made to the proposed new * credentials, and assuming no error, will be committed by the caller of LSM. */ int cap_capset(struct cred *new, const struct cred *old, const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted) { … } /** * cap_inode_need_killpriv - Determine if inode change affects privileges * @dentry: The inode/dentry in being changed with change marked ATTR_KILL_PRIV * * Determine if an inode having a change applied that's marked ATTR_KILL_PRIV * affects the security markings on that inode, and if it is, should * inode_killpriv() be invoked or the change rejected. * * Return: 1 if security.capability has a value, meaning inode_killpriv() * is required, 0 otherwise, meaning inode_killpriv() is not required. */ int cap_inode_need_killpriv(struct dentry *dentry) { … } /** * cap_inode_killpriv - Erase the security markings on an inode * * @idmap: idmap of the mount the inode was found from * @dentry: The inode/dentry to alter * * Erase the privilege-enhancing security markings on an inode. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then * take care to map the inode according to @idmap before checking * permissions. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply pass @nop_mnt_idmap. * * Return: 0 if successful, -ve on error. */ int cap_inode_killpriv(struct mnt_idmap *idmap, struct dentry *dentry) { … } static bool rootid_owns_currentns(vfsuid_t rootvfsuid) { … } static __u32 sansflags(__u32 m) { … } static bool is_v2header(int size, const struct vfs_cap_data *cap) { … } static bool is_v3header(int size, const struct vfs_cap_data *cap) { … } /* * getsecurity: We are called for security.* before any attempt to read the * xattr from the inode itself. * * This gives us a chance to read the on-disk value and convert it. If we * return -EOPNOTSUPP, then vfs_getxattr() will call the i_op handler. * * Note we are not called by vfs_getxattr_alloc(), but that is only called * by the integrity subsystem, which really wants the unconverted values - * so that's good. */ int cap_inode_getsecurity(struct mnt_idmap *idmap, struct inode *inode, const char *name, void **buffer, bool alloc) { … } /** * rootid_from_xattr - translate root uid of vfs caps * * @value: vfs caps value which may be modified by this function * @size: size of @ivalue * @task_ns: user namespace of the caller */ static vfsuid_t rootid_from_xattr(const void *value, size_t size, struct user_namespace *task_ns) { … } static bool validheader(size_t size, const struct vfs_cap_data *cap) { … } /** * cap_convert_nscap - check vfs caps * * @idmap: idmap of the mount the inode was found from * @dentry: used to retrieve inode to check permissions on * @ivalue: vfs caps value which may be modified by this function * @size: size of @ivalue * * User requested a write of security.capability. If needed, update the * xattr to change from v2 to v3, or to fixup the v3 rootid. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then * take care to map the inode according to @idmap before checking * permissions. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply pass @nop_mnt_idmap. * * Return: On success, return the new size; on error, return < 0. */ int cap_convert_nscap(struct mnt_idmap *idmap, struct dentry *dentry, const void **ivalue, size_t size) { … } /* * Calculate the new process capability sets from the capability sets attached * to a file. */ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps, struct linux_binprm *bprm, bool *effective, bool *has_fcap) { … } /** * get_vfs_caps_from_disk - retrieve vfs caps from disk * * @idmap: idmap of the mount the inode was found from * @dentry: dentry from which @inode is retrieved * @cpu_caps: vfs capabilities * * Extract the on-exec-apply capability sets for an executable file. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then * take care to map the inode according to @idmap before checking * permissions. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply pass @nop_mnt_idmap. */ int get_vfs_caps_from_disk(struct mnt_idmap *idmap, const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps) { … } /* * Attempt to get the on-exec apply capability sets for an executable file from * its xattrs and, if present, apply them to the proposed credentials being * constructed by execve(). */ static int get_file_caps(struct linux_binprm *bprm, const struct file *file, bool *effective, bool *has_fcap) { … } static inline bool root_privileged(void) { … } static inline bool __is_real(kuid_t uid, struct cred *cred) { … } static inline bool __is_eff(kuid_t uid, struct cred *cred) { … } static inline bool __is_suid(kuid_t uid, struct cred *cred) { … } /* * handle_privileged_root - Handle case of privileged root * @bprm: The execution parameters, including the proposed creds * @has_fcap: Are any file capabilities set? * @effective: Do we have effective root privilege? * @root_uid: This namespace' root UID WRT initial USER namespace * * Handle the case where root is privileged and hasn't been neutered by * SECURE_NOROOT. If file capabilities are set, they won't be combined with * set UID root and nothing is changed. If we are root, cap_permitted is * updated. If we have become set UID root, the effective bit is set. */ static void handle_privileged_root(struct linux_binprm *bprm, bool has_fcap, bool *effective, kuid_t root_uid) { … } #define __cap_gained(field, target, source) … #define __cap_grew(target, source, cred) … #define __cap_full(field, cred) … static inline bool __is_setuid(struct cred *new, const struct cred *old) { … } static inline bool __is_setgid(struct cred *new, const struct cred *old) { … } /* * 1) Audit candidate if current->cap_effective is set * * We do not bother to audit if 3 things are true: * 1) cap_effective has all caps * 2) we became root *OR* are were already root * 3) root is supposed to have all caps (SECURE_NOROOT) * Since this is just a normal root execing a process. * * Number 1 above might fail if you don't have a full bset, but I think * that is interesting information to audit. * * A number of other conditions require logging: * 2) something prevented setuid root getting all caps * 3) non-setuid root gets fcaps * 4) non-setuid root gets ambient */ static inline bool nonroot_raised_pE(struct cred *new, const struct cred *old, kuid_t root, bool has_fcap) { … } /** * cap_bprm_creds_from_file - Set up the proposed credentials for execve(). * @bprm: The execution parameters, including the proposed creds * @file: The file to pull the credentials from * * Set up the proposed credentials for a new execution context being * constructed by execve(). The proposed creds in @bprm->cred is altered, * which won't take effect immediately. * * Return: 0 if successful, -ve on error. */ int cap_bprm_creds_from_file(struct linux_binprm *bprm, const struct file *file) { … } /** * cap_inode_setxattr - Determine whether an xattr may be altered * @dentry: The inode/dentry being altered * @name: The name of the xattr to be changed * @value: The value that the xattr will be changed to * @size: The size of value * @flags: The replacement flag * * Determine whether an xattr may be altered or set on an inode, returning 0 if * permission is granted, -ve if denied. * * This is used to make sure security xattrs don't get updated or set by those * who aren't privileged to do so. */ int cap_inode_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { … } /** * cap_inode_removexattr - Determine whether an xattr may be removed * * @idmap: idmap of the mount the inode was found from * @dentry: The inode/dentry being altered * @name: The name of the xattr to be changed * * Determine whether an xattr may be removed from an inode, returning 0 if * permission is granted, -ve if denied. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then * take care to map the inode according to @idmap before checking * permissions. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply pass @nop_mnt_idmap. * * This is used to make sure security xattrs don't get removed by those who * aren't privileged to remove them. */ int cap_inode_removexattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name) { … } /* * cap_emulate_setxuid() fixes the effective / permitted capabilities of * a process after a call to setuid, setreuid, or setresuid. * * 1) When set*uiding _from_ one of {r,e,s}uid == 0 _to_ all of * {r,e,s}uid != 0, the permitted and effective capabilities are * cleared. * * 2) When set*uiding _from_ euid == 0 _to_ euid != 0, the effective * capabilities of the process are cleared. * * 3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective * capabilities are set to the permitted capabilities. * * fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should * never happen. * * -astor * * cevans - New behaviour, Oct '99 * A process may, via prctl(), elect to keep its capabilities when it * calls setuid() and switches away from uid==0. Both permitted and * effective sets will be retained. * Without this change, it was impossible for a daemon to drop only some * of its privilege. The call to setuid(!=0) would drop all privileges! * Keeping uid 0 is not an option because uid 0 owns too many vital * files.. * Thanks to Olaf Kirch and Peter Benie for spotting this. */ static inline void cap_emulate_setxuid(struct cred *new, const struct cred *old) { … } /** * cap_task_fix_setuid - Fix up the results of setuid() call * @new: The proposed credentials * @old: The current task's current credentials * @flags: Indications of what has changed * * Fix up the results of setuid() call before the credential changes are * actually applied. * * Return: 0 to grant the changes, -ve to deny them. */ int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags) { … } /* * Rationale: code calling task_setscheduler, task_setioprio, and * task_setnice, assumes that * . if capable(cap_sys_nice), then those actions should be allowed * . if not capable(cap_sys_nice), but acting on your own processes, * then those actions should be allowed * This is insufficient now since you can call code without suid, but * yet with increased caps. * So we check for increased caps on the target process. */ static int cap_safe_nice(struct task_struct *p) { … } /** * cap_task_setscheduler - Determine if scheduler policy change is permitted * @p: The task to affect * * Determine if the requested scheduler policy change is permitted for the * specified task. * * Return: 0 if permission is granted, -ve if denied. */ int cap_task_setscheduler(struct task_struct *p) { … } /** * cap_task_setioprio - Determine if I/O priority change is permitted * @p: The task to affect * @ioprio: The I/O priority to set * * Determine if the requested I/O priority change is permitted for the specified * task. * * Return: 0 if permission is granted, -ve if denied. */ int cap_task_setioprio(struct task_struct *p, int ioprio) { … } /** * cap_task_setnice - Determine if task priority change is permitted * @p: The task to affect * @nice: The nice value to set * * Determine if the requested task priority change is permitted for the * specified task. * * Return: 0 if permission is granted, -ve if denied. */ int cap_task_setnice(struct task_struct *p, int nice) { … } /* * Implement PR_CAPBSET_DROP. Attempt to remove the specified capability from * the current task's bounding set. Returns 0 on success, -ve on error. */ static int cap_prctl_drop(unsigned long cap) { … } /** * cap_task_prctl - Implement process control functions for this security module * @option: The process control function requested * @arg2: The argument data for this function * @arg3: The argument data for this function * @arg4: The argument data for this function * @arg5: The argument data for this function * * Allow process control functions (sys_prctl()) to alter capabilities; may * also deny access to other functions not otherwise implemented here. * * Return: 0 or +ve on success, -ENOSYS if this function is not implemented * here, other -ve on error. If -ENOSYS is returned, sys_prctl() and other LSM * modules will consider performing the function. */ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) { … } /** * cap_vm_enough_memory - Determine whether a new virtual mapping is permitted * @mm: The VM space in which the new mapping is to be made * @pages: The size of the mapping * * Determine whether the allocation of a new virtual mapping by the current * task is permitted. * * Return: 1 if permission is granted, 0 if not. */ int cap_vm_enough_memory(struct mm_struct *mm, long pages) { … } /** * cap_mmap_addr - check if able to map given addr * @addr: address attempting to be mapped * * If the process is attempting to map memory below dac_mmap_min_addr they need * CAP_SYS_RAWIO. The other parameters to this function are unused by the * capability security module. * * Return: 0 if this mapping should be allowed or -EPERM if not. */ int cap_mmap_addr(unsigned long addr) { … } int cap_mmap_file(struct file *file, unsigned long reqprot, unsigned long prot, unsigned long flags) { … } #ifdef CONFIG_SECURITY static const struct lsm_id capability_lsmid = …; static struct security_hook_list capability_hooks[] __ro_after_init = …; static int __init capability_init(void) { … } DEFINE_LSM(capability) = …; #endif /* CONFIG_SECURITY */