linux/kernel/sys.c

// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/kernel/sys.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/export.h>
#include <linux/mm.h>
#include <linux/mm_inline.h>
#include <linux/utsname.h>
#include <linux/mman.h>
#include <linux/reboot.h>
#include <linux/prctl.h>
#include <linux/highuid.h>
#include <linux/fs.h>
#include <linux/kmod.h>
#include <linux/ksm.h>
#include <linux/perf_event.h>
#include <linux/resource.h>
#include <linux/kernel.h>
#include <linux/workqueue.h>
#include <linux/capability.h>
#include <linux/device.h>
#include <linux/key.h>
#include <linux/times.h>
#include <linux/posix-timers.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/suspend.h>
#include <linux/tty.h>
#include <linux/signal.h>
#include <linux/cn_proc.h>
#include <linux/getcpu.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/seccomp.h>
#include <linux/cpu.h>
#include <linux/personality.h>
#include <linux/ptrace.h>
#include <linux/fs_struct.h>
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/gfp.h>
#include <linux/syscore_ops.h>
#include <linux/version.h>
#include <linux/ctype.h>
#include <linux/syscall_user_dispatch.h>

#include <linux/compat.h>
#include <linux/syscalls.h>
#include <linux/kprobes.h>
#include <linux/user_namespace.h>
#include <linux/time_namespace.h>
#include <linux/binfmts.h>

#include <linux/sched.h>
#include <linux/sched/autogroup.h>
#include <linux/sched/loadavg.h>
#include <linux/sched/stat.h>
#include <linux/sched/mm.h>
#include <linux/sched/coredump.h>
#include <linux/sched/task.h>
#include <linux/sched/cputime.h>
#include <linux/rcupdate.h>
#include <linux/uidgid.h>
#include <linux/cred.h>

#include <linux/nospec.h>

#include <linux/kmsg_dump.h>
/* Move somewhere else to avoid recompiling? */
#include <generated/utsrelease.h>

#include <linux/uaccess.h>
#include <asm/io.h>
#include <asm/unistd.h>

#include "uid16.h"

/*
 * Fallback definitions for helper macros. Each macro below is defined here
 * only when nothing earlier (presumably an architecture header) has already
 * defined it, so the names are always available in this file.
 * NOTE(review): GET_TSC_CTL and SET_TSC_CTL are object-like here while all
 * the other fallbacks are function-like -- confirm that this is intended.
 */
#ifndef SET_UNALIGN_CTL
#define SET_UNALIGN_CTL(a, b)
#endif
#ifndef GET_UNALIGN_CTL
#define GET_UNALIGN_CTL(a, b)
#endif
#ifndef SET_FPEMU_CTL
#define SET_FPEMU_CTL(a, b)
#endif
#ifndef GET_FPEMU_CTL
#define GET_FPEMU_CTL(a, b)
#endif
#ifndef SET_FPEXC_CTL
#define SET_FPEXC_CTL(a, b)
#endif
#ifndef GET_FPEXC_CTL
#define GET_FPEXC_CTL(a, b)
#endif
#ifndef GET_ENDIAN
#define GET_ENDIAN(a, b)
#endif
#ifndef SET_ENDIAN
#define SET_ENDIAN(a, b)
#endif
#ifndef GET_TSC_CTL
#define GET_TSC_CTL
#endif
#ifndef SET_TSC_CTL
#define SET_TSC_CTL
#endif
#ifndef GET_FP_MODE
#define GET_FP_MODE(a)
#endif
#ifndef SET_FP_MODE
#define SET_FP_MODE(a,b)
#endif
#ifndef SVE_SET_VL
#define SVE_SET_VL(a)
#endif
#ifndef SVE_GET_VL
#define SVE_GET_VL()
#endif
#ifndef SME_SET_VL
#define SME_SET_VL(a)
#endif
#ifndef SME_GET_VL
#define SME_GET_VL()
#endif
#ifndef PAC_RESET_KEYS
#define PAC_RESET_KEYS(a, b)
#endif
#ifndef PAC_SET_ENABLED_KEYS
#define PAC_SET_ENABLED_KEYS(a, b, c)
#endif
#ifndef PAC_GET_ENABLED_KEYS
#define PAC_GET_ENABLED_KEYS(a)
#endif
#ifndef SET_TAGGED_ADDR_CTRL
#define SET_TAGGED_ADDR_CTRL(a)
#endif
#ifndef GET_TAGGED_ADDR_CTRL
#define GET_TAGGED_ADDR_CTRL()
#endif
#ifndef RISCV_V_SET_CONTROL
#define RISCV_V_SET_CONTROL(a)
#endif
#ifndef RISCV_V_GET_CONTROL
#define RISCV_V_GET_CONTROL()
#endif
#ifndef RISCV_SET_ICACHE_FLUSH_CTX
#define RISCV_SET_ICACHE_FLUSH_CTX(a, b)
#endif
#ifndef PPC_GET_DEXCR_ASPECT
#define PPC_GET_DEXCR_ASPECT(a, b)
#endif
#ifndef PPC_SET_DEXCR_ASPECT
#define PPC_SET_DEXCR_ASPECT(a, b, c)
#endif

/*
 * this is where the system-wide overflow UID and GID are defined, for
 * architectures that now have 32-bit UID/GID but didn't in the past
 */

int overflowuid =;
int overflowgid =;

EXPORT_SYMBOL();
EXPORT_SYMBOL();

/*
 * the same as above, but for filesystems which can only store a 16-bit
 * UID and GID. as such, this is needed on all architectures
 */

int fs_overflowuid =;
int fs_overflowgid =;

EXPORT_SYMBOL();
EXPORT_SYMBOL();

/*
 * Permission check for changing the priority of task @p.
 *
 * Returns true if current's euid is the same as p's uid or euid,
 * or if current has CAP_SYS_NICE in p's user_ns.
 *
 * Must be called with rcu_read_lock() held, which keeps the creds safe
 * to access.
 */
static bool set_one_prio_perm(struct task_struct *p)
{}

/*
 * Set the priority of a task.
 * @p:       task whose priority is being set
 * @niceval: nice value to apply
 * @error:   status value handed in by the caller
 *           (NOTE(review): presumably the running result that is
 *           updated and returned -- confirm)
 *
 * The caller must hold the RCU read lock.
 */
static int set_one_prio(struct task_struct *p, int niceval, int error)
{}

/*
 * setpriority() system call: takes a @which selector, a @who identifier
 * and the nice value @niceval.
 * NOTE(review): presumably @which/@who pick the target task(s), each of
 * which is handled through set_one_prio() -- confirm.
 */
SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
{}

/*
 * getpriority() system call, taking the same @which/@who pair as
 * setpriority().
 *
 * Ugh. To avoid negative return values, "getpriority()" will
 * not return the normal nice-value, but a negated value that
 * has been offset by 20 (i.e. it returns 40..1 instead of -20..19)
 * to stay compatible.
 */
SYSCALL_DEFINE2(getpriority, int, which, int, who)
{}

/*
 * Unprivileged users may change the real gid to the effective gid
 * or vice versa.  (BSD-style)
 *
 * If you set the real gid at all, or set the effective gid to a value not
 * equal to the real gid, then the saved gid is set to the new effective gid.
 *
 * This makes it possible for a setgid program to completely drop its
 * privileges, which is often a useful assertion to make when you are doing
 * a security audit over a program.
 *
 * The general idea is that a program which uses just setregid() will be
 * 100% compatible with BSD.  A program which uses just setgid() will be
 * 100% compatible with POSIX with saved IDs.
 *
 * SMP: There are no races; the GIDs are checked only by filesystem
 *      operations (as far as semantic preservation is concerned).
 */
#ifdef CONFIG_MULTIUSER
/*
 * Kernel-internal setregid(): @rgid is the requested real gid and @egid
 * the requested effective gid.
 * NOTE(review): returns long -- presumably 0 or a negative errno; confirm.
 */
long __sys_setregid(gid_t rgid, gid_t egid)
{}

/*
 * setregid() system call entry point taking @rgid and @egid.
 * NOTE(review): presumably a thin wrapper around __sys_setregid() -- confirm.
 */
SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
{}

/*
 * Kernel-internal setgid(); @gid is the requested group id.
 *
 * setgid() is implemented like SysV w/ SAVED_IDS
 *
 * SMP: Same implicit races as above.
 */
long __sys_setgid(gid_t gid)
{}

/*
 * setgid() system call entry point taking @gid.
 * NOTE(review): presumably a thin wrapper around __sys_setgid() -- confirm.
 */
SYSCALL_DEFINE1(setgid, gid_t, gid)
{}

/*
 * Change the user struct in a credentials set to match the new UID.
 * @new: credentials set being updated
 *
 * NOTE(review): returns int -- presumably 0 on success or a negative
 * errno; confirm.
 */
static int set_user(struct cred *new)
{}

/*
 * Takes the new credentials @new and returns nothing.
 * NOTE(review): going by the name, presumably records that the process
 * count limit for the new user has been exceeded -- confirm.
 */
static void flag_nproc_exceeded(struct cred *new)
{}

/*
 * Kernel-internal setreuid(): @ruid is the requested real uid and @euid
 * the requested effective uid.
 *
 * Unprivileged users may change the real uid to the effective uid
 * or vice versa.  (BSD-style)
 *
 * If you set the real uid at all, or set the effective uid to a value not
 * equal to the real uid, then the saved uid is set to the new effective uid.
 *
 * This makes it possible for a setuid program to completely drop its
 * privileges, which is often a useful assertion to make when you are doing
 * a security audit over a program.
 *
 * The general idea is that a program which uses just setreuid() will be
 * 100% compatible with BSD.  A program which uses just setuid() will be
 * 100% compatible with POSIX with saved IDs.
 */
long __sys_setreuid(uid_t ruid, uid_t euid)
{}

/*
 * setreuid() system call entry point taking @ruid and @euid.
 * NOTE(review): presumably a thin wrapper around __sys_setreuid() -- confirm.
 */
SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
{}

/*
 * Kernel-internal setuid(); @uid is the requested user id.
 *
 * setuid() is implemented like SysV with SAVED_IDS
 *
 * Note that SAVED_IDs is deficient in that a setuid root program
 * like sendmail, for example, cannot set its uid to be a normal
 * user and then switch back, because if you're root, setuid() sets
 * the saved uid too.  If you don't like this, blame the bright people
 * in the POSIX committee and/or USG.  Note that the BSD-style setreuid()
 * will allow a root program to temporarily drop privileges and be able to
 * regain them by swapping the real and effective uid.
 */
long __sys_setuid(uid_t uid)
{}

/*
 * setuid() system call entry point taking @uid.
 * NOTE(review): presumably a thin wrapper around __sys_setuid() -- confirm.
 */
SYSCALL_DEFINE1(setuid, uid_t, uid)
{}


/*
 * This function implements a generic ability to update ruid, euid,
 * and suid.  This allows you to implement the 4.4 compatible seteuid().
 *
 * @ruid, @euid and @suid are the requested real, effective and saved uids.
 */
long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
{}

/*
 * setresuid() system call entry point taking @ruid, @euid and @suid.
 * NOTE(review): presumably a thin wrapper around __sys_setresuid() -- confirm.
 */
SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
{}

/*
 * getresuid() system call: @ruidp, @euidp and @suidp are user-space
 * pointers to uid_t.
 * NOTE(review): presumably each receives the caller's real, effective and
 * saved uid respectively -- confirm.
 */
SYSCALL_DEFINE3(getresuid, uid_t __user *, ruidp, uid_t __user *, euidp, uid_t __user *, suidp)
{}

/*
 * Same as __sys_setresuid() above, but for rgid, egid, sgid.
 *
 * @rgid, @egid and @sgid are the requested real, effective and saved gids.
 */
long __sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
{}

/*
 * setresgid() system call entry point taking @rgid, @egid and @sgid.
 * NOTE(review): presumably a thin wrapper around __sys_setresgid() -- confirm.
 */
SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid)
{}

/*
 * getresgid() system call: @rgidp, @egidp and @sgidp are user-space
 * pointers to gid_t.
 * NOTE(review): presumably each receives the caller's real, effective and
 * saved gid respectively -- confirm.
 */
SYSCALL_DEFINE3(getresgid, gid_t __user *, rgidp, gid_t __user *, egidp, gid_t __user *, sgidp)
{}


/*
 * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
 * is used for "access()" and for the NFS daemon (letting nfsd stay at
 * whatever uid it wants to). It normally shadows "euid", except when
 * explicitly set by setfsuid() or for access().
 *
 * @uid is the requested filesystem uid.
 */
long __sys_setfsuid(uid_t uid)
{}

/*
 * setfsuid() system call entry point taking @uid.
 * NOTE(review): presumably a thin wrapper around __sys_setfsuid() -- confirm.
 */
SYSCALL_DEFINE1(setfsuid, uid_t, uid)
{}

/*
 * Same as __sys_setfsuid() above, but for the fsgid.
 * (The original comment here was a joke in Swedish: "the same, in
 * Swedish..".)
 *
 * @gid is the requested filesystem gid.
 */
long __sys_setfsgid(gid_t gid)
{}

/*
 * setfsgid() system call entry point taking @gid.
 * NOTE(review): presumably a thin wrapper around __sys_setfsgid() -- confirm.
 */
SYSCALL_DEFINE1(setfsgid, gid_t, gid)
{}
#endif /* CONFIG_MULTIUSER */

/**
 * sys_getpid - return the thread group id of the current process
 *
 * Note, despite the name, this returns the tgid not the pid.  The tgid and
 * the pid are identical unless CLONE_THREAD was specified on clone() in
 * which case the tgid is the same in all threads of the same group.
 *
 * This is SMP safe as current->tgid does not change.
 */
SYSCALL_DEFINE0()
{

/* Thread ID - the internal kernel "pid" */
SYSCALL_DEFINE0()
{

/*
 * getppid() system call (takes no arguments).
 *
 * Accessing ->real_parent is not SMP-safe, it could
 * change from under us. However, we can use a stale
 * value of ->real_parent under rcu_read_lock(), see
 * release_task()->call_rcu(delayed_put_task_struct).
 */
SYSCALL_DEFINE0(getppid)
{}

SYSCALL_DEFINE0()
{

SYSCALL_DEFINE0()
{

SYSCALL_DEFINE0()
{

SYSCALL_DEFINE0()
{

/*
 * Helper taking a kernel-space struct tms, @tms, and returning nothing.
 * NOTE(review): presumably fills @tms with the times reported by the
 * times() syscalls -- confirm.
 */
static void do_sys_times(struct tms *tms)
{}

/*
 * times() system call; @tbuf is a user-space pointer to struct tms.
 * NOTE(review): presumably built on do_sys_times() -- confirm.
 */
SYSCALL_DEFINE1(times, struct tms __user *, tbuf)
{}

#ifdef CONFIG_COMPAT
/*
 * Convert the native clock_t value @x into a compat_clock_t.
 * NOTE(review): the exact conversion is not visible here -- confirm.
 */
static compat_clock_t clock_t_to_compat_clock_t(clock_t x)
{}

/*
 * Compat variant of the times() system call; @tbuf is a user-space
 * pointer to struct compat_tms.
 */
COMPAT_SYSCALL_DEFINE1(times, struct compat_tms __user *, tbuf)
{}
#endif

/*
 * setpgid() system call taking a process id @pid and a process group
 * id @pgid.
 *
 * This needs some heavy checking ...
 * I just haven't the stomach for it. I also don't fully
 * understand sessions/pgrp etc. Let somebody who does explain it.
 *
 * OK, I think I have the protection semantics right.... this is really
 * only important on a multi-user system anyway, to make sure one user
 * can't send a signal to a process owned by another.  -TYT, 12/12/91
 *
 * !PF_FORKNOEXEC check to conform completely to POSIX.
 */
SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
{}

/*
 * Helper taking a process id @pid and returning an int.
 * NOTE(review): presumably the shared implementation behind getpgid()
 * (and getpgrp() where provided) -- confirm.
 */
static int do_getpgid(pid_t pid)
{}

/*
 * getpgid() system call taking a process id @pid.
 * NOTE(review): presumably a thin wrapper around do_getpgid() -- confirm.
 */
SYSCALL_DEFINE1(getpgid, pid_t, pid)
{}

#ifdef __ARCH_WANT_SYS_GETPGRP

SYSCALL_DEFINE0()
{

#endif

/*
 * getsid() system call taking a process id @pid.
 */
SYSCALL_DEFINE1(getsid, pid_t, pid)
{}

/*
 * Takes a struct pid, @pid, and returns nothing.
 * NOTE(review): presumably updates the current task's session and
 * process-group pids to @pid on behalf of setsid -- confirm.
 */
static void set_special_pids(struct pid *pid)
{}

/*
 * Non-static, kernel-callable setsid; takes no arguments and returns
 * an int.
 * NOTE(review): presumably the session id on success or a negative
 * errno -- confirm.
 */
int ksys_setsid(void)
{}

SYSCALL_DEFINE0()
{

DECLARE_RWSEM();

/*
 * override_architecture() hook: a function-like form is provided when
 * COMPAT_UTS_MACHINE is defined, an object-like placeholder otherwise.
 * NOTE(review): both expansions are empty here and the two forms differ
 * in arity -- confirm against the users of this macro.
 */
#ifdef COMPAT_UTS_MACHINE
#define override_architecture(name)
#else
#define override_architecture
#endif

/*
 * Work around broken programs that cannot handle "Linux 3.0".
 * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40
 * And we map 4.x and later versions to 2.6.60+x, so 4.0/5.0/6.0/... would be
 * 2.6.60.
 *
 * @release: user-space buffer holding the release string
 * @len:     size of that buffer
 */
static int override_release(char __user *release, size_t len)
{}

/*
 * newuname() system call; @name is a user-space pointer to
 * struct new_utsname.
 */
SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
{}

#ifdef __ARCH_WANT_SYS_OLD_UNAME
/*
 * Old cruft: the uname() system call using the older struct old_utsname
 * layout; @name is a user-space pointer to it. Only built when
 * __ARCH_WANT_SYS_OLD_UNAME is defined.
 */
SYSCALL_DEFINE1(uname, struct old_utsname __user *, name)
{}

/*
 * olduname() system call using struct oldold_utsname; @name is a
 * user-space pointer to it. Only built when __ARCH_WANT_SYS_OLD_UNAME
 * is defined.
 */
SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name)
{}
#endif

/*
 * sethostname() system call: @name is a user-space buffer and @len its
 * length (a signed int).
 */
SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
{}

#ifdef __ARCH_WANT_SYS_GETHOSTNAME

/*
 * gethostname() system call: @name is the user-space destination buffer
 * and @len its size (a signed int). Only built when
 * __ARCH_WANT_SYS_GETHOSTNAME is defined.
 */
SYSCALL_DEFINE2(gethostname, char __user *, name, int, len)
{}

#endif

/*
 * setdomainname() system call: @name is a user-space buffer and @len its
 * length (a signed int).
 *
 * Only setdomainname; getdomainname can be implemented by calling
 * uname()
 */
SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
{}

/*
 * Make sure you are allowed to change @tsk limits before calling this.
 *
 * @tsk:      task whose limit is addressed
 * @resource: index of the resource limit
 * @new_rlim: new limit (NOTE(review): presumably may be NULL for
 *            "do not change" -- confirm)
 * @old_rlim: where to report the previous limit (NOTE(review):
 *            presumably may be NULL -- confirm)
 */
static int do_prlimit(struct task_struct *tsk, unsigned int resource,
		      struct rlimit *new_rlim, struct rlimit *old_rlim)
{}

/*
 * getrlimit() system call: @resource is the limit index and @rlim the
 * user-space struct rlimit.
 */
SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
{}

#ifdef CONFIG_COMPAT

/*
 * Compat variant of setrlimit(): @rlim points to a user-space
 * struct compat_rlimit.
 */
COMPAT_SYSCALL_DEFINE2(setrlimit, unsigned int, resource,
		       struct compat_rlimit __user *, rlim)
{}

/*
 * Compat variant of getrlimit(): @rlim points to a user-space
 * struct compat_rlimit.
 */
COMPAT_SYSCALL_DEFINE2(getrlimit, unsigned int, resource,
		       struct compat_rlimit __user *, rlim)
{}

#endif

#ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT

/*
 * Back compatibility for getrlimit. Needed for some apps.
 * Only built when __ARCH_WANT_SYS_OLD_GETRLIMIT is defined.
 *
 * @resource is the limit index and @rlim the user-space struct rlimit.
 */
SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
		struct rlimit __user *, rlim)
{}

#ifdef CONFIG_COMPAT
/*
 * Compat variant of old_getrlimit(): @rlim points to a user-space
 * struct compat_rlimit.
 */
COMPAT_SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
		       struct compat_rlimit __user *, rlim)
{}
#endif

#endif

/*
 * Predicate on the 64-bit limit value @rlim64.
 * NOTE(review): going by the name, presumably true when @rlim64 denotes
 * "no limit" -- confirm the exact comparison.
 */
static inline bool rlim64_is_infinity(__u64 rlim64)
{}

/*
 * Convert the read-only struct rlimit @rlim into the struct rlimit64
 * @rlim64.
 */
static void rlim_to_rlim64(const struct rlimit *rlim, struct rlimit64 *rlim64)
{}

/*
 * Convert the read-only struct rlimit64 @rlim64 into the struct rlimit
 * @rlim.
 */
static void rlim64_to_rlim(const struct rlimit64 *rlim64, struct rlimit *rlim)
{}

/*
 * Permission check for operating on @task's resource limits.
 * @task:  task being inspected or modified
 * @flags: caller-supplied flags (NOTE(review): presumably describe the
 *         kind of access requested -- confirm)
 *
 * The RCU lock must be held by the caller.
 */
static int check_prlimit_permission(struct task_struct *task,
				    unsigned int flags)
{}

/*
 * prlimit64() system call.
 * @pid:      target process id
 * @resource: index of the resource limit
 * @new_rlim: user-space pointer to the new limit (read-only)
 * @old_rlim: user-space pointer receiving the previous limit
 */
SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource,
		const struct rlimit64 __user *, new_rlim,
		struct rlimit64 __user *, old_rlim)
{}

/*
 * setrlimit() system call: @resource is the limit index and @rlim the
 * user-space struct rlimit.
 */
SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
{}

/*
 * It would make sense to put struct rusage in the task_struct,
 * except that would make the task_struct be *really big*.  After
 * task_struct gets moved into malloc'ed memory, it would
 * make sense to do this.  It will make moving the rest of the information
 * a lot simpler!  (Which we're not doing right now because we're not
 * measuring them yet).
 *
 * When sampling multiple threads for RUSAGE_SELF, under SMP we might have
 * races with threads incrementing their own counters.  But since word
 * reads are atomic, we either get new values or old values and we don't
 * care which for the sums.  We always take the siglock to protect reading
 * the c* fields from p->signal from races with exit.c updating those
 * fields when reaping, so a sample either gets all the additions of a
 * given child after it's reaped, or none so this sample is before reaping.
 *
 * Locking:
 * We need to take the siglock for CHILDREN, SELF and BOTH
 * for the cases: current multithreaded, non-current single threaded, and
 * non-current multithreaded.  Thread traversal is now safe with
 * the siglock held.
 * Strictly speaking, we do not need to take the siglock if we are current and
 * single threaded, as no one else can take our signal_struct away, no one
 * else can reap the children to update signal->c* counters, and no one else
 * can race with the signal-> fields. If we do not take any lock, the
 * signal-> fields could be read out of order while another thread was just
 * exiting. So we should place a read memory barrier when we avoid the lock.
 * On the writer side, a write memory barrier is implied in __exit_signal
 * as __exit_signal releases the siglock spinlock after updating the signal->
 * fields. But we don't do this yet to keep things simple.
 *
 */

/*
 * Takes a thread @t and an rusage accumulator @r.
 * NOTE(review): going by the name, presumably adds @t's counters
 * into @r -- confirm which fields.
 */
static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r)
{}

/*
 * Kernel-internal getrusage.
 * @p:   task to sample
 * @who: selector for what to report (the locking notes above mention
 *       CHILDREN, SELF and BOTH)
 * @r:   kernel-space result buffer
 */
void getrusage(struct task_struct *p, int who, struct rusage *r)
{}

/*
 * getrusage() system call: @who selects what to report and @ru is the
 * user-space struct rusage.
 */
SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru)
{}

#ifdef CONFIG_COMPAT
/*
 * Compat variant of getrusage(): @ru points to a user-space
 * struct compat_rusage.
 */
COMPAT_SYSCALL_DEFINE2(getrusage, int, who, struct compat_rusage __user *, ru)
{}
#endif

/*
 * umask() system call taking the new mask @mask as an int.
 * NOTE(review): presumably returns the previous mask -- confirm.
 */
SYSCALL_DEFINE1(umask, int, mask)
{}

/*
 * prctl helper taking the address space @mm and a file descriptor @fd.
 * NOTE(review): going by the name, presumably makes the file behind @fd
 * the exe file of @mm -- confirm.
 */
static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
{}

/*
 * Check arithmetic relations of passed addresses.
 *
 * @prctl_map: the address map to validate
 *
 * WARNING: we don't require any capability here so be very careful
 * in what is allowed for modification from userspace.
 */
static int validate_prctl_map_addr(struct prctl_mm_map *prctl_map)
{}

#ifdef CONFIG_CHECKPOINT_RESTORE
/*
 * prctl helper, only built with CONFIG_CHECKPOINT_RESTORE.
 * @opt:       sub-option selector
 * @addr:      read-only user-space pointer
 * @data_size: size associated with @addr
 * NOTE(review): presumably @addr points to a struct prctl_mm_map of
 * @data_size bytes -- confirm.
 */
static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data_size)
{}
#endif /* CONFIG_CHECKPOINT_RESTORE */

/*
 * prctl helper taking the address space @mm, an address @addr and a
 * length @len.
 * NOTE(review): presumably @addr is a user-space address of @len bytes of
 * auxiliary-vector data to install in @mm -- confirm.
 */
static int prctl_set_auxv(struct mm_struct *mm, unsigned long addr,
			  unsigned long len)
{}

/*
 * prctl helper taking a sub-option @opt, an address @addr and the raw
 * prctl arguments @arg4 and @arg5.
 * NOTE(review): presumably dispatches on @opt to the prctl_set_mm_*()
 * and prctl_set_auxv() helpers -- confirm.
 */
static int prctl_set_mm(int opt, unsigned long addr,
			unsigned long arg4, unsigned long arg5)
{}

#ifdef CONFIG_CHECKPOINT_RESTORE
/*
 * CONFIG_CHECKPOINT_RESTORE variant.
 * @me:       task being queried
 * @tid_addr: user-space location that receives a user-space int pointer
 * NOTE(review): presumably reports @me's clear_child_tid address --
 * confirm.
 */
static int prctl_get_tid_address(struct task_struct *me, int __user * __user *tid_addr)
{}
#else
/*
 * Stub used when CONFIG_CHECKPOINT_RESTORE is not set: the request is
 * not supported, so both arguments are ignored and -EINVAL is returned.
 */
static int prctl_get_tid_address(struct task_struct *me, int __user * __user *tid_addr)
{
	return -EINVAL;
}
#endif

/*
 * Callback-shaped helper taking a task @p and an opaque @data pointer.
 * NOTE(review): presumably invoked for each descendant task to propagate
 * a "has child subreaper" state -- confirm caller and return convention.
 */
static int propagate_has_child_subreaper(struct task_struct *p, void *data)
{}

/*
 * Weak default that an architecture may override.
 * @t:     task being queried
 * @which: selector for the item to query
 */
int __weak arch_prctl_spec_ctrl_get(struct task_struct *t, unsigned long which)
{}

/*
 * Weak default that an architecture may override.
 * @t:     task being modified
 * @which: selector for the item to change
 * @ctrl:  requested control value
 */
int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which,
				    unsigned long ctrl)
{}

#define PR_IO_FLUSHER

#ifdef CONFIG_ANON_VMA_NAME

#define ANON_VMA_NAME_MAX_LEN
#define ANON_VMA_NAME_INVALID_CHARS

/*
 * Predicate on a single character @ch, built with CONFIG_ANON_VMA_NAME.
 * NOTE(review): presumably rejects characters that may not appear in an
 * anonymous VMA name (cf. ANON_VMA_NAME_INVALID_CHARS) -- confirm.
 */
static inline bool is_valid_name_char(char ch)
{}

/*
 * CONFIG_ANON_VMA_NAME variant.
 * @opt:  sub-option selector
 * @addr: start of the address range
 * @size: length of the address range
 * @arg:  option-specific argument (NOTE(review): presumably a user
 *        pointer to the name string -- confirm)
 */
static int prctl_set_vma(unsigned long opt, unsigned long addr,
			 unsigned long size, unsigned long arg)
{}

#else /* CONFIG_ANON_VMA_NAME */
/*
 * Stub used when CONFIG_ANON_VMA_NAME is not set: the request is not
 * supported, so all arguments are ignored and -EINVAL is returned.
 */
static int prctl_set_vma(unsigned long opt, unsigned long start,
			 unsigned long size, unsigned long arg)
{
	return -EINVAL;
}
#endif /* CONFIG_ANON_VMA_NAME */

/*
 * Takes no arguments and returns an unsigned long.
 * NOTE(review): presumably the MDWE flag bits of the current task --
 * confirm.
 */
static inline unsigned long get_current_mdwe(void)
{}

/*
 * prctl helper: @bits are the requested flag bits; @arg3, @arg4 and @arg5
 * are the remaining raw prctl arguments.
 * NOTE(review): presumably the unused arguments must be zero -- confirm.
 */
static inline int prctl_set_mdwe(unsigned long bits, unsigned long arg3,
				 unsigned long arg4, unsigned long arg5)
{}

/*
 * prctl helper taking the four raw prctl arguments @arg2..@arg5.
 * NOTE(review): presumably all must be zero and the result is that of
 * get_current_mdwe() -- confirm.
 */
static inline int prctl_get_mdwe(unsigned long arg2, unsigned long arg3,
				 unsigned long arg4, unsigned long arg5)
{}

/*
 * prctl helper: @addr is a user-space destination buffer and @len its
 * size.
 * NOTE(review): presumably copies the current auxiliary vector out --
 * confirm truncation and return-value rules.
 */
static int prctl_get_auxv(void __user *addr, unsigned long len)
{}

/*
 * prctl() system call: @option selects the operation and @arg2..@arg5
 * are its option-specific arguments.
 */
SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
		unsigned long, arg4, unsigned long, arg5)
{}

/*
 * getcpu() system call.
 * @cpup:   user-space pointer for the CPU number
 * @nodep:  user-space pointer for the node number
 * @unused: user-space struct getcpu_cache pointer (NOTE(review):
 *          presumably ignored, as the name says -- confirm)
 */
SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
		struct getcpu_cache __user *, unused)
{}

/**
 * do_sysinfo - fill in sysinfo struct
 * @info: pointer to buffer to fill
 *
 * Return: an int status; NOTE(review): presumably 0 on success -- confirm.
 */
static int do_sysinfo(struct sysinfo *info)
{}

/*
 * sysinfo() system call; @info is a user-space pointer to struct sysinfo.
 * NOTE(review): presumably built on do_sysinfo() -- confirm.
 */
SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info)
{}

#ifdef CONFIG_COMPAT
/*
 * Structure used by the compat sysinfo() system call below.
 * NOTE(review): no members are declared here -- confirm the layout.
 */
struct compat_sysinfo {};

/*
 * Compat variant of sysinfo(): @info points to a user-space
 * struct compat_sysinfo.
 */
COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info)
{}
#endif /* CONFIG_COMPAT */