// SPDX-License-Identifier: GPL-2.0-only
/*
 * Generic pidhash and scalable, time-bounded PID allocator
 *
 * (C) 2002-2003 Nadia Yvette Chambers, IBM
 * (C) 2004 Nadia Yvette Chambers, Oracle
 * (C) 2002-2004 Ingo Molnar, Red Hat
 *
 * pid-structures are backing objects for tasks sharing a given ID to chain
 * against. There is very little to them aside from hashing them and
 * parking tasks using given ID's on a list.
 *
 * The hash is always changed with the tasklist_lock write-acquired,
 * and the hash is only accessed with the tasklist_lock at least
 * read-acquired, so there's no additional SMP locking needed here.
 *
 * We have a list of bitmap pages, which bitmaps represent the PID space.
 * Allocating and freeing PIDs is completely lockless. The worst-case
 * allocation scenario when all but one out of 1 million PIDs possible are
 * allocated already: the scanning of 32 list entries and at most PAGE_SIZE
 * bytes. The typical fastpath is a single successful setbit. Freeing is O(1).
 *
 * Pid namespaces:
 *    (C) 2007 Pavel Emelyanov <[email protected]>, OpenVZ, SWsoft Inc.
 *    (C) 2007 Sukadev Bhattiprolu <[email protected]>, IBM
 *     Many thanks to Oleg Nesterov for comments and help
 *
 */

#include <linux/mm.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/rculist.h>
#include <linux/memblock.h>
#include <linux/pid_namespace.h>
#include <linux/init_task.h>
#include <linux/syscalls.h>
#include <linux/proc_ns.h>
#include <linux/refcount.h>
#include <linux/anon_inodes.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/idr.h>
#include <linux/pidfs.h>
#include <net/sock.h>
#include <uapi/linux/pidfd.h>

struct pid init_struct_pid = …;
int pid_max = …;
int pid_max_min = …;
int pid_max_max = …;

/*
 * Pseudo filesystems start inode numbering after one. We use Reserved
 * PIDs as a natural offset.
 */
static u64 pidfs_ino = …;

/*
 * PID-map pages start out as NULL, they get allocated upon
 * first use and are never deallocated. This way a low pid_max
 * value does not cause lots of bitmaps to be allocated, but
 * the scheme scales to up to 4 million PIDs, runtime.
 */
struct pid_namespace init_pid_ns = …;
EXPORT_SYMBOL_GPL(…);

/*
 * Note: disable interrupts while the pidmap_lock is held as an
 * interrupt might come in and do read_lock(&tasklist_lock).
 *
 * If we don't disable interrupts there is a nasty deadlock between
 * detach_pid()->free_pid() and another cpu that does
 * spin_lock(&pidmap_lock) followed by an interrupt routine that does
 * read_lock(&tasklist_lock);
 *
 * After we clean up the tasklist_lock and know there are no
 * irq handlers that take it we can leave the interrupts enabled.
 * For now it is easier to be safe than to prove it can't happen.
 */
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);

void put_pid(struct pid *pid)
{ … }
EXPORT_SYMBOL_GPL(…);

static void delayed_put_pid(struct rcu_head *rhp)
{ … }

void free_pid(struct pid *pid)
{ … }

struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
		      size_t set_tid_size)
{ … }

void disable_pid_allocation(struct pid_namespace *ns)
{ … }

struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
{ … }
EXPORT_SYMBOL_GPL(…);

struct pid *find_vpid(int nr)
{ … }
EXPORT_SYMBOL_GPL(…);

static struct pid **task_pid_ptr(struct task_struct *task, enum pid_type type)
{ … }
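/*
 * Illustrative sketch only, not part of the original source: the pidmap_lock
 * comment above requires interrupts to be disabled while the lock is held,
 * so that an interrupt taking read_lock(&tasklist_lock) cannot deadlock
 * against a pidmap_lock holder. The function name example_free_pid_numbers()
 * is hypothetical; the real free path is the elided free_pid() above. The
 * point shown is only the spin_lock_irqsave()/spin_unlock_irqrestore()
 * pairing.
 */
#if 0	/* example sketch, not built */
static void example_free_pid_numbers(struct pid *pid)
{
	unsigned long flags;

	/* IRQs off while pidmap_lock is held, per the locking comment. */
	spin_lock_irqsave(&pidmap_lock, flags);
	/* ... drop the pid's numbers from the per-namespace allocator ... */
	spin_unlock_irqrestore(&pidmap_lock, flags);
}
#endif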
/*
 * attach_pid() must be called with the tasklist_lock write-held.
 */
void attach_pid(struct task_struct *task, enum pid_type type)
{ … }

static void __change_pid(struct task_struct *task, enum pid_type type,
			 struct pid *new)
{ … }

void detach_pid(struct task_struct *task, enum pid_type type)
{ … }

void change_pid(struct task_struct *task, enum pid_type type,
		struct pid *pid)
{ … }

void exchange_tids(struct task_struct *left, struct task_struct *right)
{ … }

/* transfer_pid is an optimization of attach_pid(new), detach_pid(old) */
void transfer_pid(struct task_struct *old, struct task_struct *new,
		  enum pid_type type)
{ … }

struct task_struct *pid_task(struct pid *pid, enum pid_type type)
{ … }
EXPORT_SYMBOL(…);

/*
 * Must be called under rcu_read_lock().
 */
struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
{ … }

struct task_struct *find_task_by_vpid(pid_t vnr)
{ … }

struct task_struct *find_get_task_by_vpid(pid_t nr)
{ … }

struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
{ … }
EXPORT_SYMBOL_GPL(…);

struct task_struct *get_pid_task(struct pid *pid, enum pid_type type)
{ … }
EXPORT_SYMBOL_GPL(…);

struct pid *find_get_pid(pid_t nr)
{ … }
EXPORT_SYMBOL_GPL(…);

pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
{ … }
EXPORT_SYMBOL_GPL(…);

pid_t pid_vnr(struct pid *pid)
{ … }
EXPORT_SYMBOL_GPL(…);

pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
		       struct pid_namespace *ns)
{ … }
EXPORT_SYMBOL(…);

struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
{ … }
EXPORT_SYMBOL_GPL(…);

/*
 * Used by proc to find the first pid that is greater than or equal to nr.
 *
 * If there is a pid at nr this function is exactly the same as find_pid_ns.
 */
struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
{ … }
EXPORT_SYMBOL_GPL(…);

struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags)
{ … }

/**
 * pidfd_get_task() - Get the task associated with a pidfd
 *
 * @pidfd: pidfd for which to get the task
 * @flags: flags associated with this pidfd
 *
 * Return the task associated with @pidfd. The function takes a reference on
 * the returned task. The caller is responsible for releasing that reference.
 *
 * Return: On success, the task_struct associated with the pidfd.
 *	   On error, a negative errno number will be returned.
 */
struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags)
{ … }

/**
 * pidfd_create() - Create a new pid file descriptor.
 *
 * @pid:   struct pid that the pidfd will reference
 * @flags: flags to pass
 *
 * This creates a new pid file descriptor with the O_CLOEXEC flag set.
 *
 * Note, that this function can only be called after the fd table has
 * been unshared to avoid leaking the pidfd to the new process.
 *
 * This symbol should not be explicitly exported to loadable modules.
 *
 * Return: On success, a cloexec pidfd is returned.
 *         On error, a negative errno number will be returned.
 */
static int pidfd_create(struct pid *pid, unsigned int flags)
{ … }
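/*
 * Illustrative caller pattern, not part of the original source: per the
 * pidfd_get_task() kerneldoc above, the function takes a reference on the
 * returned task which the caller must drop. example_with_pidfd_task() is a
 * hypothetical name, and the sketch assumes errors are reported as
 * ERR_PTR() values, consistent with the "negative errno" wording above.
 */
#if 0	/* example sketch, not built */
static int example_with_pidfd_task(int pidfd)
{
	unsigned int flags;
	struct task_struct *task;

	task = pidfd_get_task(pidfd, &flags);
	if (IS_ERR(task))
		return PTR_ERR(task);

	/* ... operate on @task here ... */

	/* Drop the reference pidfd_get_task() acquired for us. */
	put_task_struct(task);
	return 0;
}
#endif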
/**
 * sys_pidfd_open() - Open new pid file descriptor.
 *
 * @pid:   pid for which to retrieve a pidfd
 * @flags: flags to pass
 *
 * This creates a new pid file descriptor with the O_CLOEXEC flag set for
 * the task identified by @pid. Without PIDFD_THREAD flag the target task
 * must be a thread-group leader.
 *
 * Return: On success, a cloexec pidfd is returned.
 *         On error, a negative errno number will be returned.
 */
SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags)
{ … }

void __init pid_idr_init(void)
{ … }

static struct file *__pidfd_fget(struct task_struct *task, int fd)
{ … }

static int pidfd_getfd(struct pid *pid, int fd)
{ … }

/**
 * sys_pidfd_getfd() - Get a file descriptor from another process
 *
 * @pidfd:	the pidfd file descriptor of the process
 * @fd:		the file descriptor number to get
 * @flags:	flags on how to get the fd (reserved)
 *
 * This syscall gets a copy of a file descriptor from another process
 * based on the pidfd, and file descriptor number. It requires that
 * the calling process has the ability to ptrace the process represented
 * by the pidfd. The process which is having its file descriptor copied
 * is otherwise unaffected.
 *
 * Return: On success, a cloexec file descriptor is returned.
 *         On error, a negative errno number will be returned.
 */
SYSCALL_DEFINE3(pidfd_getfd, int, pidfd, int, fd, unsigned int, flags)
{ … }
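/*
 * Illustrative userspace usage, not part of the original source: a sketch
 * of how the two syscalls defined above are typically driven via syscall(2)
 * on kernels that provide them. target_pid and the descriptor number 3 are
 * placeholders; error handling is elided. Per the kerneldoc above,
 * pidfd_getfd() requires ptrace permission over the target and its flags
 * argument is reserved (pass 0).
 *
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	int pidfd = syscall(__NR_pidfd_open, target_pid, 0);
 *	int fd    = syscall(__NR_pidfd_getfd, pidfd, 3, 0);
 */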