// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/file.c * * Copyright (C) 1998-1999, Stephen Tweedie and Bill Hawes * * Manage the dynamic fd arrays in the process files_struct. */ #include <linux/syscalls.h> #include <linux/export.h> #include <linux/fs.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/sched/signal.h> #include <linux/slab.h> #include <linux/file.h> #include <linux/fdtable.h> #include <linux/bitops.h> #include <linux/spinlock.h> #include <linux/rcupdate.h> #include <linux/close_range.h> #include <net/sock.h> #include "internal.h" unsigned int sysctl_nr_open __read_mostly = …; unsigned int sysctl_nr_open_min = …; /* our min() is unusable in constant expressions ;-/ */ #define __const_min(x, y) … unsigned int sysctl_nr_open_max = …; static void __free_fdtable(struct fdtable *fdt) { … } static void free_fdtable_rcu(struct rcu_head *rcu) { … } #define BITBIT_NR(nr) … #define BITBIT_SIZE(nr) … /* * Copy 'count' fd bits from the old table to the new table and clear the extra * space if any. This does not copy the file pointers. Called with the files * spinlock held for write. */ static void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt, unsigned int count) { … } /* * Copy all file descriptors from the old table to the new, expanded table and * clear the extra space. Called with the files spinlock held for write. */ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) { … } /* * Note how the fdtable bitmap allocations very much have to be a multiple of * BITS_PER_LONG. This is not only because we walk those things in chunks of * 'unsigned long' in some places, but simply because that is how the Linux * kernel bitmaps are defined to work: they are not "bits in an array of bytes", * they are very much "bits in an array of unsigned long". * * The ALIGN(nr, BITS_PER_LONG) here is for clarity: since we just multiplied * by that "1024/sizeof(ptr)" before, we already know there are sufficient * clear low bits. 
Clang seems to realize that, gcc ends up being confused. * * On a 128-bit machine, the ALIGN() would actually matter. In the meantime, * let's consider it documentation (and maybe a test-case for gcc to improve * its code generation ;) */ static struct fdtable * alloc_fdtable(unsigned int nr) { … } /* * Expand the file descriptor table. * This function will allocate a new fdtable and both fd array and fdset, of * the given size. * Return <0 error code on error; 1 on successful completion. * The files->file_lock should be held on entry, and will be held on exit. */ static int expand_fdtable(struct files_struct *files, unsigned int nr) __releases(files->file_lock) __acquires(files->file_lock) { … } /* * Expand files. * This function will expand the file structures, if the requested size exceeds * the current capacity and there is room for expansion. * Return <0 error code on error; 0 when nothing done; 1 when files were * expanded and execution may have blocked. * The files->file_lock should be held on entry, and will be held on exit. */ static int expand_files(struct files_struct *files, unsigned int nr) __releases(files->file_lock) __acquires(files->file_lock) { … } static inline void __set_close_on_exec(unsigned int fd, struct fdtable *fdt) { … } static inline void __clear_close_on_exec(unsigned int fd, struct fdtable *fdt) { … } static inline void __set_open_fd(unsigned int fd, struct fdtable *fdt) { … } static inline void __clear_open_fd(unsigned int fd, struct fdtable *fdt) { … } static inline bool fd_is_open(unsigned int fd, const struct fdtable *fdt) { … } static unsigned int count_open_files(struct fdtable *fdt) { … } /* * Note that a sane fdtable size always has to be a multiple of * BITS_PER_LONG, since we have bitmaps that are sized by this. 
 * * 'max_fds' will normally already be properly aligned, but it * turns out that in the close_range() -> __close_range() -> * unshare_fd() -> dup_fd() -> sane_fdtable_size() we can end * up having a 'max_fds' value that isn't already aligned. * * Rather than make close_range() have to worry about this, * just make that BITS_PER_LONG alignment be part of a sane * fdtable size. Because that's really what it is. */ static unsigned int sane_fdtable_size(struct fdtable *fdt, unsigned int max_fds) { … } /* * Allocate a new files structure and copy contents from the * passed in files structure. * errorp will be valid only when the returned files_struct is NULL. */ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int *errorp) { … } static struct fdtable *close_files(struct files_struct * files) { … } void put_files_struct(struct files_struct *files) { … } void exit_files(struct task_struct *tsk) { … } struct files_struct init_files = …; static unsigned int find_next_fd(struct fdtable *fdt, unsigned int start) { … } /* * allocate a file descriptor, mark it busy. */ static int alloc_fd(unsigned start, unsigned end, unsigned flags) { … } int __get_unused_fd_flags(unsigned flags, unsigned long nofile) { … } int get_unused_fd_flags(unsigned flags) { … } EXPORT_SYMBOL(…); static void __put_unused_fd(struct files_struct *files, unsigned int fd) { … } void put_unused_fd(unsigned int fd) { … } EXPORT_SYMBOL(…); /* * Install a file pointer in the fd array. * * The VFS is full of places where we drop the files lock between * setting the open_fds bitmap and installing the file in the file * array. At any such point, we are vulnerable to a dup2() race * installing a file in the array before us. We need to detect this and * fput() the struct file we are about to overwrite in this case. * * It should never happen - if we allow dup2() do it, _really_ bad things * will follow. 
* * This consumes the "file" refcount, so callers should treat it * as if they had called fput(file). */ void fd_install(unsigned int fd, struct file *file) { … } EXPORT_SYMBOL(…); /** * file_close_fd_locked - return file associated with fd * @files: file struct to retrieve file from * @fd: file descriptor to retrieve file for * * Doesn't take a separate reference count. * * Context: files_lock must be held. * * Returns: The file associated with @fd (NULL if @fd is not open) */ struct file *file_close_fd_locked(struct files_struct *files, unsigned fd) { … } int close_fd(unsigned fd) { … } EXPORT_SYMBOL(…); /* for ksys_close() */ /** * last_fd - return last valid index into fd table * @fdt: File descriptor table. * * Context: Either rcu read lock or files_lock must be held. * * Returns: Last valid index into fdtable. */ static inline unsigned last_fd(struct fdtable *fdt) { … } static inline void __range_cloexec(struct files_struct *cur_fds, unsigned int fd, unsigned int max_fd) { … } static inline void __range_close(struct files_struct *files, unsigned int fd, unsigned int max_fd) { … } /** * __close_range() - Close all file descriptors in a given range. * * @fd: starting file descriptor to close * @max_fd: last file descriptor to close * @flags: CLOSE_RANGE flags. * * This closes a range of file descriptors. All file descriptors * from @fd up to and including @max_fd are closed. */ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags) { … } /** * file_close_fd - return file associated with fd * @fd: file descriptor to retrieve file for * * Doesn't take a separate reference count. 
 * * Returns: The file associated with @fd (NULL if @fd is not open) */ struct file *file_close_fd(unsigned int fd) { … } void do_close_on_exec(struct files_struct *files) { … } static struct file *__get_file_rcu(struct file __rcu **f) { … } /** * get_file_rcu - try to get a reference to a file under rcu * @f: the file to get a reference on * * This function tries to get a reference on @f carefully verifying that * @f hasn't been reused. * * This function should rarely have to be used and only by users who * understand the implications of SLAB_TYPESAFE_BY_RCU. Try to avoid it. * * Return: Returns @f with the reference count increased or NULL. */ struct file *get_file_rcu(struct file __rcu **f) { … } EXPORT_SYMBOL_GPL(…); /** * get_file_active - try to get a reference to a file * @f: the file to get a reference on * * In contrast to get_file_rcu() the pointer itself isn't part of the * reference counting. * * This function should rarely have to be used and only by users who * understand the implications of SLAB_TYPESAFE_BY_RCU. Try to avoid it. * * Return: Returns @f with the reference count increased or NULL. 
*/ struct file *get_file_active(struct file **f) { … } EXPORT_SYMBOL_GPL(…); static inline struct file *__fget_files_rcu(struct files_struct *files, unsigned int fd, fmode_t mask) { … } static struct file *__fget_files(struct files_struct *files, unsigned int fd, fmode_t mask) { … } static inline struct file *__fget(unsigned int fd, fmode_t mask) { … } struct file *fget(unsigned int fd) { … } EXPORT_SYMBOL(…); struct file *fget_raw(unsigned int fd) { … } EXPORT_SYMBOL(…); struct file *fget_task(struct task_struct *task, unsigned int fd) { … } struct file *lookup_fdget_rcu(unsigned int fd) { … } EXPORT_SYMBOL_GPL(…); struct file *task_lookup_fdget_rcu(struct task_struct *task, unsigned int fd) { … } struct file *task_lookup_next_fdget_rcu(struct task_struct *task, unsigned int *ret_fd) { … } EXPORT_SYMBOL(…); /* * Lightweight file lookup - no refcnt increment if fd table isn't shared. * * You can use this instead of fget if you satisfy all of the following * conditions: * 1) You must call fput_light before exiting the syscall and returning control * to userspace (i.e. you cannot remember the returned struct file * after * returning to userspace). * 2) You must not call filp_close on the returned struct file * in between * calls to fget_light and fput_light. * 3) You must not clone the current task in between the calls to fget_light * and fput_light. * * The fput_needed flag returned by fget_light should be passed to the * corresponding fput_light. */ static unsigned long __fget_light(unsigned int fd, fmode_t mask) { … } unsigned long __fdget(unsigned int fd) { … } EXPORT_SYMBOL(…); unsigned long __fdget_raw(unsigned int fd) { … } /* * Try to avoid f_pos locking. We only need it if the * file is marked for FMODE_ATOMIC_POS, and it can be * accessed multiple ways. * * Always do it for directories, because pidfd_getfd() * can make a file accessible even if it otherwise would * not be, and for directories this is a correctness * issue, not a "POSIX requirement". 
 */ static inline bool file_needs_f_pos_lock(struct file *file) { … } unsigned long __fdget_pos(unsigned int fd) { … } void __f_unlock_pos(struct file *f) { … } /* * We only lock f_pos if we have threads or if the file might be * shared with another process. In both cases we'll have an elevated * file count (done either by fdget() or by fork()). */ void set_close_on_exec(unsigned int fd, int flag) { … } bool get_close_on_exec(unsigned int fd) { … } static int do_dup2(struct files_struct *files, struct file *file, unsigned fd, unsigned flags) __releases(&files->file_lock) { … } int replace_fd(unsigned fd, struct file *file, unsigned flags) { … } /** * receive_fd() - Install received file into file descriptor table * @file: struct file that was received from another process * @ufd: __user pointer to write new fd number to * @o_flags: the O_* flags to apply to the new fd entry * * Installs a received file into the file descriptor table, with appropriate * checks and count updates. Optionally writes the fd number to userspace, if * @ufd is non-NULL. * * This helper handles its own reference counting of the incoming * struct file. * * Returns newly installed fd or -ve on error. */ int receive_fd(struct file *file, int __user *ufd, unsigned int o_flags) { … } EXPORT_SYMBOL_GPL(…); int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags) { … } static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags) { … } SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) { … } SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) { … } SYSCALL_DEFINE1(dup, unsigned int, fildes) { … } int f_dupfd(unsigned int from, struct file *file, unsigned flags) { … } int iterate_fd(struct files_struct *files, unsigned n, int (*f)(const void *, struct file *, unsigned), const void *p) { … } EXPORT_SYMBOL(…);