pipe.c | Explore in Territory

// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/pipe.c
 *
 *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
 */

#include <linux/mm.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/log2.h>
#include <linux/mount.h>
#include <linux/pseudo_fs.h>
#include <linux/magic.h>
#include <linux/pipe_fs_i.h>
#include <linux/uio.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/audit.h>
#include <linux/syscalls.h>
#include <linux/fcntl.h>
#include <linux/memcontrol.h>
#include <linux/watch_queue.h>
#include <linux/sysctl.h>

#include <linux/uaccess.h>
#include <asm/ioctls.h>

#include "internal.h"

/*
 * New pipe buffers will be restricted to this size while the user is exceeding
 * their pipe buffer quota. The general pipe use case needs at least two
 * buffers: one for data yet to be read, and one for new data. If this is less
 * than two, then a write to a non-empty pipe may block even if the pipe is not
 * full. This can occur with GNU make jobserver or similar uses of pipes as
 * semaphores: multiple processes may be waiting to write tokens back to the
 * pipe before reading tokens: https://lore.kernel.org/lkml/1628086770.5rn8p04n6j.none@localhost/.
 *
 * Users can reduce their pipe buffers with F_SETPIPE_SZ below this at their
 * own risk, namely: pipe writes to non-full pipes may block until the pipe is
 * emptied.
 */
#define PIPE_MIN_DEF_BUFFERS …

/*
 * The max size that a non-root user is allowed to grow the pipe. Can
 * be set by root in /proc/sys/fs/pipe-max-size
 */
static unsigned int pipe_max_size = …;

/* Maximum allocatable pages per user. Hard limit is unset by default, soft
 * matches default values.
 */
static unsigned long pipe_user_pages_hard;
static unsigned long pipe_user_pages_soft = …;

/*
 * We use head and tail indices that aren't masked off, except at the point of
 * dereference, but rather they're allowed to wrap naturally.  This means there
 * isn't a dead spot in the buffer, but the ring has to be a power of two and
 * <= 2^31.
 * -- David Howells 2019-09-23.
 *
 * Reads with count = 0 should always return 0.
 * -- Julian Bradfield 1999-06-07.
 *
 * FIFOs and Pipes now generate SIGIO for both readers and writers.
 * -- Jeremy Elson <[email protected]> 2001-08-16
 *
 * pipe_read & write cleanup
 * -- Manfred Spraul <[email protected]> 2002-05-09
 */

#define cmp_int(l, r) …

#ifdef CONFIG_PROVE_LOCKING
static int pipe_lock_cmp_fn(const struct lockdep_map *a,
			    const struct lockdep_map *b)
{ … }
#endif

void pipe_lock(struct pipe_inode_info *pipe)
{ … }
EXPORT_SYMBOL(…);

void pipe_unlock(struct pipe_inode_info *pipe)
{ … }
EXPORT_SYMBOL(…);

void pipe_double_lock(struct pipe_inode_info *pipe1,
		      struct pipe_inode_info *pipe2)
{ … }

static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
				  struct pipe_buffer *buf)
{ … }

static bool anon_pipe_buf_try_steal(struct pipe_inode_info *pipe,
		struct pipe_buffer *buf)
{ … }

/**
 * generic_pipe_buf_try_steal - attempt to take ownership of a &pipe_buffer
 * @pipe:	the pipe that the buffer belongs to
 * @buf:	the buffer to attempt to steal
 *
 * Description:
 *	This function attempts to steal the &struct page attached to
 *	@buf. If successful, this function returns 0 and returns with
 *	the page locked. The caller may then reuse the page for whatever
 *	he wishes; the typical use is insertion into a different file
 *	page cache.
 */
bool generic_pipe_buf_try_steal(struct pipe_inode_info *pipe,
		struct pipe_buffer *buf)
{ … }
EXPORT_SYMBOL(…);

/**
 * generic_pipe_buf_get - get a reference to a &struct pipe_buffer
 * @pipe:	the pipe that the buffer belongs to
 * @buf:	the buffer to get a reference to
 *
 * Description:
 *	This function grabs an extra reference to @buf. It's used in
 *	the tee() system call, when we duplicate the buffers in one
 *	pipe into another.
 */
bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
{ … }
EXPORT_SYMBOL(…);

/**
 * generic_pipe_buf_release - put a reference to a &struct pipe_buffer
 * @pipe:	the pipe that the buffer belongs to
 * @buf:	the buffer to put a reference to
 *
 * Description:
 *	This function releases a reference to @buf.
 */
void generic_pipe_buf_release(struct pipe_inode_info *pipe,
			      struct pipe_buffer *buf)
{ … }
EXPORT_SYMBOL(…);

static const struct pipe_buf_operations anon_pipe_buf_ops = …;

/* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
static inline bool pipe_readable(const struct pipe_inode_info *pipe)
{ … }

static inline unsigned int pipe_update_tail(struct pipe_inode_info *pipe,
					    struct pipe_buffer *buf,
					    unsigned int tail)
{ … }

static ssize_t
pipe_read(struct kiocb *iocb, struct iov_iter *to)
{ … }

static inline int is_packetized(struct file *file)
{ … }

/* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
static inline bool pipe_writable(const struct pipe_inode_info *pipe)
{ … }

static ssize_t
pipe_write(struct kiocb *iocb, struct iov_iter *from)
{ … }

static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{ … }

/* No kernel lock held - fine */
static __poll_t
pipe_poll(struct file *filp, poll_table *wait)
{ … }

static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe)
{ … }

static int
pipe_release(struct inode *inode, struct file *file)
{ … }

static int
pipe_fasync(int fd, struct file *filp, int on)
{ … }

unsigned long account_pipe_buffers(struct user_struct *user,
				   unsigned long old, unsigned long new)
{ … }

bool too_many_pipe_buffers_soft(unsigned long user_bufs)
{ … }

bool too_many_pipe_buffers_hard(unsigned long user_bufs)
{ … }

bool pipe_is_unprivileged_user(void)
{ … }

struct pipe_inode_info *alloc_pipe_info(void)
{ … }

void free_pipe_info(struct pipe_inode_info *pipe)
{ … }

static struct vfsmount *pipe_mnt __ro_after_init;

/*
 * pipefs_dname() is called from d_path().
 */
static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
{ … }

static const struct dentry_operations pipefs_dentry_operations = …;

static struct inode * get_pipe_inode(void)
{ … }

int create_pipe_files(struct file **res, int flags)
{ … }

static int __do_pipe_flags(int *fd, struct file **files, int flags)
{ … }

int do_pipe_flags(int *fd, int flags)
{ … }

/*
 * sys_pipe() is the normal C calling standard for creating
 * a pipe. It's not the way Unix traditionally does this, though.
 */
static int do_pipe2(int __user *fildes, int flags)
{ … }

SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
{ … }

SYSCALL_DEFINE1(pipe, int __user *, fildes)
{ … }

/*
 * This is the stupid "wait for pipe to be readable or writable"
 * model.
 *
 * See pipe_read/write() for the proper kind of exclusive wait,
 * but that requires that we wake up any other readers/writers
 * if we then do not end up reading everything (ie the whole
 * "wake_next_reader/writer" logic in pipe_read/write()).
 */
void pipe_wait_readable(struct pipe_inode_info *pipe)
{ … }

void pipe_wait_writable(struct pipe_inode_info *pipe)
{ … }

/*
 * This depends on both the wait (here) and the wakeup (wake_up_partner)
 * holding the pipe lock, so "*cnt" is stable and we know a wakeup cannot
 * race with the count check and waitqueue prep.
 *
 * Normally in order to avoid races, you'd do the prepare_to_wait() first,
 * then check the condition you're waiting for, and only then sleep. But
 * because of the pipe lock, we can check the condition before being on
 * the wait queue.
 *
 * We use the 'rd_wait' waitqueue for pipe partner waiting.
 */
static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
{ … }

static void wake_up_partner(struct pipe_inode_info *pipe)
{ … }

static int fifo_open(struct inode *inode, struct file *filp)
{ … }

const struct file_operations pipefifo_fops = …;

/*
 * Currently we rely on the pipe array holding a power-of-2 number
 * of pages. Returns 0 on error.
 */
unsigned int round_pipe_size(unsigned int size)
{ … }

/*
 * Resize the pipe ring to a number of slots.
 *
 * Note the pipe can be reduced in capacity, but only if the current
 * occupancy doesn't exceed nr_slots; if it does, EBUSY will be
 * returned instead.
 */
int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
{ … }

/*
 * Allocate a new array of pipe buffers and copy the info over. Returns the
 * pipe size if successful, or return -ERROR on error.
 */
static long pipe_set_size(struct pipe_inode_info *pipe, unsigned int arg)
{ … }

/*
 * Note that i_pipe and i_cdev share the same location, so checking ->i_pipe is
 * not enough to verify that this is a pipe.
 */
struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice)
{ … }

long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
{ … }

static const struct super_operations pipefs_ops = …;

/*
 * pipefs should _never_ be mounted by userland - too much of security hassle,
 * no real gain from having the whole whorehouse mounted. So we don't need
 * any operations on the root directory. However, we need a non-trivial
 * d_name - pipe: will go nicely and kill the special-casing in procfs.
 */

static int pipefs_init_fs_context(struct fs_context *fc)
{ … }

static struct file_system_type pipe_fs_type = …;

#ifdef CONFIG_SYSCTL
static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
					unsigned int *valp,
					int write, void *data)
{ … }

static int proc_dopipe_max_size(const struct ctl_table *table, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{ … }

static struct ctl_table fs_pipe_sysctls[] = …;
#endif

static int __init init_pipe_fs(void)
{ … }

fs_initcall(init_pipe_fs);