linux/ipc/mqueue.c

/*
 * POSIX message queues filesystem for Linux.
 *
 * Copyright (C) 2003,2004  Krzysztof Benedyczak    ([email protected])
 *                          Michal Wronski          ([email protected])
 *
 * Spinlocks:               Mohamed Abbas           ([email protected])
 * Lockless receive & send, fd based notify:
 *			    Manfred Spraul	    ([email protected])
 *
 * Audit:                   George Wilson           ([email protected])
 *
 * This file is released under the GPL.
 */

#include <linux/capability.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/fs_context.h>
#include <linux/namei.h>
#include <linux/sysctl.h>
#include <linux/poll.h>
#include <linux/mqueue.h>
#include <linux/msg.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/netlink.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/signal.h>
#include <linux/mutex.h>
#include <linux/nsproxy.h>
#include <linux/pid.h>
#include <linux/ipc_namespace.h>
#include <linux/user_namespace.h>
#include <linux/slab.h>
#include <linux/sched/wake_q.h>
#include <linux/sched/signal.h>
#include <linux/sched/user.h>

#include <net/sock.h>
#include "util.h"

struct mqueue_fs_context {};

#define MQUEUE_MAGIC
#define DIRENT_SIZE
#define FILENT_SIZE

#define SEND
#define RECV

#define STATE_NONE
#define STATE_READY

struct posix_msg_tree_node {};

/*
 * Locking:
 *
 * Accesses to a message queue are synchronized by acquiring info->lock.
 *
 * There are two notable exceptions:
 * - The actual wakeup of a sleeping task is performed using the wake_q
 *   framework. info->lock is already released when wake_up_q is called.
 * - The exit codepaths after sleeping check ext_wait_queue->state without
 *   any locks. If it is STATE_READY, then the syscall is completed without
 *   acquiring info->lock.
 *
 * MQ_BARRIER:
 * To achieve proper release/acquire memory barrier pairing, the state is set to
 * STATE_READY with smp_store_release(), and it is read with READ_ONCE followed
 * by smp_acquire__after_ctrl_dep(). In addition, wake_q_add_safe() is used.
 *
 * This prevents the following races:
 *
 * 1) With the simple wake_q_add(), the task could already be gone before
 *    the reference count is increased.
 * Thread A
 *				Thread B
 * WRITE_ONCE(wait.state, STATE_NONE);
 * schedule_hrtimeout()
 *				wake_q_add(A)
 *				if (cmpxchg()) // success
 *				   ->state = STATE_READY (reordered)
 * <timeout returns>
 * if (wait.state == STATE_READY) return;
 * sysret to user space
 * sys_exit()
 *				get_task_struct() // UaF
 *
 * Solution: Use wake_q_add_safe() and perform the get_task_struct() before
 * the smp_store_release() that does ->state = STATE_READY.
 *
 * 2) Without proper _release/_acquire barriers, the woken up task
 *    could read stale data
 *
 * Thread A
 *				Thread B
 * do_mq_timedreceive
 * WRITE_ONCE(wait.state, STATE_NONE);
 * schedule_hrtimeout()
 *				state = STATE_READY;
 * <timeout returns>
 * if (wait.state == STATE_READY) return;
 * msg_ptr = wait.msg;		// Access to stale data!
 *				receiver->msg = message; (reordered)
 *
 * Solution: use _release and _acquire barriers.
 *
 * 3) There is intentionally no barrier when setting current->state
 *    to TASK_INTERRUPTIBLE: spin_unlock(&info->lock) provides the
 *    release memory barrier, and the wakeup is triggered when holding
 *   info->lock, i.e. spin_lock(&info->lock) provides the pairing
 *    acquire memory barrier.
 */
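
/*
 * A minimal sketch (not this file's actual code) of the MQ_BARRIER pairing
 * described above. It assumes the ext_wait_queue declared below carries
 * "task", "state" and "msg" members; the sketch_* names are illustrative.
 */
#if 0	/* illustrative only */
/* Waker side, called with info->lock held: */
static void sketch_wake_waiter(struct wake_q_head *wake_q,
			       struct ext_wait_queue *waiter,
			       struct msg_msg *message)
{
	struct task_struct *task;

	waiter->msg = message;
	/* Take the task reference before publishing STATE_READY (race 1). */
	task = get_task_struct(waiter->task);
	/* The _release pairs with the acquire on the waiter side (race 2). */
	smp_store_release(&waiter->state, STATE_READY);
	/* Safe even if the waiter exits right after seeing STATE_READY. */
	wake_q_add_safe(wake_q, task);
}

/* Waiter side, rechecking the state after waking, without info->lock: */
static struct msg_msg *sketch_check_ready(struct ext_wait_queue *wait)
{
	if (READ_ONCE(wait->state) == STATE_READY) {
		/* Pairs with the smp_store_release() above. */
		smp_acquire__after_ctrl_dep();
		return wait->msg;	/* guaranteed not to be stale */
	}
	return NULL;	/* caller must take info->lock and recheck */
}
#endif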

struct ext_wait_queue {};

struct mqueue_inode_info {};

static struct file_system_type mqueue_fs_type;
static const struct inode_operations mqueue_dir_inode_operations;
static const struct file_operations mqueue_file_operations;
static const struct super_operations mqueue_super_ops;
static const struct fs_context_operations mqueue_fs_context_ops;
static void remove_notification(struct mqueue_inode_info *info);

static struct kmem_cache *mqueue_inode_cachep;

static inline struct mqueue_inode_info *MQUEUE_I(struct inode *inode)
{}

/*
 * This routine should be called with the mq_lock held.
 */
static inline struct ipc_namespace *__get_ns_from_inode(struct inode *inode)
{}

static struct ipc_namespace *get_ns_from_inode(struct inode *inode)
{}
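
/*
 * A sketch of the locked-wrapper pattern the pair above suggests: the
 * plain variant takes mq_lock around the __-prefixed one, which expects
 * the lock to be held already. Illustrative only; the real bodies are
 * elided above.
 */
#if 0	/* illustrative only */
static struct ipc_namespace *sketch_get_ns_from_inode(struct inode *inode)
{
	struct ipc_namespace *ns;

	spin_lock(&mq_lock);
	ns = __get_ns_from_inode(inode);
	spin_unlock(&mq_lock);

	return ns;
}
#endif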

/* Auxiliary functions to manipulate the priority-sorted message tree */
static int msg_insert(struct msg_msg *msg, struct mqueue_inode_info *info)
{}

static inline void msg_tree_erase(struct posix_msg_tree_node *leaf,
				  struct mqueue_inode_info *info)
{}

static inline struct msg_msg *msg_get(struct mqueue_inode_info *info)
{}
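
/*
 * A sketch of the layout these helpers manage, assuming the elided
 * posix_msg_tree_node above holds an rb_node keyed by priority plus a
 * FIFO list of messages at that priority (field names are assumptions):
 */
#if 0	/* illustrative only */
struct sketch_msg_tree_node {
	struct rb_node rb_node;		/* keyed by message priority */
	struct list_head msg_list;	/* FIFO of msg_msg at this priority */
	int priority;
};

/* msg_get() conceptually pops the first message of the highest priority. */
static struct msg_msg *sketch_msg_get(struct rb_root *msg_tree)
{
	struct rb_node *rb = rb_last(msg_tree);	/* rightmost = highest prio */
	struct sketch_msg_tree_node *leaf;

	if (!rb)
		return NULL;
	leaf = rb_entry(rb, struct sketch_msg_tree_node, rb_node);
	/* The real helper also unlinks the message and erases empty leaves. */
	return list_first_entry(&leaf->msg_list, struct msg_msg, m_list);
}
#endif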

static struct inode *mqueue_get_inode(struct super_block *sb,
		struct ipc_namespace *ipc_ns, umode_t mode,
		struct mq_attr *attr)
{}

static int mqueue_fill_super(struct super_block *sb, struct fs_context *fc)
{}

static int mqueue_get_tree(struct fs_context *fc)
{}

static void mqueue_fs_context_free(struct fs_context *fc)
{}

static int mqueue_init_fs_context(struct fs_context *fc)
{}

/*
 * mq_init_ns() is currently the only caller of mq_create_mount().
 * So the ns parameter is always a newly created ipc namespace.
 */
static struct vfsmount *mq_create_mount(struct ipc_namespace *ns)
{}

static void init_once(void *foo)
{}

static struct inode *mqueue_alloc_inode(struct super_block *sb)
{}

static void mqueue_free_inode(struct inode *inode)
{}

static void mqueue_evict_inode(struct inode *inode)
{}

static int mqueue_create_attr(struct dentry *dentry, umode_t mode, void *arg)
{}

static int mqueue_create(struct mnt_idmap *idmap, struct inode *dir,
			 struct dentry *dentry, umode_t mode, bool excl)
{}

static int mqueue_unlink(struct inode *dir, struct dentry *dentry)
{}

/*
 * This routine handles a read() on a queue file.
 * To avoid implementing some form of mq_receive() here, we only allow
 * reading the queue size & notification info (the only values that are
 * of interest from the user's point of view and aren't accessible
 * through the standard routines).
 */
static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
				size_t count, loff_t *off)
{}
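
/*
 * A sketch (not the actual implementation) of the pattern described
 * above: format the few interesting values into a local buffer under
 * info->lock, then let simple_read_from_buffer() handle count/offset.
 * The "lock" and "qsize" members of mqueue_inode_info are assumptions,
 * since the struct body is elided above.
 */
#if 0	/* illustrative only */
static ssize_t sketch_read_file(struct file *filp, char __user *u_data,
				size_t count, loff_t *off)
{
	struct mqueue_inode_info *info = MQUEUE_I(file_inode(filp));
	char buffer[FILENT_SIZE];
	ssize_t ret;

	spin_lock(&info->lock);
	/* The real file also reports the notification fields. */
	ret = snprintf(buffer, sizeof(buffer), "QSIZE:%-10lu\n", info->qsize);
	spin_unlock(&info->lock);

	/* copy_to_user() must happen after dropping the spinlock */
	return simple_read_from_buffer(u_data, count, off, buffer, ret);
}
#endif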

static int mqueue_flush_file(struct file *filp, fl_owner_t id)
{}

static __poll_t mqueue_poll_file(struct file *filp, struct poll_table_struct *poll_tab)
{}

/* Adds current to info->e_wait_q[sr] before the first element with smaller prio */
static void wq_add(struct mqueue_inode_info *info, int sr,
			struct ext_wait_queue *ewp)
{}
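
/*
 * A sketch of the ordered insert described above, assuming ext_wait_queue
 * has "task" and "list" members, e_wait_q[] heads the per-direction lists,
 * and the next waiter to service is taken from the tail. Remember that a
 * numerically lower task->prio means a higher scheduling priority.
 */
#if 0	/* illustrative only */
static void sketch_wq_add(struct mqueue_inode_info *info, int sr,
			  struct ext_wait_queue *ewp)
{
	struct ext_wait_queue *walk;

	ewp->task = current;
	list_for_each_entry(walk, &info->e_wait_q[sr].list, list) {
		if (walk->task->prio <= current->prio) {
			/* walk has priority over us: insert before it */
			list_add_tail(&ewp->list, &walk->list);
			return;
		}
	}
	/* nobody outranks us: go to the tail, to be serviced first */
	list_add_tail(&ewp->list, &info->e_wait_q[sr].list);
}
#endif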

/*
 * Puts the current task to sleep. The caller must hold the queue lock;
 * after return the lock is no longer held.
 * sr: SEND or RECV
 */
static int wq_sleep(struct mqueue_inode_info *info, int sr,
		    ktime_t *timeout, struct ext_wait_queue *ewp)
	__releases(&info->lock)
{}

/*
 * Returns the waiting task that should be serviced first, or NULL if none exists
 */
static struct ext_wait_queue *wq_get_first_waiter(
		struct mqueue_inode_info *info, int sr)
{}

static inline void set_cookie(struct sk_buff *skb, char code)
{}

/*
 * This function exists only to split up the overly long sys_mq_timedsend().
 */
static void __do_notify(struct mqueue_inode_info *info)
{}

static int prepare_timeout(const struct __kernel_timespec __user *u_abs_timeout,
			   struct timespec64 *ts)
{}
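
/*
 * A sketch of what a prepare_timeout() like the one above typically does:
 * copy the caller's absolute timespec from user space and validate it.
 */
#if 0	/* illustrative only */
static int sketch_prepare_timeout(const struct __kernel_timespec __user *u_abs,
				  struct timespec64 *ts)
{
	if (get_timespec64(ts, u_abs))
		return -EFAULT;
	if (!timespec64_valid(ts))
		return -EINVAL;
	return 0;
}
#endif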

static void remove_notification(struct mqueue_inode_info *info)
{}

static int prepare_open(struct dentry *dentry, int oflag, int ro,
			umode_t mode, struct filename *name,
			struct mq_attr *attr)
{}

static int do_mq_open(const char __user *u_name, int oflag, umode_t mode,
		      struct mq_attr *attr)
{}

SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode,
		struct mq_attr __user *, u_attr)
{}

SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
{}

/* Pipelined send and receive functions.
 *
 * If a receiver finds no waiting message, then it registers itself in the
 * list of waiting receivers. A sender checks that list before adding the new
 * message into the message array. If there is a waiting receiver, then it
 * bypasses the message array and directly hands the message over to the
 * receiver. The receiver accepts the message and returns without grabbing the
 * queue spinlock:
 *
 * - Set pointer to message.
 * - Queue the receiver task for later wakeup (without the info->lock).
 * - Update its state to STATE_READY. Now the receiver can continue.
 * - Wake up the process after the lock is dropped. Should the process wake up
 *   before this wakeup (due to a timeout or a signal) it will either see
 *   STATE_READY and continue or acquire the lock to check the state again.
 *
 * The same algorithm is used for senders.
 */

static inline void __pipelined_op(struct wake_q_head *wake_q,
				  struct mqueue_inode_info *info,
				  struct ext_wait_queue *this)
{}

/* pipelined_send() - send a message directly to the task waiting in
 * sys_mq_timedreceive() (without inserting the message into the queue).
 */
static inline void pipelined_send(struct wake_q_head *wake_q,
				  struct mqueue_inode_info *info,
				  struct msg_msg *message,
				  struct ext_wait_queue *receiver)
{}

/* pipelined_receive() - if a task is waiting in sys_mq_timedsend(), take
 * its message and insert it into the queue (there is guaranteed to be a
 * free slot). */
static inline void pipelined_receive(struct wake_q_head *wake_q,
				     struct mqueue_inode_info *info)
{}
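
/*
 * A sketch of the sender fast path described in the comment above: hand
 * the message straight to a waiting receiver, bypassing the message tree.
 * It reuses the hypothetical sketch_wake_waiter() from the MQ_BARRIER
 * sketch near the top of this file; error handling is omitted.
 */
#if 0	/* illustrative only */
static void sketch_send(struct wake_q_head *wake_q,
			struct mqueue_inode_info *info,
			struct msg_msg *message)
{
	struct ext_wait_queue *receiver = wq_get_first_waiter(info, RECV);

	if (receiver)	/* direct handoff: the message tree is never touched */
		sketch_wake_waiter(wake_q, receiver, message);
	else		/* no receiver waiting: store in the message tree */
		msg_insert(message, info);
}
#endif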

static int do_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr,
		size_t msg_len, unsigned int msg_prio,
		struct timespec64 *ts)
{}

static int do_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr,
		size_t msg_len, unsigned int __user *u_msg_prio,
		struct timespec64 *ts)
{}

SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
		size_t, msg_len, unsigned int, msg_prio,
		const struct __kernel_timespec __user *, u_abs_timeout)
{}

SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
		size_t, msg_len, unsigned int __user *, u_msg_prio,
		const struct __kernel_timespec __user *, u_abs_timeout)
{}

/*
 * Note: if the caller asks us to deregister (by passing a NULL pointer)
 * but is not the current owner of the notification, the request is
 * silently discarded. This case is not explicitly defined by POSIX.
 */
static int do_mq_notify(mqd_t mqdes, const struct sigevent *notification)
{}
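
/*
 * A sketch of the silent-discard behaviour noted above, assuming the
 * elided mqueue_inode_info has a "notify_owner" pid member (illustrative
 * only):
 */
#if 0	/* illustrative only */
static void sketch_deregister(struct mqueue_inode_info *info,
			      const struct sigevent *notification)
{
	if (!notification) {
		/* only the registered owner may detach the notification */
		if (info->notify_owner == task_tgid(current))
			remove_notification(info);
		/* otherwise: silently ignore, as the note above says */
	}
}
#endif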

SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes,
		const struct sigevent __user *, u_notification)
{}

static int do_mq_getsetattr(int mqdes, struct mq_attr *new, struct mq_attr *old)
{}

SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes,
		const struct mq_attr __user *, u_mqstat,
		struct mq_attr __user *, u_omqstat)
{}

#ifdef CONFIG_COMPAT

struct compat_mq_attr {};

static inline int get_compat_mq_attr(struct mq_attr *attr,
			const struct compat_mq_attr __user *uattr)
{}

static inline int put_compat_mq_attr(const struct mq_attr *attr,
			struct compat_mq_attr __user *uattr)
{}

COMPAT_SYSCALL_DEFINE4(mq_open, const char __user *, u_name,
		       int, oflag, compat_mode_t, mode,
		       struct compat_mq_attr __user *, u_attr)
{}

COMPAT_SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes,
		       const struct compat_sigevent __user *, u_notification)
{}

COMPAT_SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes,
		       const struct compat_mq_attr __user *, u_mqstat,
		       struct compat_mq_attr __user *, u_omqstat)
{}
#endif

#ifdef CONFIG_COMPAT_32BIT_TIME
static int compat_prepare_timeout(const struct old_timespec32 __user *p,
				   struct timespec64 *ts)
{}

SYSCALL_DEFINE5(mq_timedsend_time32, mqd_t, mqdes,
		const char __user *, u_msg_ptr,
		unsigned int, msg_len, unsigned int, msg_prio,
		const struct old_timespec32 __user *, u_abs_timeout)
{}

SYSCALL_DEFINE5(mq_timedreceive_time32, mqd_t, mqdes,
		char __user *, u_msg_ptr,
		unsigned int, msg_len, unsigned int __user *, u_msg_prio,
		const struct old_timespec32 __user *, u_abs_timeout)
{}
#endif

static const struct inode_operations mqueue_dir_inode_operations =;

static const struct file_operations mqueue_file_operations =;

static const struct super_operations mqueue_super_ops =;

static const struct fs_context_operations mqueue_fs_context_ops =;

static struct file_system_type mqueue_fs_type =;

int mq_init_ns(struct ipc_namespace *ns)
{}

void mq_clear_sbinfo(struct ipc_namespace *ns)
{}

static int __init init_mqueue_fs(void)
{}

device_initcall(init_mqueue_fs);