linux/drivers/block/ublk_drv.c

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Userspace block device - a block device whose IO is handled from userspace
 *
 * Makes full use of io_uring passthrough commands to communicate with the
 * ublk userspace daemon (ublksrvd) for handling basic IO requests.
 *
 * Copyright 2022 Ming Lei <[email protected]>
 *
 * (part of code stolen from loop.c)
 */
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/major.h>
#include <linux/wait.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/compat.h>
#include <linux/mutex.h>
#include <linux/writeback.h>
#include <linux/completion.h>
#include <linux/highmem.h>
#include <linux/sysfs.h>
#include <linux/miscdevice.h>
#include <linux/falloc.h>
#include <linux/uio.h>
#include <linux/ioprio.h>
#include <linux/sched/mm.h>
#include <linux/uaccess.h>
#include <linux/cdev.h>
#include <linux/io_uring/cmd.h>
#include <linux/blk-mq.h>
#include <linux/delay.h>
#include <linux/mm.h>
#include <asm/page.h>
#include <linux/task_work.h>
#include <linux/namei.h>
#include <linux/kref.h>
#include <uapi/linux/ublk_cmd.h>

#define UBLK_MINORS

/* private ioctl command mirror */
#define UBLK_CMD_DEL_DEV_ASYNC

/* All UBLK_F_* have to be included into UBLK_F_ALL */
#define UBLK_F_ALL

/* All UBLK_PARAM_TYPE_* should be included here */
#define UBLK_PARAM_TYPE_ALL

struct ublk_rq_data {};

struct ublk_uring_cmd_pdu {};

/*
 * io command is active: the sqe cmd has been received and its cqe is not done yet
 *
 * If the flag is set, the io command is owned by the ublk driver and is
 * waiting for an incoming blk-mq request from the ublk block device.
 *
 * If the flag is cleared, the io command is being completed and is owned by
 * the ublk server.
 */
#define UBLK_IO_FLAG_ACTIVE

/*
 * The IO command has been completed via cqe, is being handled by ublksrv,
 * and has not been committed yet
 *
 * Basically mutually exclusive with UBLK_IO_FLAG_ACTIVE, so it can be used
 * for cross verification
 */
#define UBLK_IO_FLAG_OWNED_BY_SRV

/*
 * The IO command has been aborted, so this flag is only set when
 * UBLK_IO_FLAG_ACTIVE is not set.
 *
 * After this flag is observed, any pending or newly incoming request
 * associated with this io command will be failed immediately
 */
#define UBLK_IO_FLAG_ABORTED

/*
 * UBLK_IO_FLAG_NEED_GET_DATA is set when the IO command needs to get a
 * data buffer address from ublksrv.
 *
 * The bio data can then be copied into this data buffer for a WRITE request
 * after the IO command is issued again and UBLK_IO_FLAG_NEED_GET_DATA is cleared.
 */
#define UBLK_IO_FLAG_NEED_GET_DATA
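/*
 * Illustrative sketch (not part of the driver): how a ublk server might
 * drive the two-phase WRITE flow behind UBLK_IO_FLAG_NEED_GET_DATA. The
 * command/result names come from <uapi/linux/ublk_cmd.h>; queue_io_cmd(),
 * handle_write() and the variables are hypothetical.
 *
 *	switch (cqe_res) {
 *	case UBLK_IO_RES_NEED_GET_DATA:
 *		// hand a buffer address back and re-issue the io command;
 *		// the driver then copies the WRITE payload into that buffer
 *		queue_io_cmd(q, tag, UBLK_IO_NEED_GET_DATA, buf_addr);
 *		break;
 *	case UBLK_IO_RES_OK:
 *		res = handle_write(buf_addr, iod->nr_sectors << 9);
 *		queue_io_cmd(q, tag, UBLK_IO_COMMIT_AND_FETCH_REQ, buf_addr);
 *		break;
 *	}
 */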

/* atomic RW with ubq->cancel_lock */
#define UBLK_IO_FLAG_CANCELED

struct ublk_io {};

struct ublk_queue {};

struct ublk_device {};

/* header of ublk_params */
struct ublk_params_header {};

static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq);

static inline unsigned int ublk_req_build_flags(struct request *req);
static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq,
						   int tag);
static inline bool ublk_dev_is_user_copy(const struct ublk_device *ub)
{}

static inline bool ublk_dev_is_zoned(const struct ublk_device *ub)
{}

static inline bool ublk_queue_is_zoned(struct ublk_queue *ubq)
{}

#ifdef CONFIG_BLK_DEV_ZONED

static int ublk_get_nr_zones(const struct ublk_device *ub)
{}

static int ublk_revalidate_disk_zones(struct ublk_device *ub)
{}

static int ublk_dev_param_zoned_validate(const struct ublk_device *ub)
{}

static void ublk_dev_param_zoned_apply(struct ublk_device *ub)
{}

/* Based on virtblk_alloc_report_buffer */
static void *ublk_alloc_report_buffer(struct ublk_device *ublk,
				      unsigned int nr_zones, size_t *buflen)
{}

static int ublk_report_zones(struct gendisk *disk, sector_t sector,
		      unsigned int nr_zones, report_zones_cb cb, void *data)
{}

static blk_status_t ublk_setup_iod_zoned(struct ublk_queue *ubq,
					 struct request *req)
{}

#else

#define ublk_report_zones

static int ublk_dev_param_zoned_validate(const struct ublk_device *ub)
{
	return -EOPNOTSUPP;
}

static void ublk_dev_param_zoned_apply(struct ublk_device *ub)
{
}

static int ublk_revalidate_disk_zones(struct ublk_device *ub)
{
	return 0;
}

static blk_status_t ublk_setup_iod_zoned(struct ublk_queue *ubq,
					 struct request *req)
{
	return BLK_STS_NOTSUPP;
}

#endif

static inline void __ublk_complete_rq(struct request *req);
static void ublk_complete_rq(struct kref *ref);

static dev_t ublk_chr_devt;
static const struct class ublk_chr_class =;

static DEFINE_IDR(ublk_index_idr);
static DEFINE_SPINLOCK(ublk_idr_lock);
static wait_queue_head_t ublk_idr_wq;	/* wait until one idr is freed */

static DEFINE_MUTEX(ublk_ctl_mutex);

/*
 * Max number of ublk devices allowed to be added
 *
 * It can be extended to a per-user limit in the future, or even be
 * controlled by cgroup.
 */
#define UBLK_MAX_UBLKS
static unsigned int ublks_max =;
static unsigned int ublks_added;	/* protected by ublk_ctl_mutex */
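/*
 * Usage note (illustrative): with the module_param_cb() registration at the
 * bottom of this file, the limit is typically set at load time, e.g.
 * "modprobe ublk_drv ublks_max=128"; whether it can also be changed later
 * through /sys/module/ublk_drv/parameters/ublks_max depends on the
 * permission bits used when registering the parameter.
 */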

static struct miscdevice ublk_misc;

static inline unsigned ublk_pos_to_hwq(loff_t pos)
{}

static inline unsigned ublk_pos_to_buf_off(loff_t pos)
{}

static inline unsigned ublk_pos_to_tag(loff_t pos)
{}

static void ublk_dev_param_basic_apply(struct ublk_device *ub)
{}

static int ublk_validate_params(const struct ublk_device *ub)
{}

static void ublk_apply_params(struct ublk_device *ub)
{}

static inline bool ublk_support_user_copy(const struct ublk_queue *ubq)
{}

static inline bool ublk_need_req_ref(const struct ublk_queue *ubq)
{}

static inline void ublk_init_req_ref(const struct ublk_queue *ubq,
		struct request *req)
{}

static inline bool ublk_get_req_ref(const struct ublk_queue *ubq,
		struct request *req)
{}

static inline void ublk_put_req_ref(const struct ublk_queue *ubq,
		struct request *req)
{}

static inline bool ublk_need_get_data(const struct ublk_queue *ubq)
{}

/* Called in the slow path only; keep it noinline for tracing purposes */
static noinline struct ublk_device *ublk_get_device(struct ublk_device *ub)
{}

/* Called in the slow path only; keep it noinline for tracing purposes */
static noinline void ublk_put_device(struct ublk_device *ub)
{}

static inline struct ublk_queue *ublk_get_queue(struct ublk_device *dev,
		int qid)
{}

static inline bool ublk_rq_has_data(const struct request *rq)
{}

static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq,
		int tag)
{}

static inline char *ublk_queue_cmd_buf(struct ublk_device *ub, int q_id)
{}

static inline int ublk_queue_cmd_buf_size(struct ublk_device *ub, int q_id)
{}

static inline bool ublk_queue_can_use_recovery_reissue(
		struct ublk_queue *ubq)
{}

static inline bool ublk_queue_can_use_recovery(
		struct ublk_queue *ubq)
{}

static inline bool ublk_can_use_recovery(struct ublk_device *ub)
{}

static void ublk_free_disk(struct gendisk *disk)
{}

static void ublk_store_owner_uid_gid(unsigned int *owner_uid,
		unsigned int *owner_gid)
{}

static int ublk_open(struct gendisk *disk, blk_mode_t mode)
{}

static const struct block_device_operations ub_fops =;

#define UBLK_MAX_PIN_PAGES

struct ublk_io_iter {};

/* copy 'total' bytes between the request pages and the io_iter pages */
static void ublk_copy_io_pages(struct ublk_io_iter *data,
		size_t total, size_t pg_off, int dir)
{}

static bool ublk_advance_io_iter(const struct request *req,
		struct ublk_io_iter *iter, unsigned int offset)
{}

/*
 * Copy data between the request pages and the io_iter; 'offset' is the
 * starting linear offset within the request.
 */
static size_t ublk_copy_user_pages(const struct request *req,
		unsigned offset, struct iov_iter *uiter, int dir)
{}

static inline bool ublk_need_map_req(const struct request *req)
{}

static inline bool ublk_need_unmap_req(const struct request *req)
{}

static int ublk_map_io(const struct ublk_queue *ubq, const struct request *req,
		struct ublk_io *io)
{}

static int ublk_unmap_io(const struct ublk_queue *ubq,
		const struct request *req,
		struct ublk_io *io)
{}

static inline unsigned int ublk_req_build_flags(struct request *req)
{}

static blk_status_t ublk_setup_iod(struct ublk_queue *ubq, struct request *req)
{}

static inline struct ublk_uring_cmd_pdu *ublk_get_uring_cmd_pdu(
		struct io_uring_cmd *ioucmd)
{}

static inline bool ubq_daemon_is_dying(struct ublk_queue *ubq)
{}

/* todo: handle partial completion */
static inline void __ublk_complete_rq(struct request *req)
{}

static void ublk_complete_rq(struct kref *ref)
{}

/*
 * Since __ublk_rq_task_work always fails requests immediately during
 * exiting, __ublk_fail_req() is only called from the abort context during
 * exiting, so no lock is necessary.
 *
 * Also, aborting may not have started yet; keep in mind that a failed
 * request may be issued by the block layer again.
 */
static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io,
		struct request *req)
{}

static void ubq_complete_io_cmd(struct ublk_io *io, int res,
				unsigned issue_flags)
{}

#define UBLK_REQUEUE_DELAY_MS

static inline void __ublk_abort_rq(struct ublk_queue *ubq,
		struct request *rq)
{}

static inline void __ublk_rq_task_work(struct request *req,
				       unsigned issue_flags)
{}

static inline void ublk_forward_io_cmds(struct ublk_queue *ubq,
					unsigned issue_flags)
{}

static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, unsigned issue_flags)
{}

static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
{}

static enum blk_eh_timer_return ublk_timeout(struct request *rq)
{}

static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
		const struct blk_mq_queue_data *bd)
{}

static int ublk_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
		unsigned int hctx_idx)
{}

static const struct blk_mq_ops ublk_mq_ops =;

static int ublk_ch_open(struct inode *inode, struct file *filp)
{}

static int ublk_ch_release(struct inode *inode, struct file *filp)
{}

/* map pre-allocated per-queue cmd buffer to ublksrv daemon */
static int ublk_ch_mmap(struct file *filp, struct vm_area_struct *vma)
{}
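/*
 * Illustrative userspace counterpart (not part of the driver): the ublk
 * server mmap()s the char device read-only to reach the per-queue array of
 * struct ublksrv_io_desc. The offset convention shown here, built from
 * UBLKSRV_CMD_BUF_OFFSET and UBLK_MAX_QUEUE_DEPTH in the UAPI header, is an
 * assumption for the sketch; cdev_fd, q_id and queue_depth are hypothetical
 * variables.
 *
 *	off_t off = UBLKSRV_CMD_BUF_OFFSET +
 *		q_id * UBLK_MAX_QUEUE_DEPTH * sizeof(struct ublksrv_io_desc);
 *	struct ublksrv_io_desc *descs =
 *		mmap(NULL, queue_depth * sizeof(*descs), PROT_READ,
 *		     MAP_SHARED, cdev_fd, off);
 *	// descs[tag] then describes the blk-mq request carrying that tag
 */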

static void ublk_commit_completion(struct ublk_device *ub,
		const struct ublksrv_io_cmd *ub_cmd)
{}

/*
 * Called from the ubq_daemon context via the cancel fn, while the ublk
 * blk-mq queue is quiesced; so we run exclusively with respect to both the
 * blk-mq and ubq_daemon contexts, and everything is serialized.
 */
static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq)
{}

static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq)
{}

static void ublk_cancel_cmd(struct ublk_queue *ubq, struct ublk_io *io,
		unsigned int issue_flags)
{}

/*
 * The ublk char device won't be closed while the cancel fn is called, so
 * both the ublk device and the queue are guaranteed to be live
 */
static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd,
		unsigned int issue_flags)
{}

static inline bool ublk_queue_ready(struct ublk_queue *ubq)
{}

static void ublk_cancel_queue(struct ublk_queue *ubq)
{}

/* Cancel all pending commands, must be called after del_gendisk() returns */
static void ublk_cancel_dev(struct ublk_device *ub)
{}

static bool ublk_check_inflight_rq(struct request *rq, void *data)
{}

static void ublk_wait_tagset_rqs_idle(struct ublk_device *ub)
{}

static void __ublk_quiesce_dev(struct ublk_device *ub)
{}

static void ublk_quiesce_work_fn(struct work_struct *work)
{}

static void ublk_unquiesce_dev(struct ublk_device *ub)
{}

static void ublk_stop_dev(struct ublk_device *ub)
{}

/* device can only be started after all IOs are ready */
static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
{}
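/*
 * Illustrative ordering (not part of the driver): before the server issues
 * UBLK_U_CMD_START_DEV on /dev/ublk-control, every (queue, tag) pair must
 * already have a UBLK_IO_FETCH_REQ command pending; that is what marks each
 * io ready, and ublk_mark_io_ready() lets the device start once all queues
 * have reached that state. queue_io_cmd(), ctrl_cmd() and the variables are
 * hypothetical.
 *
 *	for (q = 0; q < nr_hw_queues; q++)
 *		for (tag = 0; tag < queue_depth; tag++)
 *			queue_io_cmd(q, tag, UBLK_IO_FETCH_REQ, buf[q][tag]);
 *	io_uring_submit(&ring);
 *	ctrl_cmd(ctrl_fd, UBLK_U_CMD_START_DEV, dev_id);
 */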

static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id,
		int tag)
{}

static inline int ublk_check_cmd_op(u32 cmd_op)
{}

static inline void ublk_fill_io_cmd(struct ublk_io *io,
		struct io_uring_cmd *cmd, unsigned long buf_addr)
{}

static inline void ublk_prep_cancel(struct io_uring_cmd *cmd,
				    unsigned int issue_flags,
				    struct ublk_queue *ubq, unsigned int tag)
{}

static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
			       unsigned int issue_flags,
			       const struct ublksrv_io_cmd *ub_cmd)
{}

static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub,
		struct ublk_queue *ubq, int tag, size_t offset)
{}

static inline int ublk_ch_uring_cmd_local(struct io_uring_cmd *cmd,
		unsigned int issue_flags)
{}

static void ublk_ch_uring_cmd_cb(struct io_uring_cmd *cmd,
		unsigned int issue_flags)
{}

static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
{}

static inline bool ublk_check_ubuf_dir(const struct request *req,
		int ubuf_dir)
{}

static struct request *ublk_check_and_get_req(struct kiocb *iocb,
		struct iov_iter *iter, size_t *off, int dir)
{}

static ssize_t ublk_ch_read_iter(struct kiocb *iocb, struct iov_iter *to)
{}

static ssize_t ublk_ch_write_iter(struct kiocb *iocb, struct iov_iter *from)
{}
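/*
 * Illustrative userspace counterpart (not part of the driver): with
 * UBLK_F_USER_COPY the server moves io data through plain pread()/pwrite()
 * on the char device, at a file position encoding queue id, tag and the
 * byte offset inside the request; ublk_pos_to_hwq()/ublk_pos_to_tag()/
 * ublk_pos_to_buf_off() above decode it. The exact encoding shown here is
 * an assumption for the sketch; check the UAPI header.
 *
 *	off_t pos = UBLKSRV_IO_BUF_OFFSET +
 *		((__u64)q_id << UBLK_QID_OFF) +
 *		((__u64)tag << UBLK_TAG_OFF) + byte_off;
 *	pwrite(cdev_fd, data, len, pos);	// fill a READ request's pages
 *	pread(cdev_fd, data, len, pos);		// fetch a WRITE request's payload
 */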

static const struct file_operations ublk_ch_fops =;

static void ublk_deinit_queue(struct ublk_device *ub, int q_id)
{}

static int ublk_init_queue(struct ublk_device *ub, int q_id)
{}

static void ublk_deinit_queues(struct ublk_device *ub)
{}

static int ublk_init_queues(struct ublk_device *ub)
{}

static int ublk_alloc_dev_number(struct ublk_device *ub, int idx)
{}

static void ublk_free_dev_number(struct ublk_device *ub)
{}

static void ublk_cdev_rel(struct device *dev)
{}

static int ublk_add_chdev(struct ublk_device *ub)
{}

static void ublk_stop_work_fn(struct work_struct *work)
{}

/* align max io buffer size with PAGE_SIZE */
static void ublk_align_max_io_size(struct ublk_device *ub)
{}

static int ublk_add_tag_set(struct ublk_device *ub)
{}

static void ublk_remove(struct ublk_device *ub)
{}

static struct ublk_device *ublk_get_device_from_id(int idx)
{}

static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
{}

static int ublk_ctrl_get_queue_affinity(struct ublk_device *ub,
		struct io_uring_cmd *cmd)
{}

static inline void ublk_dump_dev_info(struct ublksrv_ctrl_dev_info *info)
{}

static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
{}

static inline bool ublk_idr_freed(int id)
{}

static int ublk_ctrl_del_dev(struct ublk_device **p_ub, bool wait)
{}

static inline void ublk_ctrl_cmd_dump(struct io_uring_cmd *cmd)
{}

static int ublk_ctrl_stop_dev(struct ublk_device *ub)
{}

static int ublk_ctrl_get_dev_info(struct ublk_device *ub,
		struct io_uring_cmd *cmd)
{}

/* TYPE_DEVT is readonly, so fill it up before returning to userspace */
static void ublk_ctrl_fill_params_devt(struct ublk_device *ub)
{}

static int ublk_ctrl_get_params(struct ublk_device *ub,
		struct io_uring_cmd *cmd)
{}

static int ublk_ctrl_set_params(struct ublk_device *ub,
		struct io_uring_cmd *cmd)
{}

static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq)
{}

static int ublk_ctrl_start_recovery(struct ublk_device *ub,
		struct io_uring_cmd *cmd)
{}

static int ublk_ctrl_end_recovery(struct ublk_device *ub,
		struct io_uring_cmd *cmd)
{}

static int ublk_ctrl_get_features(struct io_uring_cmd *cmd)
{}

/*
 * All control commands are sent via /dev/ublk-control, so we have to check
 * the destination device's permission
 */
static int ublk_char_dev_permission(struct ublk_device *ub,
		const char *dev_path, int mask)
{}
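/*
 * Illustrative sketch (not part of the driver): control commands travel as
 * io_uring passthrough commands (SQE128) on /dev/ublk-control, carrying a
 * struct ublksrv_ctrl_cmd in the SQE command area. Treat the exact field
 * usage below as an assumption and check <uapi/linux/ublk_cmd.h>; ring,
 * ctrl_fd, dev_id and info are hypothetical variables.
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *	struct ublksrv_ctrl_cmd *cc = (struct ublksrv_ctrl_cmd *)sqe->cmd;
 *
 *	sqe->opcode = IORING_OP_URING_CMD;
 *	sqe->fd = ctrl_fd;			// open("/dev/ublk-control")
 *	sqe->cmd_op = UBLK_U_CMD_GET_DEV_INFO;
 *	cc->dev_id = dev_id;
 *	cc->addr = (__u64)(uintptr_t)&info;	// struct ublksrv_ctrl_dev_info
 *	cc->len = sizeof(info);
 *	io_uring_submit(&ring);
 */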

static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub,
		struct io_uring_cmd *cmd)
{}

static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
		unsigned int issue_flags)
{}

static const struct file_operations ublk_ctl_fops =;

static struct miscdevice ublk_misc =;

static int __init ublk_init(void)
{}

static void __exit ublk_exit(void)
{}

module_init(ublk_init);
module_exit(ublk_exit);

static int ublk_set_max_ublks(const char *buf, const struct kernel_param *kp)
{}

static int ublk_get_max_ublks(char *buf, const struct kernel_param *kp)
{}

static const struct kernel_param_ops ublk_max_ublks_ops =;

module_param_cb();
MODULE_PARM_DESC();

MODULE_AUTHOR();
MODULE_DESCRIPTION();
MODULE_LICENSE();