// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 1994, Karl Keyte: Added support for disk statistics
 * Elevator latency, (C) 2000 Andrea Arcangeli <[email protected]> SuSE
 * Queue request tables / lock, selectable elevator, Jens Axboe <[email protected]>
 * kernel-doc documentation started by NeilBrown <[email protected]>
 *	- July 2000
 * bio rewrite, highmem i/o, etc, Jens Axboe <[email protected]> - May 2001
 */

/*
 * This handles all read/write requests to block devices
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/blk-pm.h>
#include <linux/blk-integrity.h>
#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/kernel_stat.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/completion.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/fault-inject.h>
#include <linux/list_sort.h>
#include <linux/delay.h>
#include <linux/ratelimit.h>
#include <linux/pm_runtime.h>
#include <linux/t10-pi.h>
#include <linux/debugfs.h>
#include <linux/bpf.h>
#include <linux/part_stat.h>
#include <linux/sched/sysctl.h>
#include <linux/blk-crypto.h>

#define CREATE_TRACE_POINTS
#include <trace/events/block.h>

#include "blk.h"
#include "blk-mq-sched.h"
#include "blk-pm.h"
#include "blk-cgroup.h"
#include "blk-throttle.h"
#include "blk-ioprio.h"

struct dentry *blk_debugfs_root;

EXPORT_TRACEPOINT_SYMBOL_GPL(…);
EXPORT_TRACEPOINT_SYMBOL_GPL(…);
EXPORT_TRACEPOINT_SYMBOL_GPL(…);
EXPORT_TRACEPOINT_SYMBOL_GPL(…);
EXPORT_TRACEPOINT_SYMBOL_GPL(…);
EXPORT_TRACEPOINT_SYMBOL_GPL(…);

static DEFINE_IDA(blk_queue_ida);

/*
 * For queue allocation
 */
static struct kmem_cache *blk_requestq_cachep;

/*
 * Controlling structure to kblockd
 */
static struct workqueue_struct *kblockd_workqueue;

/**
 * blk_queue_flag_set - atomically set a queue flag
 * @flag: flag to be set
 * @q: request queue
 */
void blk_queue_flag_set(unsigned int flag, struct request_queue *q)
{ … }
EXPORT_SYMBOL(…);

/**
 * blk_queue_flag_clear - atomically clear a queue flag
 * @flag: flag to be cleared
 * @q: request queue
 */
void blk_queue_flag_clear(unsigned int flag, struct request_queue *q)
{ … }
EXPORT_SYMBOL(…);

#define REQ_OP_NAME …
static const char *const blk_op_name[] = …;
#undef REQ_OP_NAME

/**
 * blk_op_str - Return the string XXX in the REQ_OP_XXX.
 * @op: REQ_OP_XXX.
 *
 * Description: Centralized block layer helper to convert a REQ_OP_XXX value
 * into its string form. Useful when debugging and tracing a bio or request.
 * For an invalid REQ_OP_XXX it returns the string "UNKNOWN".
 */
inline const char *blk_op_str(enum req_op op)
{ … }
EXPORT_SYMBOL_GPL(…);

static const struct { … } blk_errors[] = …;

blk_status_t errno_to_blk_status(int errno)
{ … }
EXPORT_SYMBOL_GPL(…);

int blk_status_to_errno(blk_status_t status)
{ … }
EXPORT_SYMBOL_GPL(…);

const char *blk_status_to_str(blk_status_t status)
{ … }
EXPORT_SYMBOL_GPL(…);
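/*
 * Illustrative sketch, not part of blk-core.c: one way a bio-based driver
 * might use the status helpers above when completing I/O.  The function
 * name example_complete_bio() is invented for the example; bio_endio(),
 * bio_op(), errno_to_blk_status(), blk_status_to_errno() and blk_op_str()
 * are the real interfaces.
 */
static void __maybe_unused example_complete_bio(struct bio *bio, int error)
{
	/* @error is a negative errno (e.g. -EIO) from the hardware path. */
	bio->bi_status = errno_to_blk_status(error);

	if (bio->bi_status != BLK_STS_OK)
		pr_debug("%s failed: %d\n", blk_op_str(bio_op(bio)),
			 blk_status_to_errno(bio->bi_status));

	/* Hand the bio back to its submitter via ->bi_end_io(). */
	bio_endio(bio);
}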
/**
 * blk_sync_queue - cancel any pending callbacks on a queue
 * @q: the queue
 *
 * Description:
 *     The block layer may perform asynchronous callback activity
 *     on a queue, such as calling the unplug function after a timeout.
 *     A block device may call blk_sync_queue to ensure that any
 *     such activity is cancelled, thus allowing it to release resources
 *     that the callbacks might use. The caller must already have made sure
 *     that its ->submit_bio will not re-add plugging prior to calling
 *     this function.
 *
 *     This function does not cancel any asynchronous activity arising
 *     out of elevator or throttling code. That would require elevator_exit()
 *     and blkcg_exit_queue() to be called with the queue lock initialized.
 */
void blk_sync_queue(struct request_queue *q)
{ … }
EXPORT_SYMBOL(…);

/**
 * blk_set_pm_only - increment pm_only counter
 * @q: request queue pointer
 */
void blk_set_pm_only(struct request_queue *q)
{ … }
EXPORT_SYMBOL_GPL(…);

void blk_clear_pm_only(struct request_queue *q)
{ … }
EXPORT_SYMBOL_GPL(…);

static void blk_free_queue_rcu(struct rcu_head *rcu_head)
{ … }

static void blk_free_queue(struct request_queue *q)
{ … }

/**
 * blk_put_queue - decrement the request_queue refcount
 * @q: the request_queue structure to decrement the refcount for
 *
 * Decrements the refcount of the request_queue and frees it when the refcount
 * reaches 0.
 */
void blk_put_queue(struct request_queue *q)
{ … }
EXPORT_SYMBOL(…);

void blk_queue_start_drain(struct request_queue *q)
{ … }

/**
 * blk_queue_enter() - try to increase q->q_usage_counter
 * @q: request queue pointer
 * @flags: BLK_MQ_REQ_NOWAIT and/or BLK_MQ_REQ_PM
 */
int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
{ … }

int __bio_queue_enter(struct request_queue *q, struct bio *bio)
{ … }

void blk_queue_exit(struct request_queue *q)
{ … }

static void blk_queue_usage_counter_release(struct percpu_ref *ref)
{ … }

static void blk_rq_timed_out_timer(struct timer_list *t)
{ … }

static void blk_timeout_work(struct work_struct *work)
{ … }

struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id)
{ … }

/**
 * blk_get_queue - increment the request_queue refcount
 * @q: the request_queue structure to increment the refcount for
 *
 * Increment the refcount of the request_queue kobject.
 *
 * Context: Any context.
 */
bool blk_get_queue(struct request_queue *q)
{ … }
EXPORT_SYMBOL(…);

#ifdef CONFIG_FAIL_MAKE_REQUEST

static DECLARE_FAULT_ATTR(fail_make_request);

static int __init setup_fail_make_request(char *str)
{ … }
__setup(…);

bool should_fail_request(struct block_device *part, unsigned int bytes)
{ … }

static int __init fail_make_request_debugfs(void)
{ … }

late_initcall(fail_make_request_debugfs);

#endif /* CONFIG_FAIL_MAKE_REQUEST */

static inline void bio_check_ro(struct bio *bio)
{ … }

static noinline int should_fail_bio(struct bio *bio)
{ … }
ALLOW_ERROR_INJECTION(…);

/*
 * Check whether this bio extends beyond the end of the device or partition.
 * This may well happen - the kernel calls bread() without checking the size of
 * the device, e.g., when mounting a file system.
 */
static inline int bio_check_eod(struct bio *bio)
{ … }

/*
 * Remap block n of partition p to block n+start(p) of the disk.
 */
static int blk_partition_remap(struct bio *bio)
{ … }

/*
 * Check a zone append write to a zoned block device.
 */
static inline blk_status_t blk_check_zone_append(struct request_queue *q,
						 struct bio *bio)
{ … }

static void __submit_bio(struct bio *bio)
{ … }
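/*
 * Illustrative sketch, not part of blk-core.c: the reference counting
 * pattern that blk_get_queue()/blk_put_queue() above support.  A caller
 * that hands a request_queue to asynchronous context takes a reference
 * first and drops it when done.  The example_* names are invented;
 * blk_get_queue(), blk_put_queue(), INIT_WORK() and schedule_work() are
 * the real interfaces.
 */
struct example_qref_work {
	struct request_queue *q;
	struct work_struct work;
};

static void example_qref_work_fn(struct work_struct *work)
{
	struct example_qref_work *ew =
		container_of(work, struct example_qref_work, work);

	/* ... use ew->q here; the reference keeps it from being freed ... */

	blk_put_queue(ew->q);	/* drop the reference taken below */
	kfree(ew);
}

static int __maybe_unused example_defer_queue_work(struct request_queue *q)
{
	struct example_qref_work *ew;

	/* blk_get_queue() fails once the queue refcount has dropped to zero. */
	if (!blk_get_queue(q))
		return -ENODEV;

	ew = kzalloc(sizeof(*ew), GFP_KERNEL);
	if (!ew) {
		blk_put_queue(q);
		return -ENOMEM;
	}

	ew->q = q;
	INIT_WORK(&ew->work, example_qref_work_fn);
	schedule_work(&ew->work);
	return 0;
}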
/*
 * The loop in this function may be a bit non-obvious, and so deserves some
 * explanation:
 *
 *  - Before entering the loop, bio->bi_next is NULL (as all callers ensure
 *    that), so we have a list with a single bio.
 *  - We pretend that we have just taken it off a longer list, so we assign
 *    bio_list to a pointer to the bio_list_on_stack, thus initialising the
 *    bio_list of new bios to be added.  ->submit_bio() may indeed add some more
 *    bios through a recursive call to submit_bio_noacct.  If it did, we find a
 *    non-NULL value in bio_list and re-enter the loop from the top.
 *  - In this case we really did just take the bio off the top of the list (no
 *    pretending) and so remove it from bio_list, and call into ->submit_bio()
 *    again.
 *
 * bio_list_on_stack[0] contains bios submitted by the current ->submit_bio.
 * bio_list_on_stack[1] contains bios that were submitted before the current
 *	->submit_bio, but that haven't been processed yet.
 */
static void __submit_bio_noacct(struct bio *bio)
{ … }

static void __submit_bio_noacct_mq(struct bio *bio)
{ … }

void submit_bio_noacct_nocheck(struct bio *bio)
{ … }

static blk_status_t blk_validate_atomic_write_op_size(struct request_queue *q,
						      struct bio *bio)
{ … }

/**
 * submit_bio_noacct - re-submit a bio to the block device layer for I/O
 * @bio: The bio describing the location in memory and on the device.
 *
 * This is a version of submit_bio() that shall only be used for I/O that is
 * resubmitted to lower level drivers by stacking block drivers.  All file
 * systems and other upper level users of the block layer should use
 * submit_bio() instead.
 */
void submit_bio_noacct(struct bio *bio)
{ … }
EXPORT_SYMBOL(…);

static void bio_set_ioprio(struct bio *bio)
{ … }

/**
 * submit_bio - submit a bio to the block device layer for I/O
 * @bio: The &struct bio which describes the I/O
 *
 * submit_bio() is used to submit I/O requests to block devices.  It is passed a
 * fully set up &struct bio that describes the I/O that needs to be done.  The
 * bio will be sent to the device described by the bi_bdev field.
 *
 * The success/failure status of the request, along with notification of
 * completion, is delivered asynchronously through the ->bi_end_io() callback
 * in @bio.  The bio must NOT be touched by the caller until ->bi_end_io() has
 * been called.
 */
void submit_bio(struct bio *bio)
{ … }
EXPORT_SYMBOL(…);

/**
 * bio_poll - poll for BIO completions
 * @bio: bio to poll for
 * @iob: batches of IO
 * @flags: BLK_POLL_* flags that control the behavior
 *
 * Poll for completions on the queue associated with the bio. Returns the
 * number of completed entries found.
 *
 * Note: the caller must either be the context that submitted @bio, or
 * be in an RCU critical section to prevent freeing of @bio.
 */
int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
{ … }
EXPORT_SYMBOL_GPL(…);

/*
 * Helper to implement file_operations.iopoll.  Requires the bio to be stored
 * in iocb->private, and cleared before freeing the bio.
 */
int iocb_bio_iopoll(struct kiocb *kiocb, struct io_comp_batch *iob,
		    unsigned int flags)
{ … }
EXPORT_SYMBOL_GPL(…);

void update_io_ticks(struct block_device *part, unsigned long now, bool end)
{ … }

unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op,
				 unsigned long start_time)
{ … }
EXPORT_SYMBOL(…);

/**
 * bio_start_io_acct - start I/O accounting for bio based drivers
 * @bio: bio to start account for
 *
 * Returns the start time that should be passed back to bio_end_io_acct().
 */
unsigned long bio_start_io_acct(struct bio *bio)
{ … }
EXPORT_SYMBOL_GPL(…);

void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
		      unsigned int sectors, unsigned long start_time)
{ … }
EXPORT_SYMBOL(…);

void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
			      struct block_device *orig_bdev)
{ … }
EXPORT_SYMBOL_GPL(…);
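/*
 * Illustrative sketch, not part of blk-core.c: allocating and submitting a
 * single read bio, then completing in ->bi_end_io(), as described in the
 * submit_bio() documentation above.  The example_* names are invented;
 * bio_alloc() (post-5.18 signature), __bio_add_page(), submit_bio() and
 * bio_put() are the real interfaces.
 */
static void example_read_end_io(struct bio *bio)
{
	/* Called asynchronously once the device has completed the I/O. */
	if (bio->bi_status)
		pr_err("example read failed: %d\n",
		       blk_status_to_errno(bio->bi_status));
	bio_put(bio);
}

static void __maybe_unused example_read_page(struct block_device *bdev,
					     struct page *page,
					     sector_t sector)
{
	/* GFP_NOIO allows reclaim, so this bio mempool allocation succeeds. */
	struct bio *bio = bio_alloc(bdev, 1, REQ_OP_READ, GFP_NOIO);

	bio->bi_iter.bi_sector = sector;
	__bio_add_page(bio, page, PAGE_SIZE, 0);
	bio->bi_end_io = example_read_end_io;

	/* The bio must not be touched again until ->bi_end_io() runs. */
	submit_bio(bio);
}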
/**
 * blk_lld_busy - Check if underlying low-level drivers of a device are busy
 * @q: the queue of the device being checked
 *
 * Description:
 *    Check if underlying low-level drivers of a device are busy.
 *    If the drivers want to export their busy state, they must set their own
 *    exporting function using blk_queue_lld_busy() first.
 *
 *    Basically, this function is used only by request stacking drivers
 *    to stop dispatching requests to underlying devices when underlying
 *    devices are busy.  This behavior improves I/O merging on the queue
 *    of the request stacking driver and prevents I/O throughput regressions
 *    under bursty I/O load.
 *
 * Return:
 *    0 - Not busy (The request stacking driver should dispatch request)
 *    1 - Busy (The request stacking driver should stop dispatching request)
 */
int blk_lld_busy(struct request_queue *q)
{ … }
EXPORT_SYMBOL_GPL(…);

int kblockd_schedule_work(struct work_struct *work)
{ … }
EXPORT_SYMBOL(…);

int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork,
				unsigned long delay)
{ … }
EXPORT_SYMBOL(…);

void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
{ … }

/**
 * blk_start_plug - initialize blk_plug and track it inside the task_struct
 * @plug: The &struct blk_plug that needs to be initialized
 *
 * Description:
 *   blk_start_plug() indicates to the block layer an intent by the caller
 *   to submit multiple I/O requests in a batch.  The block layer may use
 *   this hint to defer submitting I/Os from the caller until blk_finish_plug()
 *   is called.  However, the block layer may choose to submit requests
 *   before a call to blk_finish_plug() if the number of queued I/Os
 *   exceeds %BLK_MAX_REQUEST_COUNT, or if the size of the I/O is larger than
 *   %BLK_PLUG_FLUSH_SIZE.  The queued I/Os may also be submitted early if
 *   the task schedules (see below).
 *
 *   Tracking blk_plug inside the task_struct will help with auto-flushing the
 *   pending I/O should the task end up blocking between blk_start_plug() and
 *   blk_finish_plug().  This is important from a performance perspective, but
 *   also ensures that we don't deadlock.  For instance, if the task is blocking
 *   for a memory allocation, memory reclaim could end up wanting to free a
 *   page belonging to that request that is currently residing in our private
 *   plug.  By flushing the pending I/O when the process goes to sleep, we avoid
 *   this kind of deadlock.
 */
void blk_start_plug(struct blk_plug *plug)
{ … }
EXPORT_SYMBOL(…);

static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
{ … }

struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, void *data,
				      int size)
{ … }
EXPORT_SYMBOL(…);

void __blk_flush_plug(struct blk_plug *plug, bool from_schedule)
{ … }

/**
 * blk_finish_plug - mark the end of a batch of submitted I/O
 * @plug: The &struct blk_plug passed to blk_start_plug()
 *
 * Description:
 *   Indicate that a batch of I/O submissions is complete.  This function
 *   must be paired with an initial call to blk_start_plug().  The intent
 *   is to allow the block layer to optimize I/O submission.  See the
 *   documentation for blk_start_plug() for more information.
 */
void blk_finish_plug(struct blk_plug *plug)
{ … }
EXPORT_SYMBOL(…);

void blk_io_schedule(void)
{ … }
EXPORT_SYMBOL_GPL(…);

int __init blk_dev_init(void)
{ … }
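/*
 * Illustrative sketch, not part of blk-core.c: batching several bio
 * submissions under an on-stack plug, as described in the blk_start_plug()
 * documentation above.  example_submit_batch() and its arguments are
 * invented; blk_start_plug(), submit_bio() and blk_finish_plug() are the
 * real interfaces.
 */
static void __maybe_unused example_submit_batch(struct bio **bios,
						unsigned int nr)
{
	struct blk_plug plug;
	unsigned int i;

	/* Queued I/O gathers in the plug instead of hitting the device. */
	blk_start_plug(&plug);

	for (i = 0; i < nr; i++)
		submit_bio(bios[i]);

	/*
	 * Unplug: the batched I/O is now issued, giving the driver a chance
	 * to merge requests and amortize per-submission overhead.
	 */
	blk_finish_plug(&plug);
}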