linux/block/bio.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2001 Jens Axboe <[email protected]>
 */
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/bio-integrity.h>
#include <linux/blkdev.h>
#include <linux/uio.h>
#include <linux/iocontext.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/mempool.h>
#include <linux/workqueue.h>
#include <linux/cgroup.h>
#include <linux/highmem.h>
#include <linux/blk-crypto.h>
#include <linux/xarray.h>

#include <trace/events/block.h>
#include "blk.h"
#include "blk-rq-qos.h"
#include "blk-cgroup.h"

#define ALLOC_CACHE_THRESHOLD
#define ALLOC_CACHE_MAX

struct bio_alloc_cache {};

static struct biovec_slab {} bvec_slabs[] __read_mostly =;

static struct biovec_slab *biovec_slab(unsigned short nr_vecs)
{}

/*
 * fs_bio_set is the bio_set containing bio and iovec memory pools used by
 * IO code that does not need private memory pools.
 */
struct bio_set fs_bio_set;
EXPORT_SYMBOL();

/*
 * Our slab pool management
 */
struct bio_slab {};
static DEFINE_MUTEX(bio_slab_lock);
static DEFINE_XARRAY(bio_slabs);

static struct bio_slab *create_bio_slab(unsigned int size)
{}

static inline unsigned int bs_bio_slab_size(struct bio_set *bs)
{}

static struct kmem_cache *bio_find_or_create_slab(struct bio_set *bs)
{}

static void bio_put_slab(struct bio_set *bs)
{}

void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned short nr_vecs)
{}

/*
 * Make the first allocation restricted and don't dump info on allocation
 * failures, since we'll fall back to the mempool in case of failure.
 */
static inline gfp_t bvec_alloc_gfp(gfp_t gfp)
{}

struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
		gfp_t gfp_mask)
{}

void bio_uninit(struct bio *bio)
{}
EXPORT_SYMBOL();

static void bio_free(struct bio *bio)
{}

/*
 * Users of this function have their own bio allocation. Subsequently,
 * they must remember to pair any call to bio_init() with bio_uninit()
 * when IO has completed, or when the bio is released.
 */
void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
	      unsigned short max_vecs, blk_opf_t opf)
{}
EXPORT_SYMBOL();

/**
 * bio_reset - reinitialize a bio
 * @bio:	bio to reset
 * @bdev:	block device to use the bio for
 * @opf:	operation and flags for bio
 *
 * Description:
 *   After calling bio_reset(), @bio will be in the same state as a freshly
 *   allocated bio returned bio bio_alloc_bioset() - the only fields that are
 *   preserved are the ones that are initialized by bio_alloc_bioset(). See
 *   comment in struct bio.
 */
void bio_reset(struct bio *bio, struct block_device *bdev, blk_opf_t opf)
{}
EXPORT_SYMBOL();

static struct bio *__bio_chain_endio(struct bio *bio)
{}

static void bio_chain_endio(struct bio *bio)
{}

/**
 * bio_chain - chain bio completions
 * @bio: the target bio
 * @parent: the parent bio of @bio
 *
 * The caller won't have a bi_end_io called when @bio completes - instead,
 * @parent's bi_end_io won't be called until both @parent and @bio have
 * completed; the chained bio will also be freed when it completes.
 *
 * The caller must not set bi_private or bi_end_io in @bio.
 */
void bio_chain(struct bio *bio, struct bio *parent)
{}
EXPORT_SYMBOL();

/**
 * bio_chain_and_submit - submit a bio after chaining it to another one
 * @prev: bio to chain and submit
 * @new: bio to chain to
 *
 * If @prev is non-NULL, chain it to @new and submit it.
 *
 * Return: @new.
 */
struct bio *bio_chain_and_submit(struct bio *prev, struct bio *new)
{}

struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev,
		unsigned int nr_pages, blk_opf_t opf, gfp_t gfp)
{}
EXPORT_SYMBOL_GPL();

static void bio_alloc_rescue(struct work_struct *work)
{}

static void punt_bios_to_rescuer(struct bio_set *bs)
{}

static void bio_alloc_irq_cache_splice(struct bio_alloc_cache *cache)
{}

static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
		unsigned short nr_vecs, blk_opf_t opf, gfp_t gfp,
		struct bio_set *bs)
{}

/**
 * bio_alloc_bioset - allocate a bio for I/O
 * @bdev:	block device to allocate the bio for (can be %NULL)
 * @nr_vecs:	number of bvecs to pre-allocate
 * @opf:	operation and flags for bio
 * @gfp_mask:   the GFP_* mask given to the slab allocator
 * @bs:		the bio_set to allocate from.
 *
 * Allocate a bio from the mempools in @bs.
 *
 * If %__GFP_DIRECT_RECLAIM is set then bio_alloc will always be able to
 * allocate a bio.  This is due to the mempool guarantees.  To make this work,
 * callers must never allocate more than 1 bio at a time from the general pool.
 * Callers that need to allocate more than 1 bio must always submit the
 * previously allocated bio for IO before attempting to allocate a new one.
 * Failure to do so can cause deadlocks under memory pressure.
 *
 * Note that when running under submit_bio_noacct() (i.e. any block driver),
 * bios are not submitted until after you return - see the code in
 * submit_bio_noacct() that converts recursion into iteration, to prevent
 * stack overflows.
 *
 * This would normally mean allocating multiple bios under submit_bio_noacct()
 * would be susceptible to deadlocks, but we have
 * deadlock avoidance code that resubmits any blocked bios from a rescuer
 * thread.
 *
 * However, we do not guarantee forward progress for allocations from other
 * mempools. Doing multiple allocations from the same mempool under
 * submit_bio_noacct() should be avoided - instead, use bio_set's front_pad
 * for per bio allocations.
 *
 * Returns: Pointer to new bio on success, NULL on failure.
 */
struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
			     blk_opf_t opf, gfp_t gfp_mask,
			     struct bio_set *bs)
{}
EXPORT_SYMBOL();

/**
 * bio_kmalloc - kmalloc a bio
 * @nr_vecs:	number of bio_vecs to allocate
 * @gfp_mask:   the GFP_* mask given to the slab allocator
 *
 * Use kmalloc to allocate a bio (including bvecs).  The bio must be initialized
 * using bio_init() before use.  To free a bio returned from this function use
 * kfree() after calling bio_uninit().  A bio returned from this function can
 * be reused by calling bio_uninit() before calling bio_init() again.
 *
 * Note that unlike bio_alloc() or bio_alloc_bioset() allocations from this
 * function are not backed by a mempool can fail.  Do not use this function
 * for allocations in the file system I/O path.
 *
 * Returns: Pointer to new bio on success, NULL on failure.
 */
struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask)
{}
EXPORT_SYMBOL();

void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start)
{}
EXPORT_SYMBOL();

/**
 * bio_truncate - truncate the bio to small size of @new_size
 * @bio:	the bio to be truncated
 * @new_size:	new size for truncating the bio
 *
 * Description:
 *   Truncate the bio to new size of @new_size. If bio_op(bio) is
 *   REQ_OP_READ, zero the truncated part. This function should only
 *   be used for handling corner cases, such as bio eod.
 */
static void bio_truncate(struct bio *bio, unsigned new_size)
{}

/**
 * guard_bio_eod - truncate a BIO to fit the block device
 * @bio:	bio to truncate
 *
 * This allows us to do IO even on the odd last sectors of a device, even if the
 * block size is some multiple of the physical sector size.
 *
 * We'll just truncate the bio to the size of the device, and clear the end of
 * the buffer head manually.  Truly out-of-range accesses will turn into actual
 * I/O errors, this only handles the "we need to be able to do I/O at the final
 * sector" case.
 */
void guard_bio_eod(struct bio *bio)
{}

static int __bio_alloc_cache_prune(struct bio_alloc_cache *cache,
				   unsigned int nr)
{}

static void bio_alloc_cache_prune(struct bio_alloc_cache *cache,
				  unsigned int nr)
{}

static int bio_cpu_dead(unsigned int cpu, struct hlist_node *node)
{}

static void bio_alloc_cache_destroy(struct bio_set *bs)
{}

static inline void bio_put_percpu_cache(struct bio *bio)
{}

/**
 * bio_put - release a reference to a bio
 * @bio:   bio to release reference to
 *
 * Description:
 *   Put a reference to a &struct bio, either one you have gotten with
 *   bio_alloc, bio_get or bio_clone_*. The last put of a bio will free it.
 **/
void bio_put(struct bio *bio)
{}
EXPORT_SYMBOL();

static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
{}

/**
 * bio_alloc_clone - clone a bio that shares the original bio's biovec
 * @bdev: block_device to clone onto
 * @bio_src: bio to clone from
 * @gfp: allocation priority
 * @bs: bio_set to allocate from
 *
 * Allocate a new bio that is a clone of @bio_src. The caller owns the returned
 * bio, but not the actual data it points to.
 *
 * The caller must ensure that the return bio is not freed before @bio_src.
 */
struct bio *bio_alloc_clone(struct block_device *bdev, struct bio *bio_src,
		gfp_t gfp, struct bio_set *bs)
{}
EXPORT_SYMBOL();

/**
 * bio_init_clone - clone a bio that shares the original bio's biovec
 * @bdev: block_device to clone onto
 * @bio: bio to clone into
 * @bio_src: bio to clone from
 * @gfp: allocation priority
 *
 * Initialize a new bio in caller provided memory that is a clone of @bio_src.
 * The caller owns the returned bio, but not the actual data it points to.
 *
 * The caller must ensure that @bio_src is not freed before @bio.
 */
int bio_init_clone(struct block_device *bdev, struct bio *bio,
		struct bio *bio_src, gfp_t gfp)
{}
EXPORT_SYMBOL();

/**
 * bio_full - check if the bio is full
 * @bio:	bio to check
 * @len:	length of one segment to be added
 *
 * Return true if @bio is full and one segment with @len bytes can't be
 * added to the bio, otherwise return false
 */
static inline bool bio_full(struct bio *bio, unsigned len)
{}

static bool bvec_try_merge_page(struct bio_vec *bv, struct page *page,
		unsigned int len, unsigned int off, bool *same_page)
{}

/*
 * Try to merge a page into a segment, while obeying the hardware segment
 * size limit.  This is not for normal read/write bios, but for passthrough
 * or Zone Append operations that we can't split.
 */
bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
		struct page *page, unsigned len, unsigned offset,
		bool *same_page)
{}

/**
 * bio_add_hw_page - attempt to add a page to a bio with hw constraints
 * @q: the target queue
 * @bio: destination bio
 * @page: page to add
 * @len: vec entry length
 * @offset: vec entry offset
 * @max_sectors: maximum number of sectors that can be added
 * @same_page: return if the segment has been merged inside the same page
 *
 * Add a page to a bio while respecting the hardware max_sectors, max_segment
 * and gap limitations.
 */
int bio_add_hw_page(struct request_queue *q, struct bio *bio,
		struct page *page, unsigned int len, unsigned int offset,
		unsigned int max_sectors, bool *same_page)
{}

/**
 * bio_add_pc_page	- attempt to add page to passthrough bio
 * @q: the target queue
 * @bio: destination bio
 * @page: page to add
 * @len: vec entry length
 * @offset: vec entry offset
 *
 * Attempt to add a page to the bio_vec maplist. This can fail for a
 * number of reasons, such as the bio being full or target block device
 * limitations. The target block device must allow bio's up to PAGE_SIZE,
 * so it is always possible to add a single page to an empty bio.
 *
 * This should only be used by passthrough bios.
 */
int bio_add_pc_page(struct request_queue *q, struct bio *bio,
		struct page *page, unsigned int len, unsigned int offset)
{}
EXPORT_SYMBOL();

/**
 * bio_add_zone_append_page - attempt to add page to zone-append bio
 * @bio: destination bio
 * @page: page to add
 * @len: vec entry length
 * @offset: vec entry offset
 *
 * Attempt to add a page to the bio_vec maplist of a bio that will be submitted
 * for a zone-append request. This can fail for a number of reasons, such as the
 * bio being full or the target block device is not a zoned block device or
 * other limitations of the target block device. The target block device must
 * allow bio's up to PAGE_SIZE, so it is always possible to add a single page
 * to an empty bio.
 *
 * Returns: number of bytes added to the bio, or 0 in case of a failure.
 */
int bio_add_zone_append_page(struct bio *bio, struct page *page,
			     unsigned int len, unsigned int offset)
{}
EXPORT_SYMBOL_GPL();

/**
 * __bio_add_page - add page(s) to a bio in a new segment
 * @bio: destination bio
 * @page: start page to add
 * @len: length of the data to add, may cross pages
 * @off: offset of the data relative to @page, may cross pages
 *
 * Add the data at @page + @off to @bio as a new bvec.  The caller must ensure
 * that @bio has space for another bvec.
 */
void __bio_add_page(struct bio *bio, struct page *page,
		unsigned int len, unsigned int off)
{}
EXPORT_SYMBOL_GPL();

/**
 *	bio_add_page	-	attempt to add page(s) to bio
 *	@bio: destination bio
 *	@page: start page to add
 *	@len: vec entry length, may cross pages
 *	@offset: vec entry offset relative to @page, may cross pages
 *
 *	Attempt to add page(s) to the bio_vec maplist. This will only fail
 *	if either bio->bi_vcnt == bio->bi_max_vecs or it's a cloned bio.
 */
int bio_add_page(struct bio *bio, struct page *page,
		 unsigned int len, unsigned int offset)
{}
EXPORT_SYMBOL();

void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len,
			  size_t off)
{}
EXPORT_SYMBOL_GPL();

/**
 * bio_add_folio - Attempt to add part of a folio to a bio.
 * @bio: BIO to add to.
 * @folio: Folio to add.
 * @len: How many bytes from the folio to add.
 * @off: First byte in this folio to add.
 *
 * Filesystems that use folios can call this function instead of calling
 * bio_add_page() for each page in the folio.  If @off is bigger than
 * PAGE_SIZE, this function can create a bio_vec that starts in a page
 * after the bv_page.  BIOs do not support folios that are 4GiB or larger.
 *
 * Return: Whether the addition was successful.
 */
bool bio_add_folio(struct bio *bio, struct folio *folio, size_t len,
		   size_t off)
{}
EXPORT_SYMBOL();

void __bio_release_pages(struct bio *bio, bool mark_dirty)
{}
EXPORT_SYMBOL_GPL();

void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
{}

static int bio_iov_add_page(struct bio *bio, struct page *page,
		unsigned int len, unsigned int offset)
{}

static int bio_iov_add_zone_append_page(struct bio *bio, struct page *page,
		unsigned int len, unsigned int offset)
{}

#define PAGE_PTRS_PER_BVEC

/**
 * __bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
 * @bio: bio to add pages to
 * @iter: iov iterator describing the region to be mapped
 *
 * Extracts pages from *iter and appends them to @bio's bvec array.  The pages
 * will have to be cleaned up in the way indicated by the BIO_PAGE_PINNED flag.
 * For a multi-segment *iter, this function only adds pages from the next
 * non-empty segment of the iov iterator.
 */
static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
{}

/**
 * bio_iov_iter_get_pages - add user or kernel pages to a bio
 * @bio: bio to add pages to
 * @iter: iov iterator describing the region to be added
 *
 * This takes either an iterator pointing to user memory, or one pointing to
 * kernel pages (BVEC iterator). If we're adding user pages, we pin them and
 * map them into the kernel. On IO completion, the caller should put those
 * pages. For bvec based iterators bio_iov_iter_get_pages() uses the provided
 * bvecs rather than copying them. Hence anyone issuing kiocb based IO needs
 * to ensure the bvecs and pages stay referenced until the submitted I/O is
 * completed by a call to ->ki_complete() or returns with an error other than
 * -EIOCBQUEUED. The caller needs to check if the bio is flagged BIO_NO_PAGE_REF
 * on IO completion. If it isn't, then pages should be released.
 *
 * The function tries, but does not guarantee, to pin as many pages as
 * fit into the bio, or are requested in @iter, whatever is smaller. If
 * MM encounters an error pinning the requested pages, it stops. Error
 * is returned only if 0 pages could be pinned.
 */
int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
{}
EXPORT_SYMBOL_GPL();

static void submit_bio_wait_endio(struct bio *bio)
{}

/**
 * submit_bio_wait - submit a bio, and wait until it completes
 * @bio: The &struct bio which describes the I/O
 *
 * Simple wrapper around submit_bio(). Returns 0 on success, or the error from
 * bio_endio() on failure.
 *
 * WARNING: Unlike to how submit_bio() is usually used, this function does not
 * result in bio reference to be consumed. The caller must drop the reference
 * on his own.
 */
int submit_bio_wait(struct bio *bio)
{}
EXPORT_SYMBOL();

static void bio_wait_end_io(struct bio *bio)
{}

/*
 * bio_await_chain - ends @bio and waits for every chained bio to complete
 */
void bio_await_chain(struct bio *bio)
{}

void __bio_advance(struct bio *bio, unsigned bytes)
{}
EXPORT_SYMBOL();

void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
			struct bio *src, struct bvec_iter *src_iter)
{}
EXPORT_SYMBOL();

/**
 * bio_copy_data - copy contents of data buffers from one bio to another
 * @src: source bio
 * @dst: destination bio
 *
 * Stops when it reaches the end of either @src or @dst - that is, copies
 * min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of bios).
 */
void bio_copy_data(struct bio *dst, struct bio *src)
{}
EXPORT_SYMBOL();

void bio_free_pages(struct bio *bio)
{}
EXPORT_SYMBOL();

/*
 * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
 * for performing direct-IO in BIOs.
 *
 * The problem is that we cannot run folio_mark_dirty() from interrupt context
 * because the required locks are not interrupt-safe.  So what we can do is to
 * mark the pages dirty _before_ performing IO.  And in interrupt context,
 * check that the pages are still dirty.   If so, fine.  If not, redirty them
 * in process context.
 *
 * Note that this code is very hard to test under normal circumstances because
 * direct-io pins the pages with get_user_pages().  This makes
 * is_page_cache_freeable return false, and the VM will not clean the pages.
 * But other code (eg, flusher threads) could clean the pages if they are mapped
 * pagecache.
 *
 * Simply disabling the call to bio_set_pages_dirty() is a good way to test the
 * deferred bio dirtying paths.
 */

/*
 * bio_set_pages_dirty() will mark all the bio's pages as dirty.
 */
void bio_set_pages_dirty(struct bio *bio)
{}
EXPORT_SYMBOL_GPL();

/*
 * bio_check_pages_dirty() will check that all the BIO's pages are still dirty.
 * If they are, then fine.  If, however, some pages are clean then they must
 * have been written out during the direct-IO read.  So we take another ref on
 * the BIO and re-dirty the pages in process context.
 *
 * It is expected that bio_check_pages_dirty() will wholly own the BIO from
 * here on.  It will unpin each page and will run one bio_put() against the
 * BIO.
 */

static void bio_dirty_fn(struct work_struct *work);

static DECLARE_WORK(bio_dirty_work, bio_dirty_fn);
static DEFINE_SPINLOCK(bio_dirty_lock);
static struct bio *bio_dirty_list;

/*
 * This runs in process context
 */
static void bio_dirty_fn(struct work_struct *work)
{}

void bio_check_pages_dirty(struct bio *bio)
{}
EXPORT_SYMBOL_GPL();

static inline bool bio_remaining_done(struct bio *bio)
{}

/**
 * bio_endio - end I/O on a bio
 * @bio:	bio
 *
 * Description:
 *   bio_endio() will end I/O on the whole bio. bio_endio() is the preferred
 *   way to end I/O on a bio. No one should call bi_end_io() directly on a
 *   bio unless they own it and thus know that it has an end_io function.
 *
 *   bio_endio() can be called several times on a bio that has been chained
 *   using bio_chain().  The ->bi_end_io() function will only be called the
 *   last time.
 **/
void bio_endio(struct bio *bio)
{}
EXPORT_SYMBOL();

/**
 * bio_split - split a bio
 * @bio:	bio to split
 * @sectors:	number of sectors to split from the front of @bio
 * @gfp:	gfp mask
 * @bs:		bio set to allocate from
 *
 * Allocates and returns a new bio which represents @sectors from the start of
 * @bio, and updates @bio to represent the remaining sectors.
 *
 * Unless this is a discard request the newly allocated bio will point
 * to @bio's bi_io_vec. It is the caller's responsibility to ensure that
 * neither @bio nor @bs are freed before the split bio.
 */
struct bio *bio_split(struct bio *bio, int sectors,
		      gfp_t gfp, struct bio_set *bs)
{}
EXPORT_SYMBOL();

/**
 * bio_trim - trim a bio
 * @bio:	bio to trim
 * @offset:	number of sectors to trim from the front of @bio
 * @size:	size we want to trim @bio to, in sectors
 *
 * This function is typically used for bios that are cloned and submitted
 * to the underlying device in parts.
 */
void bio_trim(struct bio *bio, sector_t offset, sector_t size)
{}
EXPORT_SYMBOL_GPL();

/*
 * create memory pools for biovec's in a bio_set.
 * use the global biovec slabs created for general use.
 */
int biovec_init_pool(mempool_t *pool, int pool_entries)
{}

/*
 * bioset_exit - exit a bioset initialized with bioset_init()
 *
 * May be called on a zeroed but uninitialized bioset (i.e. allocated with
 * kzalloc()).
 */
void bioset_exit(struct bio_set *bs)
{}
EXPORT_SYMBOL();

/**
 * bioset_init - Initialize a bio_set
 * @bs:		pool to initialize
 * @pool_size:	Number of bio and bio_vecs to cache in the mempool
 * @front_pad:	Number of bytes to allocate in front of the returned bio
 * @flags:	Flags to modify behavior, currently %BIOSET_NEED_BVECS
 *              and %BIOSET_NEED_RESCUER
 *
 * Description:
 *    Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
 *    to ask for a number of bytes to be allocated in front of the bio.
 *    Front pad allocation is useful for embedding the bio inside
 *    another structure, to avoid allocating extra data to go with the bio.
 *    Note that the bio must be embedded at the END of that structure always,
 *    or things will break badly.
 *    If %BIOSET_NEED_BVECS is set in @flags, a separate pool will be allocated
 *    for allocating iovecs.  This pool is not needed e.g. for bio_init_clone().
 *    If %BIOSET_NEED_RESCUER is set, a workqueue is created which can be used
 *    to dispatch queued requests when the mempool runs out of space.
 *
 */
int bioset_init(struct bio_set *bs,
		unsigned int pool_size,
		unsigned int front_pad,
		int flags)
{}
EXPORT_SYMBOL();

static int __init init_bio(void)
{}
subsys_initcall(init_bio);