// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2023 Red Hat
 */

#include "data-vio.h"

#include <linux/atomic.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/delay.h>
#include <linux/device-mapper.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/lz4.h>
#include <linux/minmax.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/wait.h>

#include "logger.h"
#include "memory-alloc.h"
#include "murmurhash3.h"
#include "permassert.h"

#include "block-map.h"
#include "dump.h"
#include "encodings.h"
#include "int-map.h"
#include "io-submitter.h"
#include "logical-zone.h"
#include "packer.h"
#include "recovery-journal.h"
#include "slab-depot.h"
#include "status-codes.h"
#include "types.h"
#include "vdo.h"
#include "vio.h"
#include "wait-queue.h"

/**
 * DOC: Bio flags.
 *
 * For certain flags set on user bios, if the user bio has not yet been acknowledged, setting those
 * flags on our own bio(s) for that request may help underlying layers better fulfill the user
 * bio's needs. This constant contains the aggregate of those flags; VDO strips all the other
 * flags, as they convey incorrect information.
 *
 * These flags are always irrelevant if we have already finished the user bio as they are only
 * hints on IO importance. If VDO has finished the user bio, any remaining IO done doesn't care how
 * important finishing the finished bio was.
 *
 * Note that bio.c contains the complete list of flags we believe may be set; the following list
 * explains the action taken with each of those flags VDO could receive:
 *
 * * REQ_SYNC: Passed down if the user bio is not yet completed, since it indicates the user bio
 *   completion is required for further work to be done by the issuer.
 * * REQ_META: Passed down if the user bio is not yet completed, since it may mean the lower layer
 *   treats it as more urgent, similar to REQ_SYNC.
 * * REQ_PRIO: Passed down if the user bio is not yet completed, since it indicates the user bio is
 *   important.
 * * REQ_NOMERGE: Set only if the incoming bio was split; irrelevant to VDO IO.
 * * REQ_IDLE: Set if the incoming bio had more IO quickly following; VDO's IO pattern doesn't
 *   match incoming IO, so this flag is incorrect for it.
 * * REQ_FUA: Handled separately, and irrelevant to VDO IO otherwise.
 * * REQ_RAHEAD: Passed down, as, for reads, it indicates trivial importance.
 * * REQ_BACKGROUND: Not passed down, as VIOs are a limited resource and VDO needs them recycled
 *   ASAP to service heavy load, which is the only place where REQ_BACKGROUND might aid in load
 *   prioritization.
 */
static blk_opf_t PASSTHROUGH_FLAGS = …;
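
/*
 * Illustrative sketch (not part of the VDO sources): given the policy documented above, applying
 * the passthrough mask to a VDO-issued bio for a not-yet-acknowledged user bio might look like
 * the helper below. The helper name and the acknowledged flag are hypothetical; the real logic
 * lives in the (elided) bio submission paths.
 */
static inline void sketch_apply_passthrough_flags(struct bio *vdo_bio, const struct bio *user_bio,
						  bool user_bio_acknowledged)
{
	/* Once the user bio has been acknowledged, its hints no longer matter. */
	if (user_bio_acknowledged)
		return;

	/* Copy only the whitelisted hint flags; everything else would be misleading. */
	vdo_bio->bi_opf |= (user_bio->bi_opf & PASSTHROUGH_FLAGS);
}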

/**
 * DOC:
 *
 * The data_vio_pool maintains the pool of data_vios which a vdo uses to service incoming bios. For
 * correctness, and in order to avoid potentially expensive or blocking memory allocations during
 * normal operation, the number of concurrently active data_vios is capped. Furthermore, in order
 * to avoid starvation of reads and writes, at most 75% of the data_vios may be used for discards.
 * The data_vio_pool is responsible for enforcing these limits. Threads submitting bios for which a
 * data_vio or discard permit is not available will block until the necessary resources are
 * available. The pool is also responsible for distributing resources to blocked threads and waking
 * them. Finally, the pool attempts to batch the work of recycling data_vios by performing the work
 * of actually assigning resources to blocked threads or placing data_vios back into the pool on a
 * single cpu at a time.
 *
 * The pool contains two "limiters", one for tracking data_vios and one for tracking discard
 * permits. The limiters also provide safe cross-thread access to pool statistics without the need
 * to take the pool's lock. When a thread submits a bio to a vdo device, it will first attempt to
 * get a discard permit if it is a discard, and then to get a data_vio. If the necessary resources
 * are available, the incoming bio will be assigned to the acquired data_vio, and it will be
 * launched. However, if either of these is unavailable, the arrival time of the bio is recorded in
 * the bio's bi_private field, the bio and its submitter are both queued on the appropriate
 * limiter, and the submitting thread will then put itself to sleep. (Note that this mechanism will
 * break if jiffies are only 32 bits.)
 *
 * Whenever a data_vio has completed processing for the bio it was servicing, release_data_vio()
 * will be called on it. This function will add the data_vio to a funnel queue, and then check the
 * state of the pool. If the pool is not currently processing released data_vios, the pool's
 * completion will be enqueued on a cpu queue. This obviates the need for the releasing threads to
 * hold the pool's lock, and also batches release work while avoiding starvation of the cpu
 * threads.
 *
 * Whenever the pool's completion is run on a cpu thread, it calls process_release_callback() which
 * processes a batch of returned data_vios (currently at most 32) from the pool's funnel queue. For
 * each data_vio, it first checks whether that data_vio was processing a discard. If so, and there
 * is a blocked bio waiting for a discard permit, that permit is notionally transferred to the
 * eldest discard waiter, and that waiter is moved to the end of the list of discard bios waiting
 * for a data_vio. If there are no discard waiters, the discard permit is returned to the pool.
 * Next, the data_vio is assigned to the oldest blocked bio which either has a discard permit, or
 * doesn't need one, and is relaunched. If neither of these exists, the data_vio is returned to the
 * pool. Finally, if any waiting bios were launched, the threads which blocked trying to submit
 * them are awakened.
 */
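
/*
 * Illustrative sketch (not the VDO implementation): per the DOC comment above, a blocked bio's
 * arrival time is stashed in its bi_private field while it waits for a permit or a data_vio.
 * Under that assumption, the round trip might look like the hypothetical helpers below; the real
 * stash and the get_arrival_time() accessor are elided.
 */
static inline void sketch_record_arrival_time(struct bio *bio)
{
	/* jiffies must be wide enough to outlive the wait; see the 32-bit caveat above. */
	bio->bi_private = (void *) (uintptr_t) jiffies;
}

static inline u64 sketch_read_arrival_time(struct bio *bio)
{
	return (u64) (uintptr_t) bio->bi_private;
}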

#define DATA_VIO_RELEASE_BATCH_SIZE …

static const unsigned int VDO_SECTORS_PER_BLOCK_MASK = …;
static const u32 COMPRESSION_STATUS_MASK = …;
static const u32 MAY_NOT_COMPRESS_MASK = …;

struct limiter;
assigner_fn;

/* Bookkeeping structure for a single type of resource. */
struct limiter { … };

/*
 * A data_vio_pool is a collection of preallocated data_vios which may be acquired from any thread,
 * and are released in batches.
 */
struct data_vio_pool { … };

static const char * const ASYNC_OPERATION_NAMES[] = …;

/* The steps taken cleaning up a VIO, in the order they are performed. */
enum data_vio_cleanup_stage { … };

static inline struct data_vio_pool * __must_check
as_data_vio_pool(struct vdo_completion *completion)
{ … }

static inline u64 get_arrival_time(struct bio *bio)
{ … }

/**
 * check_for_drain_complete_locked() - Check whether a data_vio_pool has no outstanding data_vios
 *                                     or waiters while holding the pool's lock.
 */
static bool check_for_drain_complete_locked(struct data_vio_pool *pool)
{ … }

static void initialize_lbn_lock(struct data_vio *data_vio, logical_block_number_t lbn)
{ … }

static void launch_locked_request(struct data_vio *data_vio)
{ … }

static void acknowledge_data_vio(struct data_vio *data_vio)
{ … }

static void copy_to_bio(struct bio *bio, char *data_ptr)
{ … }

struct data_vio_compression_status get_data_vio_compression_status(struct data_vio *data_vio)
{ … }

/**
 * pack_status() - Convert a data_vio_compression_status into a u32 which may be stored
 *                 atomically.
 * @status: The state to convert.
 *
 * Return: The compression state packed into a u32.
 */
static u32 __must_check pack_status(struct data_vio_compression_status status)
{ … }

/**
 * set_data_vio_compression_status() - Set the compression status of a data_vio.
 * @status: The expected current status of the data_vio.
 * @new_status: The status to set.
 *
 * Return: true if the new status was set, false if the data_vio's compression status did not
 *         match the expected status, and so was left unchanged.
 */
static bool __must_check set_data_vio_compression_status(struct data_vio *data_vio,
							  struct data_vio_compression_status status,
							  struct data_vio_compression_status new_status)
{ … }
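
/*
 * Illustrative sketch (not the VDO implementation): the two functions above describe packing a
 * data_vio_compression_status into a u32 so that it can be read and updated atomically with
 * compare-and-swap. A minimal version of that pattern, using a hypothetical layout (stage in the
 * low bits, a single "may not compress" flag above them), might look like this; the real layout
 * is defined by the elided COMPRESSION_STATUS_MASK and MAY_NOT_COMPRESS_MASK constants.
 */
struct sketch_compression_status {
	u32 stage;
	bool may_not_compress;
};

static inline u32 sketch_pack_status(struct sketch_compression_status status)
{
	return (status.stage & 0xff) | (status.may_not_compress ? (1U << 8) : 0);
}

static inline bool sketch_try_set_status(atomic_t *field,
					 struct sketch_compression_status expected,
					 struct sketch_compression_status replacement)
{
	u32 old = sketch_pack_status(expected);

	/* The swap only succeeds if no other thread changed the packed value in the meantime. */
	return (u32) atomic_cmpxchg(field, old, sketch_pack_status(replacement)) == old;
}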

struct data_vio_compression_status advance_data_vio_compression_stage(struct data_vio *data_vio)
{ … }

/**
 * cancel_data_vio_compression() - Prevent this data_vio from being compressed or packed.
 *
 * Return: true if the data_vio is in the packer and the caller was the first caller to cancel it.
 */
bool cancel_data_vio_compression(struct data_vio *data_vio)
{ … }

/**
 * attempt_logical_block_lock() - Attempt to acquire the lock on a logical block.
 * @completion: The data_vio for an external data request as a completion.
 *
 * This is the start of the path for all external requests. It is registered in launch_data_vio().
 */
static void attempt_logical_block_lock(struct vdo_completion *completion)
{ … }

/**
 * launch_data_vio() - (Re)initialize a data_vio to have a new logical block number, keeping the
 *                     same parent and other state and send it on its way.
 */
static void launch_data_vio(struct data_vio *data_vio, logical_block_number_t lbn)
{ … }

static bool is_zero_block(char *block)
{ … }

static void copy_from_bio(struct bio *bio, char *data_ptr)
{ … }

static void launch_bio(struct vdo *vdo, struct data_vio *data_vio, struct bio *bio)
{ … }

static void assign_data_vio(struct limiter *limiter, struct data_vio *data_vio)
{ … }

static void assign_discard_permit(struct limiter *limiter)
{ … }

static void get_waiters(struct limiter *limiter)
{ … }

static inline struct data_vio *get_available_data_vio(struct data_vio_pool *pool)
{ … }

static void assign_data_vio_to_waiter(struct limiter *limiter)
{ … }

static void update_limiter(struct limiter *limiter)
{ … }

/**
 * schedule_releases() - Ensure that release processing is scheduled.
 *
 * If this call switches the state to processing, enqueue. Otherwise, some other thread has already
 * done so.
 */
static void schedule_releases(struct data_vio_pool *pool)
{ … }
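
/*
 * Illustrative sketch (not the VDO implementation): schedule_releases() above is described as
 * switching the pool's state to "processing" and enqueueing its completion only when this call
 * performed the switch. A generic version of that claim-or-skip pattern, with hypothetical state
 * values and a hypothetical enqueue callback, might look like this.
 */
enum sketch_release_state {
	SKETCH_IDLE,
	SKETCH_PROCESSING,
};

static inline void sketch_schedule_releases(atomic_t *state, void (*enqueue)(void *context),
					    void *context)
{
	/* Only the caller that flips IDLE to PROCESSING gets to enqueue the completion. */
	if (atomic_cmpxchg(state, SKETCH_IDLE, SKETCH_PROCESSING) == SKETCH_IDLE)
		enqueue(context);
}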

static void reuse_or_release_resources(struct data_vio_pool *pool, struct data_vio *data_vio,
				       struct list_head *returned)
{ … }

/**
 * process_release_callback() - Process a batch of data_vio releases.
 * @completion: The pool with data_vios to release.
 */
static void process_release_callback(struct vdo_completion *completion)
{ … }

static void initialize_limiter(struct limiter *limiter, struct data_vio_pool *pool,
			       assigner_fn assigner, data_vio_count_t limit)
{ … }

/**
 * initialize_data_vio() - Allocate the components of a data_vio.
 *
 * The caller is responsible for cleaning up the data_vio on error.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int initialize_data_vio(struct data_vio *data_vio, struct vdo *vdo)
{ … }

static void destroy_data_vio(struct data_vio *data_vio)
{ … }

/**
 * make_data_vio_pool() - Initialize a data_vio pool.
 * @vdo: The vdo to which the pool will belong.
 * @pool_size: The number of data_vios in the pool.
 * @discard_limit: The maximum number of data_vios which may be used for discards.
 * @pool_ptr: A pointer to hold the newly allocated pool.
 */
int make_data_vio_pool(struct vdo *vdo, data_vio_count_t pool_size,
		       data_vio_count_t discard_limit, struct data_vio_pool **pool_ptr)
{ … }

/**
 * free_data_vio_pool() - Free a data_vio_pool and the data_vios in it.
 *
 * All data_vios must be returned to the pool before calling this function.
 */
void free_data_vio_pool(struct data_vio_pool *pool)
{ … }

static bool acquire_permit(struct limiter *limiter)
{ … }

static void wait_permit(struct limiter *limiter, struct bio *bio)
	__releases(&limiter->pool->lock)
{ … }

/**
 * vdo_launch_bio() - Acquire a data_vio from the pool, assign the bio to it, and launch it.
 *
 * This will block if data_vios or discard permits are not available.
 */
void vdo_launch_bio(struct data_vio_pool *pool, struct bio *bio)
{ … }

/* Implements vdo_admin_initiator_fn. */
static void initiate_drain(struct admin_state *state)
{ … }

static void assert_on_vdo_cpu_thread(const struct vdo *vdo, const char *name)
{ … }

/**
 * drain_data_vio_pool() - Wait asynchronously for all data_vios to be returned to the pool.
 * @pool: The pool to drain.
 * @completion: The completion to notify when the pool has drained.
 */
void drain_data_vio_pool(struct data_vio_pool *pool, struct vdo_completion *completion)
{ … }

/**
 * resume_data_vio_pool() - Resume a data_vio pool.
 * @pool: The pool to resume.
 * @completion: The completion to notify when the pool has resumed.
 */
void resume_data_vio_pool(struct data_vio_pool *pool, struct vdo_completion *completion)
{ … }

static void dump_limiter(const char *name, struct limiter *limiter)
{ … }

/**
 * dump_data_vio_pool() - Dump a data_vio pool to the log.
 * @pool: The pool to dump.
 * @dump_vios: Whether to dump the details of each busy data_vio as well.
 */
void dump_data_vio_pool(struct data_vio_pool *pool, bool dump_vios)
{ … }

data_vio_count_t get_data_vio_pool_active_discards(struct data_vio_pool *pool)
{ … }

data_vio_count_t get_data_vio_pool_discard_limit(struct data_vio_pool *pool)
{ … }

data_vio_count_t get_data_vio_pool_maximum_discards(struct data_vio_pool *pool)
{ … }

int set_data_vio_pool_discard_limit(struct data_vio_pool *pool, data_vio_count_t limit)
{ … }

data_vio_count_t get_data_vio_pool_active_requests(struct data_vio_pool *pool)
{ … }

data_vio_count_t get_data_vio_pool_request_limit(struct data_vio_pool *pool)
{ … }

data_vio_count_t get_data_vio_pool_maximum_requests(struct data_vio_pool *pool)
{ … }

static void update_data_vio_error_stats(struct data_vio *data_vio)
{ … }

static void perform_cleanup_stage(struct data_vio *data_vio, enum data_vio_cleanup_stage stage);

/**
 * release_allocated_lock() - Release the PBN lock and/or the reference on the allocated block at
 *                            the end of processing a data_vio.
 */
static void release_allocated_lock(struct vdo_completion *completion)
{ … }

/** release_lock() - Release an uncontended LBN lock. */
static void release_lock(struct data_vio *data_vio, struct lbn_lock *lock)
{ … }

/** transfer_lock() - Transfer a contended LBN lock to the eldest waiter. */
static void transfer_lock(struct data_vio *data_vio, struct lbn_lock *lock)
{ … }

/**
 * release_logical_lock() - Release the logical block lock and flush generation lock at the end of
 *                          processing a data_vio.
 */
static void release_logical_lock(struct vdo_completion *completion)
{ … }

/** clean_hash_lock() - Release the hash lock at the end of processing a data_vio. */
static void clean_hash_lock(struct vdo_completion *completion)
{ … }

/**
 * finish_cleanup() - Make some assertions about a data_vio which has finished cleaning up.
 *
 * If it is part of a multi-block discard, starts on the next block; otherwise, returns it to the
 * pool.
 */
static void finish_cleanup(struct data_vio *data_vio)
{ … }

/** perform_cleanup_stage() - Perform the next step in the process of cleaning up a data_vio. */
static void perform_cleanup_stage(struct data_vio *data_vio, enum data_vio_cleanup_stage stage)
{ … }

void complete_data_vio(struct vdo_completion *completion)
{ … }

static void enter_read_only_mode(struct vdo_completion *completion)
{ … }

void handle_data_vio_error(struct vdo_completion *completion)
{ … }

/**
 * get_data_vio_operation_name() - Get the name of the last asynchronous operation performed on a
 *                                 data_vio.
 */
const char *get_data_vio_operation_name(struct data_vio *data_vio)
{ … }

/**
 * data_vio_allocate_data_block() - Allocate a data block.
 * @write_lock_type: The type of write lock to obtain on the block.
 * @callback: The callback which will attempt an allocation in the current zone and continue if it
 *            succeeds.
 * @error_handler: The handler for errors while allocating.
 */
void data_vio_allocate_data_block(struct data_vio *data_vio, enum pbn_lock_type write_lock_type,
				  vdo_action_fn callback, vdo_action_fn error_handler)
{ … }

/**
 * release_data_vio_allocation_lock() - Release the PBN lock on a data_vio's allocated block.
 * @reset: If true, the allocation will be reset (i.e. any allocated pbn will be forgotten).
 *
 * If the reference to the locked block is still provisional, it will be released as well.
 */
void release_data_vio_allocation_lock(struct data_vio *data_vio, bool reset)
{ … }

/**
 * uncompress_data_vio() - Uncompress the data a data_vio has just read.
 * @mapping_state: The mapping state indicating which fragment to decompress.
 * @buffer: The buffer to receive the uncompressed data.
 */
int uncompress_data_vio(struct data_vio *data_vio, enum block_mapping_state mapping_state,
			char *buffer)
{ … }
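
/*
 * Illustrative sketch (not the VDO implementation): uncompress_data_vio() above expands one
 * compressed fragment back into a full data block. Using the <linux/lz4.h> interface included at
 * the top of this file, the core decompression step might look like the hypothetical helper
 * below; locating the fragment for a given mapping_state is elided above and not shown here.
 */
static inline bool sketch_uncompress_fragment(const char *fragment, int fragment_size,
					      char *buffer, int block_size)
{
	/* LZ4_decompress_safe() returns the decompressed length, or a negative value on error. */
	return LZ4_decompress_safe(fragment, buffer, fragment_size, block_size) == block_size;
}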

/**
 * modify_for_partial_write() - Do the modify-write part of a read-modify-write cycle.
 * @completion: The data_vio which has just finished its read.
 *
 * This callback is registered in read_block().
 */
static void modify_for_partial_write(struct vdo_completion *completion)
{ … }

static void complete_read(struct vdo_completion *completion)
{ … }

static void read_endio(struct bio *bio)
{ … }

static void complete_zero_read(struct vdo_completion *completion)
{ … }

/**
 * read_block() - Read a block asynchronously.
 *
 * This is the callback registered in read_block_mapping().
 */
static void read_block(struct vdo_completion *completion)
{ … }

static inline struct data_vio *
reference_count_update_completion_as_data_vio(struct vdo_completion *completion)
{ … }

/**
 * update_block_map() - Rendezvous of the data_vio and decrement completions after each has made
 *                      its reference updates. Handle any error from either, or proceed to
 *                      updating the block map.
 * @completion: The completion of the write in progress.
 */
static void update_block_map(struct vdo_completion *completion)
{ … }

static void decrement_reference_count(struct vdo_completion *completion)
{ … }

static void increment_reference_count(struct vdo_completion *completion)
{ … }

/** journal_remapping() - Add a recovery journal entry for a data remapping. */
static void journal_remapping(struct vdo_completion *completion)
{ … }

/**
 * read_old_block_mapping() - Get the previous PBN/LBN mapping of an in-progress write.
 *
 * Gets the previous PBN mapped to this LBN from the block map, so as to make an appropriate
 * journal entry referencing the removal of this LBN->PBN mapping.
 */
static void read_old_block_mapping(struct vdo_completion *completion)
{ … }

void update_metadata_for_data_vio_write(struct data_vio *data_vio, struct pbn_lock *lock)
{ … }

/**
 * pack_compressed_data() - Attempt to pack the compressed data_vio into a block.
 *
 * This is the callback registered in launch_compress_data_vio().
 */
static void pack_compressed_data(struct vdo_completion *completion)
{ … }

/**
 * compress_data_vio() - Do the actual work of compressing the data on a CPU queue.
 *
 * This callback is registered in launch_compress_data_vio().
 */
static void compress_data_vio(struct vdo_completion *completion)
{ … }
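
/*
 * Illustrative sketch (not the VDO implementation): compress_data_vio() above does the CPU-side
 * compression work. Using the <linux/lz4.h> interface included at the top of this file,
 * compressing one data block into a caller-supplied scratch buffer might look like this; the
 * helper itself is hypothetical, and the caller must supply LZ4_MEM_COMPRESS bytes of per-thread
 * work memory.
 */
static inline int sketch_compress_block(const char *block, int block_size, char *scratch,
					int scratch_size, void *lz4_work_memory)
{
	/*
	 * Returns the compressed size, or 0 if the data did not fit in scratch_size bytes, in
	 * which case the block would be written uncompressed.
	 */
	return LZ4_compress_default(block, scratch, block_size, scratch_size, lz4_work_memory);
}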

/**
 * launch_compress_data_vio() - Continue a write by attempting to compress the data.
 *
 * This is a re-entry point to vio_write used by hash locks.
 */
void launch_compress_data_vio(struct data_vio *data_vio)
{ … }

/**
 * hash_data_vio() - Hash the data in a data_vio and set the hash zone (which also flags the
 *                   record name as set).
 *
 * This callback is registered in prepare_for_dedupe().
 */
static void hash_data_vio(struct vdo_completion *completion)
{ … }

/** prepare_for_dedupe() - Prepare for the dedupe path after attempting to get an allocation. */
static void prepare_for_dedupe(struct data_vio *data_vio)
{ … }

/**
 * write_bio_finished() - This is the bio_end_io function registered in write_block() to be called
 *                        when a data_vio's write to the underlying storage has completed.
 */
static void write_bio_finished(struct bio *bio)
{ … }

/** write_data_vio() - Write a data block to storage without compression. */
void write_data_vio(struct data_vio *data_vio)
{ … }

/**
 * acknowledge_write_callback() - Acknowledge a write to the requestor.
 *
 * This callback is registered in allocate_block() and continue_write_with_block_map_slot().
 */
static void acknowledge_write_callback(struct vdo_completion *completion)
{ … }

/**
 * allocate_block() - Attempt to allocate a block in the current allocation zone.
 *
 * This callback is registered in continue_write_with_block_map_slot().
 */
static void allocate_block(struct vdo_completion *completion)
{ … }

/**
 * handle_allocation_error() - Handle an error attempting to allocate a block.
 *
 * This error handler is registered in continue_write_with_block_map_slot().
 */
static void handle_allocation_error(struct vdo_completion *completion)
{ … }

static int assert_is_discard(struct data_vio *data_vio)
{ … }

/**
 * continue_data_vio_with_block_map_slot() - Read the data_vio's mapping from the block map.
 *
 * This callback is registered in launch_read_data_vio().
 */
void continue_data_vio_with_block_map_slot(struct vdo_completion *completion)
{ … }