linux/drivers/md/dm-vdo/data-vio.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2023 Red Hat
 */

#include "data-vio.h"

#include <linux/atomic.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/delay.h>
#include <linux/device-mapper.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/lz4.h>
#include <linux/minmax.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/wait.h>

#include "logger.h"
#include "memory-alloc.h"
#include "murmurhash3.h"
#include "permassert.h"

#include "block-map.h"
#include "dump.h"
#include "encodings.h"
#include "int-map.h"
#include "io-submitter.h"
#include "logical-zone.h"
#include "packer.h"
#include "recovery-journal.h"
#include "slab-depot.h"
#include "status-codes.h"
#include "types.h"
#include "vdo.h"
#include "vio.h"
#include "wait-queue.h"

/**
 * DOC: Bio flags.
 *
 * For certain flags set on user bios, if the user bio has not yet been acknowledged, setting those
 * flags on our own bio(s) for that request may help underlying layers better fulfill the user
 * bio's needs. This constant contains the aggregate of those flags; VDO strips all the other
 * flags, as they convey incorrect information.
 *
 * These flags are always irrelevant if we have already finished the user bio as they are only
 * hints on IO importance. If VDO has finished the user bio, any remaining IO done doesn't care how
 * important finishing the finished bio was.
 *
 * Note that bio.c contains the complete list of flags we believe may be set; the following list
 * explains the action taken with each of those flags VDO could receive:
 *
 * * REQ_SYNC: Passed down if the user bio is not yet completed, since it indicates the user bio
 *   completion is required for further work to be done by the issuer.
 * * REQ_META: Passed down if the user bio is not yet completed, since it may mean the lower layer
 *   treats it as more urgent, similar to REQ_SYNC.
 * * REQ_PRIO: Passed down if the user bio is not yet completed, since it indicates the user bio is
 *   important.
 * * REQ_NOMERGE: Set only if the incoming bio was split; irrelevant to VDO IO.
 * * REQ_IDLE: Set if the incoming bio had more IO quickly following; VDO's IO pattern doesn't
 *   match incoming IO, so this flag is incorrect for it.
 * * REQ_FUA: Handled separately, and irrelevant to VDO IO otherwise.
 * * REQ_RAHEAD: Passed down, as, for reads, it indicates trivial importance.
 * * REQ_BACKGROUND: Not passed down, as VIOs are a limited resource and VDO needs them recycled
 *   ASAP to service heavy load, which is the only place where REQ_BACKGROUND might aid in load
 *   prioritization.
 */
static blk_opf_t PASSTHROUGH_FLAGS =;

/**
 * DOC: The data_vio pool.
 *
 * The data_vio_pool maintains the pool of data_vios which a vdo uses to service incoming bios. For
 * correctness, and in order to avoid potentially expensive or blocking memory allocations during
 * normal operation, the number of concurrently active data_vios is capped. Furthermore, in order
 * to avoid starvation of reads and writes, at most 75% of the data_vios may be used for
 * discards. The data_vio_pool is responsible for enforcing these limits. Threads submitting bios
 * for which a data_vio or discard permit is not available will block until the necessary
 * resources are available. The pool is also responsible for distributing resources to blocked
 * threads and waking them. Finally, the pool attempts to batch the work of recycling data_vios by
 * performing the work of actually assigning resources to blocked threads or placing data_vios back
 * into the pool on a single cpu at a time.
 *
 * The pool contains two "limiters", one for tracking data_vios and one for tracking discard
 * permits. The limiters also provide safe cross-thread access to pool statistics without the need
 * to take the pool's lock. When a thread submits a bio to a vdo device, it will first attempt to
 * get a discard permit if it is a discard, and then to get a data_vio. If the necessary resources
 * are available, the incoming bio will be assigned to the acquired data_vio, and it will be
 * launched. However, if either of these is unavailable, the arrival time of the bio is recorded
 * in the bio's bi_private field, the bio and its submitter are both queued on the appropriate
 * limiter, and the submitting thread will then put itself to sleep. (Note that this mechanism will
 * break if jiffies are only 32 bits.)
 *
 * Whenever a data_vio has completed processing for the bio it was servicing, release_data_vio()
 * will be called on it. This function will add the data_vio to a funnel queue, and then check the
 * state of the pool. If the pool is not currently processing released data_vios, the pool's
 * completion will be enqueued on a cpu queue. This obviates the need for the releasing threads to
 * hold the pool's lock, and also batches release work while avoiding starvation of the cpu
 * threads.
 *
 * Whenever the pool's completion is run on a cpu thread, it calls process_release_callback() which
 * processes a batch of returned data_vios (currently at most 32) from the pool's funnel queue. For
 * each data_vio, it first checks whether that data_vio was processing a discard. If so, and there
 * is a blocked bio waiting for a discard permit, that permit is notionally transferred to the
 * eldest discard waiter, and that waiter is moved to the end of the list of discard bios waiting
 * for a data_vio. If there are no discard waiters, the discard permit is returned to the pool.
 * Next, the data_vio is assigned to the oldest blocked bio which either has a discard permit or
 * doesn't need one, and is relaunched. If no such bio exists, the data_vio is returned to the
 * pool. Finally, if any waiting bios were launched, the threads which blocked trying to submit
 * them are awakened.
 */

#define DATA_VIO_RELEASE_BATCH_SIZE

static const unsigned int VDO_SECTORS_PER_BLOCK_MASK =;
static const u32 COMPRESSION_STATUS_MASK =;
static const u32 MAY_NOT_COMPRESS_MASK =;

struct limiter;
assigner_fn;

/* Bookkeeping structure for a single type of resource. */
struct limiter {};

/*
 * A data_vio_pool is a collection of preallocated data_vios which may be acquired from any thread,
 * and are released in batches.
 */
struct data_vio_pool {};

static const char * const ASYNC_OPERATION_NAMES[] =;

/* The steps taken cleaning up a VIO, in the order they are performed. */
enum data_vio_cleanup_stage {};

static inline struct data_vio_pool * __must_check
as_data_vio_pool(struct vdo_completion *completion)
{}

static inline u64 get_arrival_time(struct bio *bio)
{}

/**
 * check_for_drain_complete_locked() - Check whether a data_vio_pool has no outstanding data_vios
 *				       or waiters while holding the pool's lock.
 */
static bool check_for_drain_complete_locked(struct data_vio_pool *pool)
{}

static void initialize_lbn_lock(struct data_vio *data_vio, logical_block_number_t lbn)
{}

static void launch_locked_request(struct data_vio *data_vio)
{}

static void acknowledge_data_vio(struct data_vio *data_vio)
{}

static void copy_to_bio(struct bio *bio, char *data_ptr)
{}

struct data_vio_compression_status get_data_vio_compression_status(struct data_vio *data_vio)
{}

/**
 * pack_status() - Convert a data_vio_compression_status into a u32 which may be stored
 *                 atomically.
 * @status: The state to convert.
 *
 * Return: The compression state packed into a u32.
 */
static u32 __must_check pack_status(struct data_vio_compression_status status)
{}

/**
 * set_data_vio_compression_status() - Set the compression status of a data_vio.
 * @data_vio: The data_vio whose compression status is to be changed.
 * @status: The expected current status of the data_vio.
 * @new_status: The status to set.
 *
 * Return: true if the new status was set, false if the data_vio's compression status did not
 *         match the expected status, and so was left unchanged.
 */
static bool __must_check
set_data_vio_compression_status(struct data_vio *data_vio,
				struct data_vio_compression_status status,
				struct data_vio_compression_status new_status)
{}

struct data_vio_compression_status advance_data_vio_compression_stage(struct data_vio *data_vio)
{}

/**
 * cancel_data_vio_compression() - Prevent this data_vio from being compressed or packed.
 *
 * Return: true if the data_vio is in the packer and the caller was the first caller to cancel it.
 */
bool cancel_data_vio_compression(struct data_vio *data_vio)
{}

/**
 * attempt_logical_block_lock() - Attempt to acquire the lock on a logical block.
 * @completion: The data_vio for an external data request as a completion.
 *
 * This is the start of the path for all external requests. It is registered in launch_data_vio().
 */
static void attempt_logical_block_lock(struct vdo_completion *completion)
{}

/**
 * launch_data_vio() - (Re)initialize a data_vio to have a new logical block number, keeping the
 *		       same parent and other state and send it on its way.
 */
static void launch_data_vio(struct data_vio *data_vio, logical_block_number_t lbn)
{}

static bool is_zero_block(char *block)
{}

static void copy_from_bio(struct bio *bio, char *data_ptr)
{}

static void launch_bio(struct vdo *vdo, struct data_vio *data_vio, struct bio *bio)
{}

static void assign_data_vio(struct limiter *limiter, struct data_vio *data_vio)
{}

static void assign_discard_permit(struct limiter *limiter)
{}

static void get_waiters(struct limiter *limiter)
{}

static inline struct data_vio *get_available_data_vio(struct data_vio_pool *pool)
{}

static void assign_data_vio_to_waiter(struct limiter *limiter)
{}

static void update_limiter(struct limiter *limiter)
{}

/**
 * schedule_releases() - Ensure that release processing is scheduled.
 *
 * If this call switches the state to processing, enqueue. Otherwise, some other thread has already
 * done so.
 */
static void schedule_releases(struct data_vio_pool *pool)
{}

static void reuse_or_release_resources(struct data_vio_pool *pool,
				       struct data_vio *data_vio,
				       struct list_head *returned)
{}

/**
 * process_release_callback() - Process a batch of data_vio releases.
 * @completion: The pool with data_vios to release.
 */
static void process_release_callback(struct vdo_completion *completion)
{}

static void initialize_limiter(struct limiter *limiter, struct data_vio_pool *pool,
			       assigner_fn assigner, data_vio_count_t limit)
{}

/**
 * initialize_data_vio() - Allocate the components of a data_vio.
 *
 * The caller is responsible for cleaning up the data_vio on error.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int initialize_data_vio(struct data_vio *data_vio, struct vdo *vdo)
{}

static void destroy_data_vio(struct data_vio *data_vio)
{}

/**
 * make_data_vio_pool() - Initialize a data_vio pool.
 * @vdo: The vdo to which the pool will belong.
 * @pool_size: The number of data_vios in the pool.
 * @discard_limit: The maximum number of data_vios which may be used for discards.
 * @pool_ptr: A pointer to hold the newly allocated pool.
 */
int make_data_vio_pool(struct vdo *vdo, data_vio_count_t pool_size,
		       data_vio_count_t discard_limit, struct data_vio_pool **pool_ptr)
{}

/**
 * free_data_vio_pool() - Free a data_vio_pool and the data_vios in it.
 *
 * All data_vios must be returned to the pool before calling this function.
 */
void free_data_vio_pool(struct data_vio_pool *pool)
{}

static bool acquire_permit(struct limiter *limiter)
{}

static void wait_permit(struct limiter *limiter, struct bio *bio)
	__releases(&limiter->pool->lock)
{}

/**
 * vdo_launch_bio() - Acquire a data_vio from the pool, assign the bio to it, and launch it.
 *
 * This will block if data_vios or discard permits are not available.
 */
void vdo_launch_bio(struct data_vio_pool *pool, struct bio *bio)
{}

/* Implements vdo_admin_initiator_fn. */
static void initiate_drain(struct admin_state *state)
{}

static void assert_on_vdo_cpu_thread(const struct vdo *vdo, const char *name)
{}

/**
 * drain_data_vio_pool() - Wait asynchronously for all data_vios to be returned to the pool.
 * @pool: The pool to drain.
 * @completion: The completion to notify when the pool has drained.
 */
void drain_data_vio_pool(struct data_vio_pool *pool, struct vdo_completion *completion)
{}

/**
 * resume_data_vio_pool() - Resume a data_vio pool.
 * @pool: The pool to resume.
 * @completion: The completion to notify when the pool has resumed.
 */
void resume_data_vio_pool(struct data_vio_pool *pool, struct vdo_completion *completion)
{}

static void dump_limiter(const char *name, struct limiter *limiter)
{}

/**
 * dump_data_vio_pool() - Dump a data_vio pool to the log.
 * @pool: The pool to dump.
 * @dump_vios: Whether to dump the details of each busy data_vio as well.
 */
void dump_data_vio_pool(struct data_vio_pool *pool, bool dump_vios)
{}

data_vio_count_t get_data_vio_pool_active_discards(struct data_vio_pool *pool)
{}

data_vio_count_t get_data_vio_pool_discard_limit(struct data_vio_pool *pool)
{}

data_vio_count_t get_data_vio_pool_maximum_discards(struct data_vio_pool *pool)
{}

int set_data_vio_pool_discard_limit(struct data_vio_pool *pool, data_vio_count_t limit)
{}

data_vio_count_t get_data_vio_pool_active_requests(struct data_vio_pool *pool)
{}

data_vio_count_t get_data_vio_pool_request_limit(struct data_vio_pool *pool)
{}

data_vio_count_t get_data_vio_pool_maximum_requests(struct data_vio_pool *pool)
{}

static void update_data_vio_error_stats(struct data_vio *data_vio)
{}

static void perform_cleanup_stage(struct data_vio *data_vio,
				  enum data_vio_cleanup_stage stage);

/**
 * release_allocated_lock() - Release the PBN lock and/or the reference on the allocated block at
 *			      the end of processing a data_vio.
 */
static void release_allocated_lock(struct vdo_completion *completion)
{}

/**
 * release_lock() - Release an uncontended LBN lock.
 */
static void release_lock(struct data_vio *data_vio, struct lbn_lock *lock)
{}

/**
 * transfer_lock() - Transfer a contended LBN lock to the eldest waiter.
 */
static void transfer_lock(struct data_vio *data_vio, struct lbn_lock *lock)
{}

/**
 * release_logical_lock() - Release the logical block lock and flush generation lock at the end of
 *			    processing a data_vio.
 */
static void release_logical_lock(struct vdo_completion *completion)
{}

/**
 * clean_hash_lock() - Release the hash lock at the end of processing a data_vio.
 */
static void clean_hash_lock(struct vdo_completion *completion)
{}

/**
 * finish_cleanup() - Make some assertions about a data_vio which has finished cleaning up.
 *
 * If it is part of a multi-block discard, starts on the next block, otherwise, returns it to the
 * pool.
 */
static void finish_cleanup(struct data_vio *data_vio)
{}

/**
 * perform_cleanup_stage() - Perform the next step in the process of cleaning up a data_vio.
 */
static void perform_cleanup_stage(struct data_vio *data_vio,
				  enum data_vio_cleanup_stage stage)
{}

void complete_data_vio(struct vdo_completion *completion)
{}

static void enter_read_only_mode(struct vdo_completion *completion)
{}

void handle_data_vio_error(struct vdo_completion *completion)
{}

/**
 * get_data_vio_operation_name() - Get the name of the last asynchronous operation performed on a
 *				   data_vio.
 */
const char *get_data_vio_operation_name(struct data_vio *data_vio)
{}

/**
 * data_vio_allocate_data_block() - Allocate a data block.
 *
 * @data_vio: The data_vio for which a data block is to be allocated.
 * @write_lock_type: The type of write lock to obtain on the block.
 * @callback: The callback which will attempt an allocation in the current zone and continue if it
 *	      succeeds.
 * @error_handler: The handler for errors while allocating.
 */
void data_vio_allocate_data_block(struct data_vio *data_vio,
				  enum pbn_lock_type write_lock_type,
				  vdo_action_fn callback, vdo_action_fn error_handler)
{}

/**
 * release_data_vio_allocation_lock() - Release the PBN lock on a data_vio's allocated block.
 * @data_vio: The data_vio whose allocation lock is to be released.
 * @reset: If true, the allocation will be reset (i.e. any allocated pbn will be forgotten).
 *
 * If the reference to the locked block is still provisional, it will be released as well.
 */
void release_data_vio_allocation_lock(struct data_vio *data_vio, bool reset)
{}

/**
 * uncompress_data_vio() - Uncompress the data a data_vio has just read.
 * @data_vio: The data_vio which has just read the data to uncompress.
 * @mapping_state: The mapping state indicating which fragment to decompress.
 * @buffer: The buffer to receive the uncompressed data.
 */
int uncompress_data_vio(struct data_vio *data_vio,
			enum block_mapping_state mapping_state, char *buffer)
{}

/**
 * modify_for_partial_write() - Do the modify-write part of a read-modify-write cycle.
 * @completion: The data_vio which has just finished its read.
 *
 * This callback is registered in read_block().
 */
static void modify_for_partial_write(struct vdo_completion *completion)
{}

static void complete_read(struct vdo_completion *completion)
{}

static void read_endio(struct bio *bio)
{}

static void complete_zero_read(struct vdo_completion *completion)
{}

/**
 * read_block() - Read a block asynchronously.
 *
 * This is the callback registered in read_block_mapping().
 *
 * NOTE(review): read_block_mapping() does not appear to be defined in this file, and
 * read_block() is static, so the registering function is presumably one defined here; confirm
 * and update the name above.
 */
static void read_block(struct vdo_completion *completion)
{}

static inline struct data_vio *
reference_count_update_completion_as_data_vio(struct vdo_completion *completion)
{}

/**
 * update_block_map() - Rendezvous of the data_vio and decrement completions after each has
 *                      made its reference updates. Handle any error from either, or proceed
 *                      to updating the block map.
 * @completion: The completion of the write in progress.
 */
static void update_block_map(struct vdo_completion *completion)
{}

static void decrement_reference_count(struct vdo_completion *completion)
{}

static void increment_reference_count(struct vdo_completion *completion)
{}

/**
 * journal_remapping() - Add a recovery journal entry for a data remapping.
 */
static void journal_remapping(struct vdo_completion *completion)
{}

/**
 * read_old_block_mapping() - Get the previous PBN/LBN mapping of an in-progress write.
 *
 * Gets the previous PBN mapped to this LBN from the block map, so as to make an appropriate
 * journal entry referencing the removal of this LBN->PBN mapping.
 */
static void read_old_block_mapping(struct vdo_completion *completion)
{}

void update_metadata_for_data_vio_write(struct data_vio *data_vio, struct pbn_lock *lock)
{}

/**
 * pack_compressed_data() - Attempt to pack the compressed data_vio into a block.
 *
 * This is the callback registered in launch_compress_data_vio().
 */
static void pack_compressed_data(struct vdo_completion *completion)
{}

/**
 * compress_data_vio() - Do the actual work of compressing the data on a CPU queue.
 *
 * This callback is registered in launch_compress_data_vio().
 */
static void compress_data_vio(struct vdo_completion *completion)
{}

/**
 * launch_compress_data_vio() - Continue a write by attempting to compress the data.
 *
 * This is a re-entry point to vio_write used by hash locks.
 */
void launch_compress_data_vio(struct data_vio *data_vio)
{}

/**
 * hash_data_vio() - Hash the data in a data_vio and set the hash zone (which also flags the record
 *		     name as set).
 *
 * This callback is registered in prepare_for_dedupe().
 */
static void hash_data_vio(struct vdo_completion *completion)
{}

/**
 * prepare_for_dedupe() - Prepare for the dedupe path after attempting to get an allocation.
 */
static void prepare_for_dedupe(struct data_vio *data_vio)
{}

/**
 * write_bio_finished() - This is the bio_end_io function registered in write_block() to be called
 *			  when a data_vio's write to the underlying storage has completed.
 */
static void write_bio_finished(struct bio *bio)
{}

/**
 * write_data_vio() - Write a data block to storage without compression.
 */
void write_data_vio(struct data_vio *data_vio)
{}

/**
 * acknowledge_write_callback() - Acknowledge a write to the requestor.
 *
 * This callback is registered in allocate_block() and continue_data_vio_with_block_map_slot().
 */
static void acknowledge_write_callback(struct vdo_completion *completion)
{}

/**
 * allocate_block() - Attempt to allocate a block in the current allocation zone.
 *
 * This callback is registered in continue_data_vio_with_block_map_slot().
 */
static void allocate_block(struct vdo_completion *completion)
{}

/**
 * handle_allocation_error() - Handle an error attempting to allocate a block.
 *
 * This error handler is registered in continue_data_vio_with_block_map_slot().
 */
static void handle_allocation_error(struct vdo_completion *completion)
{}

static int assert_is_discard(struct data_vio *data_vio)
{}

/**
 * continue_data_vio_with_block_map_slot() - Read the data_vio's mapping from the block map.
 *
 * This callback is registered in launch_read_data_vio().
 */
void continue_data_vio_with_block_map_slot(struct vdo_completion *completion)
{}