// SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2023 Red Hat */ #include "slab-depot.h" #include <linux/atomic.h> #include <linux/bio.h> #include <linux/err.h> #include <linux/log2.h> #include <linux/min_heap.h> #include <linux/minmax.h> #include "logger.h" #include "memory-alloc.h" #include "numeric.h" #include "permassert.h" #include "string-utils.h" #include "action-manager.h" #include "admin-state.h" #include "completion.h" #include "constants.h" #include "data-vio.h" #include "encodings.h" #include "io-submitter.h" #include "physical-zone.h" #include "priority-table.h" #include "recovery-journal.h" #include "repair.h" #include "status-codes.h" #include "types.h" #include "vdo.h" #include "vio.h" #include "wait-queue.h" static const u64 BYTES_PER_WORD = …; static const bool NORMAL_OPERATION = …; /** * get_lock() - Get the lock object for a slab journal block by sequence number. * @journal: vdo_slab journal to retrieve from. * @sequence_number: Sequence number of the block. * * Return: The lock object for the given sequence number. */ static inline struct journal_lock * __must_check get_lock(struct slab_journal *journal, sequence_number_t sequence_number) { … } static bool is_slab_open(struct vdo_slab *slab) { … } /** * must_make_entries_to_flush() - Check whether there are entry waiters which should delay a flush. * @journal: The journal to check. * * Return: true if there are no entry waiters, or if the slab is unrecovered. */ static inline bool __must_check must_make_entries_to_flush(struct slab_journal *journal) { … } /** * is_reaping() - Check whether a reap is currently in progress. * @journal: The journal which may be reaping. * * Return: true if the journal is reaping. */ static inline bool __must_check is_reaping(struct slab_journal *journal) { … } /** * initialize_tail_block() - Initialize tail block as a new block. * @journal: The journal whose tail block is being initialized. 
*/ static void initialize_tail_block(struct slab_journal *journal) { … } /** * initialize_journal_state() - Set all journal fields appropriately to start journaling. * @journal: The journal to be reset, based on its tail sequence number. */ static void initialize_journal_state(struct slab_journal *journal) { … } /** * block_is_full() - Check whether a journal block is full. * @journal: The slab journal for the block. * * Return: true if the tail block is full. */ static bool __must_check block_is_full(struct slab_journal *journal) { … } static void add_entries(struct slab_journal *journal); static void update_tail_block_location(struct slab_journal *journal); static void release_journal_locks(struct vdo_waiter *waiter, void *context); /** * is_slab_journal_blank() - Check whether a slab's journal is blank. * * A slab journal is blank if it has never had any entries recorded in it. * * Return: true if the slab's journal has never been modified. */ static bool is_slab_journal_blank(const struct vdo_slab *slab) { … } /** * mark_slab_journal_dirty() - Put a slab journal on the dirty ring of its allocator in the correct * order. * @journal: The journal to be marked dirty. * @lock: The recovery journal lock held by the slab journal. */ static void mark_slab_journal_dirty(struct slab_journal *journal, sequence_number_t lock) { … } static void mark_slab_journal_clean(struct slab_journal *journal) { … } static void check_if_slab_drained(struct vdo_slab *slab) { … } /* FULLNESS HINT COMPUTATION */ /** * compute_fullness_hint() - Translate a slab's free block count into a 'fullness hint' that can be * stored in a slab_summary_entry's 7 bits that are dedicated to its free * count. * @depot: The depot whose summary is being updated. * @free_blocks: The number of free blocks. 
* * Note: the number of free blocks must be strictly less than 2^23 blocks, even though * theoretically slabs could contain precisely 2^23 blocks; there is an assumption that at least * one block is used by metadata. This assumption is necessary; otherwise, the fullness hint might * overflow. The fullness hint formula is roughly (fullness >> 16) & 0x7f, but (2^23 >> 16) & 0x7f * is 0, which would make it impossible to distinguish completely full from completely empty. * * Return: A fullness hint, which can be stored in 7 bits. */ static u8 __must_check compute_fullness_hint(struct slab_depot *depot, block_count_t free_blocks) { … } /** * check_summary_drain_complete() - Check whether an allocator's summary has finished draining. */ static void check_summary_drain_complete(struct block_allocator *allocator) { … } /** * notify_summary_waiters() - Wake all the waiters in a given queue. * @allocator: The block allocator summary which owns the queue. * @queue: The queue to notify. */ static void notify_summary_waiters(struct block_allocator *allocator, struct vdo_wait_queue *queue) { … } static void launch_write(struct slab_summary_block *summary_block); /** * finish_updating_slab_summary_block() - Finish processing a block which attempted to write, * whether or not the attempt succeeded. * @block: The block. */ static void finish_updating_slab_summary_block(struct slab_summary_block *block) { … } /** * finish_update() - This is the callback for a successful summary block write. * @completion: The write vio. */ static void finish_update(struct vdo_completion *completion) { … } /** * handle_write_error() - Handle an error writing a slab summary block. * @completion: The write VIO. */ static void handle_write_error(struct vdo_completion *completion) { … } static void write_slab_summary_endio(struct bio *bio) { … } /** * launch_write() - Write a slab summary block unless it is currently out for writing. * @block: The block that needs to be committed. 
*/ static void launch_write(struct slab_summary_block *block) { … } /** * update_slab_summary_entry() - Update the entry for a slab. * @slab: The slab whose entry is to be updated * @waiter: The waiter that is updating the summary. * @tail_block_offset: The offset of the slab journal's tail block. * @load_ref_counts: Whether the reference counts must be loaded from disk on the vdo load. * @is_clean: Whether the slab is clean. * @free_blocks: The number of free blocks. */ static void update_slab_summary_entry(struct vdo_slab *slab, struct vdo_waiter *waiter, tail_block_offset_t tail_block_offset, bool load_ref_counts, bool is_clean, block_count_t free_blocks) { … } /** * finish_reaping() - Actually advance the head of the journal now that any necessary flushes are * complete. * @journal: The journal to be reaped. */ static void finish_reaping(struct slab_journal *journal) { … } static void reap_slab_journal(struct slab_journal *journal); /** * complete_reaping() - Finish reaping now that we have flushed the lower layer and then try * reaping again in case we deferred reaping due to an outstanding vio. * @completion: The flush vio. */ static void complete_reaping(struct vdo_completion *completion) { … } /** * handle_flush_error() - Handle an error flushing the lower layer. * @completion: The flush vio. */ static void handle_flush_error(struct vdo_completion *completion) { … } static void flush_endio(struct bio *bio) { … } /** * flush_for_reaping() - A waiter callback for getting a vio with which to flush the lower layer * prior to reaping. * @waiter: The journal as a flush waiter. * @context: The newly acquired flush vio. */ static void flush_for_reaping(struct vdo_waiter *waiter, void *context) { … } /** * reap_slab_journal() - Conduct a reap on a slab journal to reclaim unreferenced blocks. * @journal: The slab journal. 
*/ static void reap_slab_journal(struct slab_journal *journal) { … } /** * adjust_slab_journal_block_reference() - Adjust the reference count for a slab journal block. * @journal: The slab journal. * @sequence_number: The journal sequence number of the referenced block. * @adjustment: Amount to adjust the reference counter. * * Note that when the adjustment is negative, the slab journal will be reaped. */ static void adjust_slab_journal_block_reference(struct slab_journal *journal, sequence_number_t sequence_number, int adjustment) { … } /** * release_journal_locks() - Callback invoked after a slab summary update completes. * @waiter: The slab summary waiter that has just been notified. * @context: The result code of the update. * * Registered in the constructor on behalf of update_tail_block_location(). * * Implements waiter_callback_fn. */ static void release_journal_locks(struct vdo_waiter *waiter, void *context) { … } /** * update_tail_block_location() - Update the tail block location in the slab summary, if necessary. * @journal: The slab journal that is updating its tail block location. */ static void update_tail_block_location(struct slab_journal *journal) { … } /** * reopen_slab_journal() - Reopen a slab's journal by emptying it and then adding pending entries. */ static void reopen_slab_journal(struct vdo_slab *slab) { … } static sequence_number_t get_committing_sequence_number(const struct pooled_vio *vio) { … } /** * complete_write() - Handle post-commit processing. * @completion: The write vio as a completion. * * This is the callback registered by write_slab_journal_block(). */ static void complete_write(struct vdo_completion *completion) { … } static void write_slab_journal_endio(struct bio *bio) { … } /** * write_slab_journal_block() - Write a slab journal block. * @waiter: The vio pool waiter which was just notified. * @context: The vio pool entry for the write. * * Callback from acquire_vio_from_pool() registered in commit_tail(). 
*/ static void write_slab_journal_block(struct vdo_waiter *waiter, void *context) { … } /** * commit_tail() - Commit the tail block of the slab journal. * @journal: The journal whose tail block should be committed. */ static void commit_tail(struct slab_journal *journal) { … } /** * encode_slab_journal_entry() - Encode a slab journal entry. * @tail_header: The unpacked header for the block. * @payload: The journal block payload to hold the entry. * @sbn: The slab block number of the entry to encode. * @operation: The type of the entry. * @increment: True if this is an increment. * * Exposed for unit tests. */ static void encode_slab_journal_entry(struct slab_journal_block_header *tail_header, slab_journal_payload *payload, slab_block_number sbn, enum journal_operation operation, bool increment) { … } /** * expand_journal_point() - Convert a recovery journal journal_point which refers to both an * increment and a decrement to a single point which refers to one or the * other. * @recovery_point: The journal point to convert. * @increment: Whether the current entry is an increment. * * Return: The expanded journal point. * * Each data_vio has but a single recovery journal point, but may need to make both * increment and decrement entries in the same slab journal. In order to distinguish the two * entries, the entry count of the expanded journal point is twice the actual recovery journal * entry count for increments, and one more than that for decrements. */ static struct journal_point expand_journal_point(struct journal_point recovery_point, bool increment) { … } /** * add_entry() - Actually add an entry to the slab journal, potentially firing off a write if a * block becomes full. * @journal: The slab journal to append to. * @pbn: The pbn being adjusted. * @operation: The type of entry to make. * @increment: True if this is an increment. * @recovery_point: The expanded recovery point. * * This function is synchronous. 
*/ static void add_entry(struct slab_journal *journal, physical_block_number_t pbn, enum journal_operation operation, bool increment, struct journal_point recovery_point) { … } static inline block_count_t journal_length(const struct slab_journal *journal) { … } /** * vdo_attempt_replay_into_slab() - Replay a recovery journal entry into a slab's journal. * @slab: The slab to play into. * @pbn: The PBN for the entry. * @operation: The type of entry to add. * @increment: True if this entry is an increment. * @recovery_point: The recovery journal point corresponding to this entry. * @parent: The completion to notify when there is space to add the entry if the entry could not be * added immediately. * * Return: true if the entry was added immediately. */ bool vdo_attempt_replay_into_slab(struct vdo_slab *slab, physical_block_number_t pbn, enum journal_operation operation, bool increment, struct journal_point *recovery_point, struct vdo_completion *parent) { … } /** * requires_reaping() - Check whether the journal must be reaped before adding new entries. * @journal: The journal to check. * * Return: true if the journal must be reaped. */ static bool requires_reaping(const struct slab_journal *journal) { … } /** finish_summary_update() - A waiter callback that resets the writing state of a slab. */ static void finish_summary_update(struct vdo_waiter *waiter, void *context) { … } static void write_reference_block(struct vdo_waiter *waiter, void *context); /** * launch_reference_block_write() - Launch the write of a dirty reference block by first acquiring * a VIO for it from the pool. * @waiter: The waiter of the block which is starting to write. * @context: The parent slab of the block. * * This can be asynchronous since the writer will have to wait if all VIOs in the pool are * currently in use. 
*/ static void launch_reference_block_write(struct vdo_waiter *waiter, void *context) { … } static void save_dirty_reference_blocks(struct vdo_slab *slab) { … } /** * finish_reference_block_write() - After a reference block has been written, clean it, release its * locks, and return its VIO to the pool. * @completion: The VIO that just finished writing. */ static void finish_reference_block_write(struct vdo_completion *completion) { … } /** * get_reference_counters_for_block() - Find the reference counters for a given block. * @block: The reference_block in question. * * Return: A pointer to the reference counters for this block. */ static vdo_refcount_t * __must_check get_reference_counters_for_block(struct reference_block *block) { … } /** * pack_reference_block() - Copy data from a reference block to a buffer ready to be written out. * @block: The block to copy. * @buffer: The buffer to fill with the packed block. */ static void pack_reference_block(struct reference_block *block, void *buffer) { … } static void write_reference_block_endio(struct bio *bio) { … } /** * handle_io_error() - Handle an I/O error reading or writing a reference count block. * @completion: The VIO doing the I/O as a completion. */ static void handle_io_error(struct vdo_completion *completion) { … } /** * write_reference_block() - After a dirty block waiter has gotten a VIO from the VIO pool, copy * its counters and associated data into the VIO, and launch the write. * @waiter: The waiter of the dirty block. * @context: The VIO returned by the pool. */ static void write_reference_block(struct vdo_waiter *waiter, void *context) { … } static void reclaim_journal_space(struct slab_journal *journal) { … } /** * reference_count_to_status() - Convert a reference count to a reference status. * @count: The count to convert. * * Return: The appropriate reference status. 
*/ static enum reference_status __must_check reference_count_to_status(vdo_refcount_t count) { … } /** * dirty_block() - Mark a reference count block as dirty, potentially adding it to the dirty queue * if it wasn't already dirty. * @block: The reference block to mark as dirty. */ static void dirty_block(struct reference_block *block) { … } /** * get_reference_block() - Get the reference block that covers the given block index. */ static struct reference_block * __must_check get_reference_block(struct vdo_slab *slab, slab_block_number index) { … } /** * slab_block_number_from_pbn() - Determine the index within the slab of a particular physical * block number. * @slab: The slab. * @pbn: The physical block number. * @slab_block_number_ptr: A pointer to the slab block number. * * Return: VDO_SUCCESS or an error code. */ static int __must_check slab_block_number_from_pbn(struct vdo_slab *slab, physical_block_number_t pbn, slab_block_number *slab_block_number_ptr) { … } /** * get_reference_counter() - Get the reference counter that covers the given physical block number. * @slab: The slab to query. * @pbn: The physical block number. * @counter_ptr: A pointer to the reference counter. */ static int __must_check get_reference_counter(struct vdo_slab *slab, physical_block_number_t pbn, vdo_refcount_t **counter_ptr) { … } static unsigned int calculate_slab_priority(struct vdo_slab *slab) { … } /* * Slabs are essentially prioritized by an approximation of the number of free blocks in the slab * so slabs with lots of free blocks will be opened for allocation before slabs that have few free * blocks. */ static void prioritize_slab(struct vdo_slab *slab) { … } /** * adjust_free_block_count() - Adjust the free block count and (if needed) reprioritize the slab. * @incremented: true if the free block count went up. 
*/ static void adjust_free_block_count(struct vdo_slab *slab, bool incremented) { … } /** * increment_for_data() - Increment the reference count for a data block. * @slab: The slab which owns the block. * @block: The reference block which contains the block being updated. * @block_number: The block to update. * @old_status: The reference status of the data block before this increment. * @lock: The pbn_lock associated with this increment (may be NULL). * @counter_ptr: A pointer to the count for the data block (in, out). * @adjust_block_count: Whether to update the allocator's free block count. * * Return: VDO_SUCCESS or an error. */ static int increment_for_data(struct vdo_slab *slab, struct reference_block *block, slab_block_number block_number, enum reference_status old_status, struct pbn_lock *lock, vdo_refcount_t *counter_ptr, bool adjust_block_count) { … } /** * decrement_for_data() - Decrement the reference count for a data block. * @slab: The slab which owns the block. * @block: The reference block which contains the block being updated. * @block_number: The block to update. * @old_status: The reference status of the data block before this decrement. * @updater: The reference updater doing this operation in case we need to look up the pbn lock. * @counter_ptr: A pointer to the count for the data block (in, out). * @adjust_block_count: Whether to update the allocator's free block count. * * Return: VDO_SUCCESS or an error. */ static int decrement_for_data(struct vdo_slab *slab, struct reference_block *block, slab_block_number block_number, enum reference_status old_status, struct reference_updater *updater, vdo_refcount_t *counter_ptr, bool adjust_block_count) { … } /** * increment_for_block_map() - Increment the reference count for a block map page. * @slab: The slab which owns the block. * @block: The reference block which contains the block being updated. 
* @block_number: The block to update. * @old_status: The reference status of the block before this increment. * @lock: The pbn_lock associated with this increment (may be NULL). * @normal_operation: Whether we are in normal operation vs. recovery or rebuild. * @counter_ptr: A pointer to the count for the block (in, out). * @adjust_block_count: Whether to update the allocator's free block count. * * All block map increments should be from provisional to MAXIMUM_REFERENCE_COUNT. Since block map * blocks never dedupe they should never be adjusted from any other state. The adjustment always * results in MAXIMUM_REFERENCE_COUNT as this value is used to prevent dedupe against block map * blocks. * * Return: VDO_SUCCESS or an error. */ static int increment_for_block_map(struct vdo_slab *slab, struct reference_block *block, slab_block_number block_number, enum reference_status old_status, struct pbn_lock *lock, bool normal_operation, vdo_refcount_t *counter_ptr, bool adjust_block_count) { … } static bool __must_check is_valid_journal_point(const struct journal_point *point) { … } /** * update_reference_count() - Update the reference count of a block. * @slab: The slab which owns the block. * @block: The reference block which contains the block being updated. * @block_number: The block to update. * @slab_journal_point: The slab journal point at which this update is journaled. * @updater: The reference updater. * @normal_operation: Whether we are in normal operation vs. recovery or rebuild. * @adjust_block_count: Whether to update the slab's free block count. * @provisional_decrement_ptr: A pointer which will be set to true if this update was a decrement * of a provisional reference. * * Return: VDO_SUCCESS or an error. 
*/ static int update_reference_count(struct vdo_slab *slab, struct reference_block *block, slab_block_number block_number, const struct journal_point *slab_journal_point, struct reference_updater *updater, bool normal_operation, bool adjust_block_count, bool *provisional_decrement_ptr) { … } static int __must_check adjust_reference_count(struct vdo_slab *slab, struct reference_updater *updater, const struct journal_point *slab_journal_point) { … } /** * add_entry_from_waiter() - Add an entry to the slab journal. * @waiter: The vio which should make an entry now. * @context: The slab journal to make an entry in. * * This callback is invoked by add_entries() once it has determined that we are ready to make * another entry in the slab journal. Implements waiter_callback_fn. */ static void add_entry_from_waiter(struct vdo_waiter *waiter, void *context) { … } /** * is_next_entry_a_block_map_increment() - Check whether the next entry to be made is a block map * increment. * @journal: The journal. * * Return: true if the first entry waiter's operation is a block map increment. */ static inline bool is_next_entry_a_block_map_increment(struct slab_journal *journal) { … } /** * add_entries() - Add as many entries as possible from the queue of vios waiting to make entries. * @journal: The journal to which entries may be added. * * By processing the queue in order, we ensure that slab journal entries are made in the same order * as recovery journal entries for the same increment or decrement. */ static void add_entries(struct slab_journal *journal) { … } /** * reset_search_cursor() - Reset the free block search back to the first reference counter in the * first reference block of a slab. */ static void reset_search_cursor(struct vdo_slab *slab) { … } /** * advance_search_cursor() - Advance the search cursor to the start of the next reference block in * a slab. * * Wraps around to the first reference block if the current block is the last reference block. 
* * Return: true unless the cursor was at the last reference block. */ static bool advance_search_cursor(struct vdo_slab *slab) { … } /** * vdo_adjust_reference_count_for_rebuild() - Adjust the reference count of a block during rebuild. * * Return: VDO_SUCCESS or an error. */ int vdo_adjust_reference_count_for_rebuild(struct slab_depot *depot, physical_block_number_t pbn, enum journal_operation operation) { … } /** * replay_reference_count_change() - Replay the reference count adjustment from a slab journal * entry into the reference count for a block. * @slab: The slab. * @entry_point: The slab journal point for the entry. * @entry: The slab journal entry being replayed. * * The adjustment will be ignored if it was already recorded in the reference count. * * Return: VDO_SUCCESS or an error code. */ static int replay_reference_count_change(struct vdo_slab *slab, const struct journal_point *entry_point, struct slab_journal_entry entry) { … } /** * find_zero_byte_in_word() - Find the array index of the first zero byte in word-sized range of * reference counters. * @word_ptr: A pointer to the eight counter bytes to check. * @start_index: The array index corresponding to word_ptr[0]. * @fail_index: The array index to return if no zero byte is found. * * The search does no bounds checking; the function relies on the array being sufficiently padded. * * Return: The array index of the first zero byte in the word, or the value passed as fail_index if * no zero byte was found. */ static inline slab_block_number find_zero_byte_in_word(const u8 *word_ptr, slab_block_number start_index, slab_block_number fail_index) { … } /** * find_free_block() - Find the first block with a reference count of zero in the specified * range of reference counter indexes. * @slab: The slab counters to scan. * @index_ptr: A pointer to hold the array index of the free block. * * Exposed for unit testing. * * Return: true if a free block was found in the specified range. 
*/ static bool find_free_block(const struct vdo_slab *slab, slab_block_number *index_ptr) { … } /** * search_current_reference_block() - Search the reference block currently saved in the search * cursor for a reference count of zero, starting at the saved * counter index. * @slab: The slab to search. * @free_index_ptr: A pointer to receive the array index of the zero reference count. * * Return: true if an unreferenced counter was found. */ static bool search_current_reference_block(const struct vdo_slab *slab, slab_block_number *free_index_ptr) { … } /** * search_reference_blocks() - Search each reference block for a reference count of zero. * @slab: The slab to search. * @free_index_ptr: A pointer to receive the array index of the zero reference count. * * Searches each reference block for a reference count of zero, starting at the reference block and * counter index saved in the search cursor and searching up to the end of the last reference * block. The search does not wrap. * * Return: true if an unreferenced counter was found. */ static bool search_reference_blocks(struct vdo_slab *slab, slab_block_number *free_index_ptr) { … } /** * make_provisional_reference() - Do the bookkeeping for making a provisional reference. */ static void make_provisional_reference(struct vdo_slab *slab, slab_block_number block_number) { … } /** * dirty_all_reference_blocks() - Mark all reference count blocks in a slab as dirty. */ static void dirty_all_reference_blocks(struct vdo_slab *slab) { … } /** * clear_provisional_references() - Clear the provisional reference counts from a reference block. * @block: The block to clear. */ static void clear_provisional_references(struct reference_block *block) { … } static inline bool journal_points_equal(struct journal_point first, struct journal_point second) { … } /** * unpack_reference_block() - Unpack a reference count block into the internal memory structure. * @packed: The written reference block to be unpacked. 
* @block: The internal reference block to be loaded. */ static void unpack_reference_block(struct packed_reference_block *packed, struct reference_block *block) { … } /** * finish_reference_block_load() - After a reference block has been read, unpack it. * @completion: The VIO that just finished reading. */ static void finish_reference_block_load(struct vdo_completion *completion) { … } static void load_reference_block_endio(struct bio *bio) { … } /** * load_reference_block() - After a block waiter has gotten a VIO from the VIO pool, load the * block. * @waiter: The waiter of the block to load. * @context: The VIO returned by the pool. */ static void load_reference_block(struct vdo_waiter *waiter, void *context) { … } /** * load_reference_blocks() - Load a slab's reference blocks from the underlying storage into a * pre-allocated reference counter. */ static void load_reference_blocks(struct vdo_slab *slab) { … } /** * drain_slab() - Drain all reference count I/O. * * Depending upon the type of drain being performed (as recorded in the ref_count's vdo_slab), the * reference blocks may be loaded from disk or dirty reference blocks may be written out. */ static void drain_slab(struct vdo_slab *slab) { … } static int allocate_slab_counters(struct vdo_slab *slab) { … } static int allocate_counters_if_clean(struct vdo_slab *slab) { … } static void finish_loading_journal(struct vdo_completion *completion) { … } static void read_slab_journal_tail_endio(struct bio *bio) { … } static void handle_load_error(struct vdo_completion *completion) { … } /** * read_slab_journal_tail() - Read the slab journal tail block by using a vio acquired from the vio * pool. * @waiter: The vio pool waiter which has just been notified. * @context: The vio pool entry given to the waiter. * * This is the success callback from acquire_vio_from_pool() when loading a slab journal. 
*/ static void read_slab_journal_tail(struct vdo_waiter *waiter, void *context) { … } /** * load_slab_journal() - Load a slab's journal by reading the journal's tail. */ static void load_slab_journal(struct vdo_slab *slab) { … } static void register_slab_for_scrubbing(struct vdo_slab *slab, bool high_priority) { … } /* Queue a slab for allocation or scrubbing. */ static void queue_slab(struct vdo_slab *slab) { … } /** * initiate_slab_action() - Initiate a slab action. * * Implements vdo_admin_initiator_fn. */ static void initiate_slab_action(struct admin_state *state) { … } /** * get_next_slab() - Get the next slab to scrub. * @scrubber: The slab scrubber. * * Return: The next slab to scrub or NULL if there are none. */ static struct vdo_slab *get_next_slab(struct slab_scrubber *scrubber) { … } /** * has_slabs_to_scrub() - Check whether a scrubber has slabs to scrub. * @scrubber: The scrubber to check. * * Return: true if the scrubber has slabs to scrub. */ static inline bool __must_check has_slabs_to_scrub(struct slab_scrubber *scrubber) { … } /** * uninitialize_scrubber_vio() - Clean up the slab_scrubber's vio. * @scrubber: The scrubber. */ static void uninitialize_scrubber_vio(struct slab_scrubber *scrubber) { … } /** * finish_scrubbing() - Stop scrubbing, either because there are no more slabs to scrub or because * there's been an error. * @scrubber: The scrubber. * @result: The result of the scrubbing operation (an error code if scrubbing is stopping due to * an error). */ static void finish_scrubbing(struct slab_scrubber *scrubber, int result) { … } static void scrub_next_slab(struct slab_scrubber *scrubber); /** * slab_scrubbed() - Notify the scrubber that a slab has been scrubbed. * @completion: The slab rebuild completion. * * This callback is registered in apply_journal_entries(). */ static void slab_scrubbed(struct vdo_completion *completion) { … } /** * abort_scrubbing() - Abort scrubbing due to an error. * @scrubber: The slab scrubber. * @result: The error. 
*/ static void abort_scrubbing(struct slab_scrubber *scrubber, int result) { … } /** * handle_scrubber_error() - Handle errors while rebuilding a slab. * @completion: The slab rebuild completion. */ static void handle_scrubber_error(struct vdo_completion *completion) { … } /** * apply_block_entries() - Apply all the entries in a block to the reference counts. * @block: A block with entries to apply. * @entry_count: The number of entries to apply. * @block_number: The sequence number of the block. * @slab: The slab to apply the entries to. * * Return: VDO_SUCCESS or an error code. */ static int apply_block_entries(struct packed_slab_journal_block *block, journal_entry_count_t entry_count, sequence_number_t block_number, struct vdo_slab *slab) { … } /** * apply_journal_entries() - Find the relevant vio of the slab journal and apply all valid entries. * @completion: The metadata read vio completion. * * This is a callback registered in start_scrubbing(). */ static void apply_journal_entries(struct vdo_completion *completion) { … } static void read_slab_journal_endio(struct bio *bio) { … } /** * start_scrubbing() - Read the current slab's journal from disk now that it has been flushed. * @completion: The scrubber's vio completion. * * This callback is registered in scrub_next_slab(). */ static void start_scrubbing(struct vdo_completion *completion) { … } /** * scrub_next_slab() - Scrub the next slab if there is one. * @scrubber: The scrubber. */ static void scrub_next_slab(struct slab_scrubber *scrubber) { … } /** * scrub_slabs() - Scrub all of an allocator's slabs that are eligible for scrubbing. * @allocator: The block_allocator to scrub. * @parent: The completion to notify when scrubbing is done, implies high_priority, may be NULL. 
*/ static void scrub_slabs(struct block_allocator *allocator, struct vdo_completion *parent) { … } static inline void assert_on_allocator_thread(thread_id_t thread_id, const char *function_name) { … } static void register_slab_with_allocator(struct block_allocator *allocator, struct vdo_slab *slab) { … } /** * get_depot_slab_iterator() - Return a slab_iterator over the slabs in a slab_depot. * @depot: The depot over which to iterate. * @start: The number of the slab to start iterating from. * @end: The number of the last slab which may be returned. * @stride: The difference in slab number between successive slabs. * * Iteration always occurs from higher to lower numbered slabs. * * Return: An initialized iterator structure. */ static struct slab_iterator get_depot_slab_iterator(struct slab_depot *depot, slab_count_t start, slab_count_t end, slab_count_t stride) { … } static struct slab_iterator get_slab_iterator(const struct block_allocator *allocator) { … } /** * next_slab() - Get the next slab from a slab_iterator and advance the iterator. * @iterator: The slab_iterator. * * Return: The next slab or NULL if the iterator is exhausted. */ static struct vdo_slab *next_slab(struct slab_iterator *iterator) { … } /** * abort_waiter() - Abort vios waiting to make journal entries when read-only. * @waiter: A waiter for a vio waiting to make a slab journal entry. * @context: Unused. * * This callback is invoked on all vios waiting to make slab journal entries after the VDO has gone * into read-only mode. Implements waiter_callback_fn. */ static void abort_waiter(struct vdo_waiter *waiter, void *context __always_unused) { … } /* Implements vdo_read_only_notification_fn. */ static void notify_block_allocator_of_read_only_mode(void *listener, struct vdo_completion *parent) { … } /** * vdo_acquire_provisional_reference() - Acquire a provisional reference on behalf of a PBN lock if * the block it locks is unreferenced. * @slab: The slab which contains the block. * @pbn: The physical block to reference. * @lock: The lock. * * Return: VDO_SUCCESS or an error.
*/ int vdo_acquire_provisional_reference(struct vdo_slab *slab, physical_block_number_t pbn, struct pbn_lock *lock) { … } static int __must_check allocate_slab_block(struct vdo_slab *slab, physical_block_number_t *block_number_ptr) { … } /** * open_slab() - Prepare a slab to be allocated from. * @slab: The slab. */ static void open_slab(struct vdo_slab *slab) { … } /* * The block allocated will have a provisional reference and the reference must be either confirmed * with a subsequent increment or vacated with a subsequent decrement via * vdo_release_block_reference(). */ int vdo_allocate_block(struct block_allocator *allocator, physical_block_number_t *block_number_ptr) { … } /** * vdo_enqueue_clean_slab_waiter() - Wait for a clean slab. * @allocator: The block_allocator on which to wait. * @waiter: The waiter. * * Return: VDO_SUCCESS if the waiter was queued, VDO_NO_SPACE if there are no slabs to scrub, and * some other error otherwise. */ int vdo_enqueue_clean_slab_waiter(struct block_allocator *allocator, struct vdo_waiter *waiter) { … } /** * vdo_modify_reference_count() - Modify the reference count of a block by first making a slab * journal entry and then updating the reference counter. * * @completion: The completion of the data_vio for which to add the entry. * @updater: Which of the data_vio's reference updaters is being submitted. */ void vdo_modify_reference_count(struct vdo_completion *completion, struct reference_updater *updater) { … } /* Release an unused provisional reference. */ int vdo_release_block_reference(struct block_allocator *allocator, physical_block_number_t pbn) { … } /* * This is a min_heap callback function orders slab_status structures using the 'is_clean' field as * the primary key and the 'emptiness' field as the secondary key. * * Slabs need to be pushed onto the rings in the same order they are to be popped off. Popping * should always get the most empty first, so pushing should be from most empty to least empty.
* Thus, the ordering is reversed from the usual sense since min_heap returns smaller elements * before larger ones. */ static bool slab_status_is_less_than(const void *item1, const void *item2, void __always_unused *args) { … } static void swap_slab_statuses(void *item1, void *item2, void __always_unused *args) { … } static const struct min_heap_callbacks slab_status_min_heap = …; /* Inform the slab actor that an action has finished on some slab; used by apply_to_slabs(). */ static void slab_action_callback(struct vdo_completion *completion) { … } /* Preserve the error from part of an action and continue. */ static void handle_operation_error(struct vdo_completion *completion) { … } /* Perform an action on each of an allocator's slabs in parallel. */ static void apply_to_slabs(struct block_allocator *allocator, vdo_action_fn callback) { … } static void finish_loading_allocator(struct vdo_completion *completion) { … } static void erase_next_slab_journal(struct block_allocator *allocator); static void copy_callback(int read_err, unsigned long write_err, void *context) { … } /* erase_next_slab_journal() - Erase the next slab journal. */ static void erase_next_slab_journal(struct block_allocator *allocator) { … } /* Implements vdo_admin_initiator_fn. */ static void initiate_load(struct admin_state *state) { … } /** * vdo_notify_slab_journals_are_recovered() - Inform a block allocator that its slab journals have * been recovered from the recovery journal. * @completion: The allocator completion. */ void vdo_notify_slab_journals_are_recovered(struct vdo_completion *completion) { … } static int get_slab_statuses(struct block_allocator *allocator, struct slab_status **statuses_ptr) { … } /* Prepare slabs for allocation or scrubbing.
*/ static int __must_check vdo_prepare_slabs_for_allocation(struct block_allocator *allocator) { … } static const char *status_to_string(enum slab_rebuild_status status) { … } void vdo_dump_block_allocator(const struct block_allocator *allocator) { … } static void free_slab(struct vdo_slab *slab) { … } static int initialize_slab_journal(struct vdo_slab *slab) { … } /** * make_slab() - Construct a new, empty slab. * @slab_origin: The physical block number within the block allocator partition of the first block * in the slab. * @allocator: The block allocator to which the slab belongs. * @slab_number: The slab number of the slab. * @is_new: true if this slab is being allocated as part of a resize. * @slab_ptr: A pointer to receive the new slab. * * Return: VDO_SUCCESS or an error code. */ static int __must_check make_slab(physical_block_number_t slab_origin, struct block_allocator *allocator, slab_count_t slab_number, bool is_new, struct vdo_slab **slab_ptr) { … } /** * allocate_slabs() - Allocate a new slab pointer array. * @depot: The depot. * @slab_count: The number of slabs the depot should have in the new array. * * Any existing slab pointers will be copied into the new array, and slabs will be allocated as * needed. The newly allocated slabs will not be distributed for use by the block allocators. * * Return: VDO_SUCCESS or an error code. */ static int allocate_slabs(struct slab_depot *depot, slab_count_t slab_count) { … } /** * vdo_abandon_new_slabs() - Abandon any new slabs in this depot, freeing them as needed. * @depot: The depot. */ void vdo_abandon_new_slabs(struct slab_depot *depot) { … } /** * get_allocator_thread_id() - Get the ID of the thread on which a given allocator operates. * * Implements vdo_zone_thread_getter_fn. 
*/ static thread_id_t get_allocator_thread_id(void *context, zone_count_t zone_number) { … } /** * release_recovery_journal_lock() - Request the slab journal to release the recovery journal lock * it may hold on a specified recovery journal block. * @journal: The slab journal. * @recovery_lock: The sequence number of the recovery journal block whose locks should be * released. * * Return: true if the journal does hold a lock on the specified block (which it will release). */ static bool __must_check release_recovery_journal_lock(struct slab_journal *journal, sequence_number_t recovery_lock) { … } /* * Request a commit of all dirty tail blocks which are locking the recovery journal block the depot * is seeking to release. * * Implements vdo_zone_action_fn. */ static void release_tail_block_locks(void *context, zone_count_t zone_number, struct vdo_completion *parent) { … } /** * prepare_for_tail_block_commit() - Prepare to commit oldest tail blocks. * * Implements vdo_action_preamble_fn. */ static void prepare_for_tail_block_commit(void *context, struct vdo_completion *parent) { … } /** * schedule_tail_block_commit() - Schedule a tail block commit if necessary. * * This method should not be called directly. Rather, call vdo_schedule_default_action() on the * depot's action manager. * * Implements vdo_action_scheduler_fn. */ static bool schedule_tail_block_commit(void *context) { … } /** * initialize_slab_scrubber() - Initialize an allocator's slab scrubber. * @allocator: The allocator being initialized * * Return: VDO_SUCCESS or an error. */ static int initialize_slab_scrubber(struct block_allocator *allocator) { … } /** * initialize_slab_summary_block() - Initialize a slab_summary_block. * @allocator: The allocator which owns the block. * @index: The index of this block in its zone's summary. * * Return: VDO_SUCCESS or an error. 
*/ static int __must_check initialize_slab_summary_block(struct block_allocator *allocator, block_count_t index) { … } static int __must_check initialize_block_allocator(struct slab_depot *depot, zone_count_t zone) { … } static int allocate_components(struct slab_depot *depot, struct partition *summary_partition) { … } /** * vdo_decode_slab_depot() - Make a slab depot and configure it with the state read from the super * block. * @state: The slab depot state from the super block. * @vdo: The VDO which will own the depot. * @summary_partition: The partition which holds the slab summary. * @depot_ptr: A pointer to hold the depot. * * Return: A success or error code. */ int vdo_decode_slab_depot(struct slab_depot_state_2_0 state, struct vdo *vdo, struct partition *summary_partition, struct slab_depot **depot_ptr) { … } static void uninitialize_allocator_summary(struct block_allocator *allocator) { … } /** * vdo_free_slab_depot() - Destroy a slab depot. * @depot: The depot to destroy. */ void vdo_free_slab_depot(struct slab_depot *depot) { … } /** * vdo_record_slab_depot() - Record the state of a slab depot for encoding into the super block. * @depot: The depot to encode. * * Return: The depot state. */ struct slab_depot_state_2_0 vdo_record_slab_depot(const struct slab_depot *depot) { … } /** * vdo_allocate_reference_counters() - Allocate the reference counters for all slabs in the depot. * @depot: The slab depot. * * Context: This method may be called only before entering normal operation from the load thread. * * Return: VDO_SUCCESS or an error. */ int vdo_allocate_reference_counters(struct slab_depot *depot) { … } /** * get_slab_number() - Get the number of the slab that contains a specified block. * @depot: The slab depot. * @pbn: The physical block number. * @slab_number_ptr: A pointer to hold the slab number. * * Return: VDO_SUCCESS or an error.
*/ static int __must_check get_slab_number(const struct slab_depot *depot, physical_block_number_t pbn, slab_count_t *slab_number_ptr) { … } /** * vdo_get_slab() - Get the slab object for the slab that contains a specified block. * @depot: The slab depot. * @pbn: The physical block number. * * Will put the VDO in read-only mode if the PBN is not a valid data block nor the zero block. * * Return: The slab containing the block, or NULL if the block number is the zero block or * otherwise out of range. */ struct vdo_slab *vdo_get_slab(const struct slab_depot *depot, physical_block_number_t pbn) { … } /** * vdo_get_increment_limit() - Determine how many new references a block can acquire. * @depot: The slab depot. * @pbn: The physical block number that is being queried. * * Context: This method must be called from the physical zone thread of the PBN. * * Return: The number of available references. */ u8 vdo_get_increment_limit(struct slab_depot *depot, physical_block_number_t pbn) { … } /** * vdo_is_physical_data_block() - Determine whether the given PBN refers to a data block. * @depot: The depot. * @pbn: The physical block number to ask about. * * Return: True if the PBN corresponds to a data block. */ bool vdo_is_physical_data_block(const struct slab_depot *depot, physical_block_number_t pbn) { … } /** * vdo_get_slab_depot_allocated_blocks() - Get the total number of data blocks allocated across all * the slabs in the depot. * @depot: The slab depot. * * This is the total number of blocks with a non-zero reference count. * * Context: This may be called from any thread. * * Return: The total number of blocks with a non-zero reference count. */ block_count_t vdo_get_slab_depot_allocated_blocks(const struct slab_depot *depot) { … } /** * vdo_get_slab_depot_data_blocks() - Get the total number of data blocks in all the slabs in the * depot. * @depot: The slab depot. * * Context: This may be called from any thread. 
* * Return: The total number of data blocks in all slabs. */ block_count_t vdo_get_slab_depot_data_blocks(const struct slab_depot *depot) { … } /** * finish_combining_zones() - Clean up after saving out the combined slab summary. * @completion: The vio which was used to write the summary data. */ static void finish_combining_zones(struct vdo_completion *completion) { … } static void handle_combining_error(struct vdo_completion *completion) { … } static void write_summary_endio(struct bio *bio) { … } /** * combine_summaries() - Treating the current entries buffer as the on-disk value of all zones, * update every zone to the correct values for every slab. * @depot: The depot whose summary entries should be combined. */ static void combine_summaries(struct slab_depot *depot) { … } /** * finish_loading_summary() - Finish loading slab summary data. * @completion: The vio which was used to read the summary data. * * Combines the slab summary data from all the previously written zones and copies the combined * summary to each partition's data region. Then writes the combined summary back out to disk. This * callback is registered in load_summary_endio(). */ static void finish_loading_summary(struct vdo_completion *completion) { … } static void load_summary_endio(struct bio *bio) { … } /** * load_slab_summary() - The preamble of a load operation. * * Implements vdo_action_preamble_fn. */ static void load_slab_summary(void *context, struct vdo_completion *parent) { … } /* Implements vdo_zone_action_fn. */ static void load_allocator(void *context, zone_count_t zone_number, struct vdo_completion *parent) { … } /** * vdo_load_slab_depot() - Asynchronously load any slab depot state that isn't included in the * super_block component. * @depot: The depot to load. * @operation: The type of load to perform. * @parent: The completion to notify when the load is complete. * @context: Additional context for the load operation; may be NULL. 
* * This method may be called only before entering normal operation from the load thread. */ void vdo_load_slab_depot(struct slab_depot *depot, const struct admin_state_code *operation, struct vdo_completion *parent, void *context) { … } /* Implements vdo_zone_action_fn. */ static void prepare_to_allocate(void *context, zone_count_t zone_number, struct vdo_completion *parent) { … } /** * vdo_prepare_slab_depot_to_allocate() - Prepare the slab depot to come online and start * allocating blocks. * @depot: The depot to prepare. * @load_type: The load type. * @parent: The completion to notify when the operation is complete. * * This method may be called only before entering normal operation from the load thread. It must be * called before allocation may proceed. */ void vdo_prepare_slab_depot_to_allocate(struct slab_depot *depot, enum slab_depot_load_type load_type, struct vdo_completion *parent) { … } /** * vdo_update_slab_depot_size() - Update the slab depot to reflect its new size in memory. * @depot: The depot to update. * * This size is saved to disk as part of the super block. */ void vdo_update_slab_depot_size(struct slab_depot *depot) { … } /** * vdo_prepare_to_grow_slab_depot() - Allocate new memory needed for a resize of a slab depot to * the given size. * @depot: The depot to prepare to resize. * @partition: The new depot partition * * Return: VDO_SUCCESS or an error. */ int vdo_prepare_to_grow_slab_depot(struct slab_depot *depot, const struct partition *partition) { … } /** * finish_registration() - Finish registering new slabs now that all of the allocators have * received their new slabs. * * Implements vdo_action_conclusion_fn. */ static int finish_registration(void *context) { … } /* Implements vdo_zone_action_fn. */ static void register_new_slabs(void *context, zone_count_t zone_number, struct vdo_completion *parent) { … } /** * vdo_use_new_slabs() - Use the new slabs allocated for resize. * @depot: The depot. 
* @parent: The object to notify when complete. */ void vdo_use_new_slabs(struct slab_depot *depot, struct vdo_completion *parent) { … } /** * stop_scrubbing() - Tell the scrubber to stop scrubbing after it finishes the slab it is * currently working on. * @allocator: The block allocator whose scrubber is to be stopped. */ static void stop_scrubbing(struct block_allocator *allocator) { … } /* Implements vdo_admin_initiator_fn. */ static void initiate_summary_drain(struct admin_state *state) { … } static void do_drain_step(struct vdo_completion *completion) { … } /* Implements vdo_admin_initiator_fn. */ static void initiate_drain(struct admin_state *state) { … } /* * Drain all allocator I/O. Depending upon the type of drain, some or all dirty metadata may be * written to disk. The type of drain will be determined from the state of the allocator's depot. * * Implements vdo_zone_action_fn. */ static void drain_allocator(void *context, zone_count_t zone_number, struct vdo_completion *parent) { … } /** * vdo_drain_slab_depot() - Drain all slab depot I/O. * @depot: The depot to drain. * @operation: The drain operation (flush, rebuild, suspend, or save). * @parent: The completion to finish when the drain is complete. * * If saving, or flushing, all dirty depot metadata will be written out. If saving or suspending, * the depot will be left in a suspended state. */ void vdo_drain_slab_depot(struct slab_depot *depot, const struct admin_state_code *operation, struct vdo_completion *parent) { … } /** * resume_scrubbing() - Tell the scrubber to resume scrubbing if it has been stopped. * @allocator: The allocator being resumed. */ static void resume_scrubbing(struct block_allocator *allocator) { … } static void do_resume_step(struct vdo_completion *completion) { … } /* Implements vdo_admin_initiator_fn. */ static void initiate_resume(struct admin_state *state) { … } /* Implements vdo_zone_action_fn.
*/ static void resume_allocator(void *context, zone_count_t zone_number, struct vdo_completion *parent) { … } /** * vdo_resume_slab_depot() - Resume a suspended slab depot. * @depot: The depot to resume. * @parent: The completion to finish when the depot has resumed. */ void vdo_resume_slab_depot(struct slab_depot *depot, struct vdo_completion *parent) { … } /** * vdo_commit_oldest_slab_journal_tail_blocks() - Commit all dirty tail blocks which are locking a * given recovery journal block. * @depot: The depot. * @recovery_block_number: The sequence number of the recovery journal block whose locks should be * released. * * Context: This method must be called from the journal zone thread. */ void vdo_commit_oldest_slab_journal_tail_blocks(struct slab_depot *depot, sequence_number_t recovery_block_number) { … } /* Implements vdo_zone_action_fn. */ static void scrub_all_unrecovered_slabs(void *context, zone_count_t zone_number, struct vdo_completion *parent) { … } /** * vdo_scrub_all_unrecovered_slabs() - Scrub all unrecovered slabs. * @depot: The depot to scrub. * @parent: The object to notify when scrubbing has been launched for all zones. */ void vdo_scrub_all_unrecovered_slabs(struct slab_depot *depot, struct vdo_completion *parent) { … } /** * get_block_allocator_statistics() - Get the total of the statistics from all the block allocators * in the depot. * @depot: The slab depot. * * Return: The statistics from all block allocators in the depot. */ static struct block_allocator_statistics __must_check get_block_allocator_statistics(const struct slab_depot *depot) { … } /** * get_ref_counts_statistics() - Get the cumulative ref_counts statistics for the depot. * @depot: The slab depot. * * Return: The cumulative statistics for all ref_counts in the depot. */ static struct ref_counts_statistics __must_check get_ref_counts_statistics(const struct slab_depot *depot) { … } /** * get_slab_journal_statistics() - Get the aggregated slab journal statistics for the depot. 
* @depot: The slab depot. * * Return: The aggregated statistics for all slab journals in the depot. */ static struct slab_journal_statistics __must_check get_slab_journal_statistics(const struct slab_depot *depot) { … } /** * vdo_get_slab_depot_statistics() - Get all the vdo_statistics fields that are properties of the * slab depot. * @depot: The slab depot. * @stats: The vdo statistics structure to partially fill. */ void vdo_get_slab_depot_statistics(const struct slab_depot *depot, struct vdo_statistics *stats) { … } /** * vdo_dump_slab_depot() - Dump the slab depot, in a thread-unsafe fashion. * @depot: The slab depot. */ void vdo_dump_slab_depot(const struct slab_depot *depot) { … }