/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright 2023 Red Hat */ #ifndef VDO_RECOVERY_JOURNAL_H #define VDO_RECOVERY_JOURNAL_H #include <linux/list.h> #include "numeric.h" #include "admin-state.h" #include "constants.h" #include "encodings.h" #include "flush.h" #include "statistics.h" #include "types.h" #include "wait-queue.h" /** * DOC: recovery journal. * * The recovery_journal provides a log of all block mapping and reference count changes which have * not yet been stably written to the block map or slab journals. This log helps to reduce * write amplification by amortizing slab journal and block map page * updates. * * The recovery journal has a single dedicated queue and thread for performing all journal updates. * The concurrency guarantees of this single-threaded model allow the code to omit more * fine-grained locking for recovery journal structures. * * The journal consists of a set of on-disk blocks arranged as a circular log with monotonically * increasing sequence numbers. Three sequence numbers serve to define the active extent of the * journal. The 'head' is the oldest active block in the journal. The 'tail' is the end of the * half-open interval containing the active blocks. 'active' is the number of the block actively * receiving entries. In an empty journal, head == active == tail. Once any entries are added, tail * = active + 1, and head may be any value in the interval [tail - size, active]. * * The journal also contains a set of in-memory blocks which are used to buffer up entries until * they can be committed. In general the number of in-memory blocks ('tail_buffer_count') will be * less than the on-disk size. Each in-memory block is also a vdo_completion. Each in-memory block * has a vio which is used to commit that block to disk. The vio's data is the on-disk * representation of the journal block. 
In addition, each in-memory block has a buffer which is used * to accumulate entries while a partial commit of the block is in progress. In-memory blocks are * kept on two rings. Free blocks live on the 'free_tail_blocks' ring. When a block becomes active * (see below) it is moved to the 'active_tail_blocks' ring. When a block is fully committed, it is * moved back to the 'free_tail_blocks' ring. * * When entries are added to the journal, they are added to the active in-memory block, as * indicated by the 'active_block' field. If the caller wishes to wait for the entry to be * committed, the requesting VIO will be attached to the in-memory block to which the caller's * entry was added. If the caller does wish to wait, or if the entry filled the active block, an * attempt will be made to commit that block to disk. If there is already another commit in * progress, the attempt will be ignored and then automatically retried when the in-progress commit * completes. If there is no commit in progress, any data_vios waiting on the block are transferred * to the block's vio, which is then written, automatically waking all of the waiters when it * completes. When the write completes, any entries which accumulated in the block are copied to * the vio's data buffer. * * The journal also maintains a set of counters, one for each on-disk journal block. These * counters are used as locks to prevent premature reaping of journal blocks. Each time a new * sequence number is used, the counter for the corresponding block is incremented. The counter is * subsequently decremented when that block is filled and then committed for the last time. This * prevents blocks from being reaped while they are still being updated. The counter is also * incremented once for each entry added to a block, and decremented once each time the block map * is updated in memory for that request. This prevents blocks from being reaped while their VIOs * are still active. 
Finally, each in-memory block map page tracks the oldest journal block that * contains entries corresponding to uncommitted updates to that block map page. Each time an * in-memory block map page is updated, it checks if the journal block for the VIO is earlier than * the one it references, in which case it increments the count on the earlier journal block and * decrements the count on the later journal block, maintaining a lock on the oldest journal block * containing entries for that page. When a block map page has been flushed from the cache, the * counter for the journal block it references is decremented. Whenever the counter for the head * block goes to 0, the head is advanced until it comes to a block whose counter is not 0 or until * it reaches the active block. This is the mechanism for reclaiming journal space on disk. * * If there is no in-memory space when a VIO attempts to add an entry, the VIO will be attached to * the 'commit_completion' and will be woken the next time a full block has committed. If there is * no on-disk space when a VIO attempts to add an entry, the VIO will be attached to the * 'reap_completion', and will be woken the next time a journal block is reaped. */

/*
 * Type of the zone_type argument taken by vdo_acquire_recovery_journal_block_reference() and
 * vdo_release_recovery_journal_block_reference().
 */
enum vdo_zone_type { … };

/*
 * NOTE(review): presumably holds the per-journal-block counters that the DOC comment describes as
 * locks against premature reaping -- confirm against the member list.
 */
struct lock_counter { … };

/*
 * NOTE(review): presumably one of the in-memory journal blocks described in the DOC comment --
 * confirm against the member list.
 */
struct recovery_journal_block { … };

/* The journal object that every function declared in this header operates on. */
struct recovery_journal { … };

/**
 * vdo_get_recovery_journal_block_number() - Get the physical block number for a given sequence
 *                                           number.
 * @journal: The journal.
 * @sequence: The sequence number of the desired block.
 *
 * Return: The block number corresponding to the sequence number.
 */
static inline physical_block_number_t __must_check
vdo_get_recovery_journal_block_number(const struct recovery_journal *journal,
				      sequence_number_t sequence)
{ … }

/**
 * vdo_compute_recovery_journal_check_byte() - Compute the check byte for a given sequence number.
 * @journal: The journal.
 * @sequence: The sequence number.
 *
 * Return: The check byte corresponding to the sequence number.
 */
static inline u8 __must_check
vdo_compute_recovery_journal_check_byte(const struct recovery_journal *journal,
					sequence_number_t sequence)
{ … }

/*
 * Takes a saved journal state (struct recovery_journal_state_7_0, the same type that
 * vdo_record_recovery_journal() returns) and has a struct recovery_journal ** out-parameter.
 * The int result is marked __must_check.
 *
 * NOTE(review): presumably returns a status code and stores a newly constructed journal in
 * *journal_ptr -- confirm against the definition.
 */
int __must_check vdo_decode_recovery_journal(struct recovery_journal_state_7_0 state,
					     nonce_t nonce, struct vdo *vdo,
					     struct partition *partition,
					     u64 recovery_count,
					     block_count_t journal_size,
					     struct recovery_journal **journal_ptr);

/*
 * NOTE(review): presumably the teardown counterpart of vdo_decode_recovery_journal(); whether a
 * NULL journal is accepted cannot be determined from this declaration.
 */
void vdo_free_recovery_journal(struct recovery_journal *journal);

/*
 * NOTE(review): presumably re-initializes @journal from values established by a repair (recovery
 * count, tail sequence number, and the two block counts) -- confirm against the definition.
 */
void vdo_initialize_recovery_journal_post_repair(struct recovery_journal *journal,
						 u64 recovery_count,
						 sequence_number_t tail,
						 block_count_t logical_blocks_used,
						 block_count_t block_map_data_blocks);

/*
 * Getter; the result is marked __must_check.
 * NOTE(review): takes a non-const journal, unlike
 * vdo_get_recovery_journal_logical_blocks_used() -- confirm whether that is required.
 */
block_count_t __must_check
vdo_get_journal_block_map_data_blocks_used(struct recovery_journal *journal);

/*
 * Getter; the result is marked __must_check.
 * NOTE(review): presumably identifies the single dedicated journal thread mentioned in the DOC
 * comment -- confirm.
 */
thread_id_t __must_check vdo_get_recovery_journal_thread_id(struct recovery_journal *journal);

/*
 * NOTE(review): presumably readies @journal for normal operation by associating it with the slab
 * depot and block map whose changes it logs -- confirm against the definition.
 */
void vdo_open_recovery_journal(struct recovery_journal *journal,
			       struct slab_depot *depot,
			       struct block_map *block_map);

/*
 * NOTE(review): this is the only getter in this header whose result is not marked __must_check;
 * confirm whether the omission is intentional.
 */
sequence_number_t
vdo_get_recovery_journal_current_sequence_number(struct recovery_journal *journal);

/*
 * Derives a block count from a journal size alone; no journal instance is required. The result
 * is marked __must_check. The exact relation between the two is defined elsewhere.
 */
block_count_t __must_check vdo_get_recovery_journal_length(block_count_t journal_size);

/*
 * Returns, by value, a struct recovery_journal_state_7_0 for @journal; this is the type that
 * vdo_decode_recovery_journal() accepts. The result is marked __must_check.
 */
struct recovery_journal_state_7_0 __must_check
vdo_record_recovery_journal(const struct recovery_journal *journal);

/*
 * Entry point for adding a journal entry on behalf of @data_vio. The DOC comment at the top of
 * this file describes how entries are buffered in the active in-memory block and committed.
 */
void vdo_add_recovery_journal_entry(struct recovery_journal *journal,
				    struct data_vio *data_vio);

/*
 * The next two functions identify a journal block by sequence number and a zone by type and id.
 * NOTE(review): presumably they increment and decrement, respectively, the per-block lock counter
 * described in the DOC comment -- confirm against the definitions.
 */
void vdo_acquire_recovery_journal_block_reference(struct recovery_journal *journal,
						  sequence_number_t sequence_number,
						  enum vdo_zone_type zone_type,
						  zone_count_t zone_id);

void vdo_release_recovery_journal_block_reference(struct recovery_journal *journal,
						  sequence_number_t sequence_number,
						  enum vdo_zone_type zone_type,
						  zone_count_t zone_id);

/*
 * NOTE(review): presumably drops the per-entry count that the DOC comment says is taken for each
 * entry added to a block -- confirm against the definition.
 */
void vdo_release_journal_entry_lock(struct recovery_journal *journal,
				    sequence_number_t sequence_number);

/*
 * Drain and resume both take a parent completion; drain additionally takes the admin state code
 * of the operation being performed.
 * NOTE(review): presumably both are asynchronous and notify @parent when finished -- confirm.
 */
void vdo_drain_recovery_journal(struct recovery_journal *journal,
				const struct admin_state_code *operation,
				struct vdo_completion *parent);

void vdo_resume_recovery_journal(struct recovery_journal *journal,
				 struct vdo_completion *parent);

/* Getter on a const journal; the result is marked __must_check. */
block_count_t __must_check
vdo_get_recovery_journal_logical_blocks_used(const struct recovery_journal *journal);

/*
 * Returns a struct recovery_journal_statistics by value for a const journal; the result is
 * marked __must_check.
 */
struct recovery_journal_statistics __must_check
vdo_get_recovery_journal_statistics(const struct recovery_journal *journal);

/*
 * NOTE(review): presumably logs the journal's statistics for debugging; the output destination
 * is not visible from this declaration.
 */
void vdo_dump_recovery_journal_statistics(const struct recovery_journal *journal);

#endif /* VDO_RECOVERY_JOURNAL_H */