// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2018-2024 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_buf_mem.h"
#include "xfs_btree_mem.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_alloc.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_inode.h"
#include "xfs_icache.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "xfs_refcount.h"
#include "xfs_refcount_btree.h"
#include "xfs_ag.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
#include "scrub/agb_bitmap.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/iscan.h"
#include "scrub/newbt.h"
#include "scrub/reap.h"

/*
 * Reverse Mapping Btree Repair
 * ============================
 *
 * This is the most involved of all the AG space btree rebuilds.  Everywhere
 * else in XFS we lock inodes and then AG data structures, but generating the
 * list of rmap records requires that we be able to scan both block mapping
 * btrees of every inode in the filesystem to see if it owns any extents in
 * this AG.  We can't tolerate any inode updates while we do this, so we
 * freeze the filesystem to lock everyone else out, and grant ourselves
 * special privileges to run transactions with regular background reclamation
 * turned off.
 *
 * We also have to be very careful not to allow inode reclaim to start a
 * transaction because all transactions (other than our own) will block.
 * Deferred inode inactivation helps us out there.
 *
 * I) Reverse mappings for all non-space metadata and file data are collected
 * according to the following algorithm:
 *
 * 1. For each fork of each inode:
 * 1.1. Create a bitmap BMBIT to track bmbt blocks if necessary.
 * 1.2. If the incore extent map isn't loaded, walk the bmbt to accumulate
 *      bmaps into rmap records (see 1.4).  Set bits in BMBIT for each btree
 *      block.
 * 1.3. If the incore extent map is loaded but the fork is in btree format,
 *      just visit the bmbt blocks to set the corresponding BMBIT areas.
 * 1.4. From the incore extent map, accumulate each bmap that falls into our
 *      target AG.  Remember, multiple bmap records can map to a single rmap
 *      record, so we cannot simply emit rmap records 1:1.
 * 1.5. Emit rmap records for each extent in BMBIT and free it.
 * 2. Create bitmaps INOBIT and ICHUNKBIT.
 * 3. For each record in the inobt, set the corresponding areas in ICHUNKBIT,
 *    and set bits in INOBIT for each btree block.  If the inobt has no records
 *    at all, we must be careful to record its root in INOBIT.
 * 4. For each block in the finobt, set the corresponding INOBIT area.
 * 5. Emit rmap records for each extent in INOBIT and ICHUNKBIT and free them.
 * 6. Create bitmaps REFCBIT and COWBIT.
 * 7. For each CoW staging extent in the refcountbt, set the corresponding
 *    areas in COWBIT.
 * 8. For each block in the refcountbt, set the corresponding REFCBIT area.
 * 9. Emit rmap records for each extent in REFCBIT and COWBIT and free them.
 * A. Emit rmap for the AG headers.
 * B. Emit rmap for the log, if there is one.
 *
 * II) The rmapbt shape and space metadata rmaps are computed as follows:
 *
 * 1. Count the rmaps collected in the previous step. (= NR)
 * 2. Estimate the number of rmapbt blocks needed to store NR records. (= RMB)
 * 3. Reserve RMB blocks through the newbt using the allocator in normap mode.
 * 4. Create bitmap AGBIT.
 * 5. For each reservation in the newbt, set the corresponding areas in AGBIT.
 * 6. For each block in the AGFL, bnobt, and cntbt, set the bits in AGBIT.
 * 7. Count the extents in AGBIT. (= AGNR)
 * 8. Estimate the number of rmapbt blocks needed for NR + AGNR rmaps. (= RMB')
 * 9. If RMB' > RMB, reserve RMB' - RMB more newbt blocks, set RMB = RMB',
 *    and clear AGBIT.  Go to step 5.  Otherwise, the reservation is stable.
 * A. Emit rmaps for each extent in AGBIT.
 *
 * III) The rmapbt is constructed and set in place as follows:
 *
 * 1. Sort the rmap records.
 * 2. Bulk load the rmaps.
 *
 * IV) Reap the old btree blocks.
 *
 * 1. Create a bitmap OLDRMBIT.
 * 2. For each gap in the new rmapbt, set the corresponding areas of OLDRMBIT.
 * 3. For each extent in the bnobt, clear the corresponding parts of OLDRMBIT.
 * 4. Reap the extents corresponding to the set areas in OLDRMBIT.  These are
 *    the parts of the AG that the rmap didn't find during its scan of the
 *    primary metadata and aren't known to be in the free space, which implies
 *    that they were the old rmapbt blocks.
 * 5. Commit.
 *
 * We use the 'xrep_rmap' prefix for all the rmap functions.
 */
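
/*
 * Illustrative sketch of how the four phases above fit together.  This is
 * not the shipping entry point (see xrep_rmapbt() at the bottom of this
 * file, which also wires up live-update hooks); the prototypes below are
 * forward declarations of the real helpers defined later, and exactly
 * where the section (II) reservation happens is an assumption here.
 */
struct xrep_rmap;
STATIC int xrep_rmap_setup_scan(struct xrep_rmap *rr);
STATIC int xrep_rmap_find_rmaps(struct xrep_rmap *rr);
STATIC int xrep_rmap_build_new_tree(struct xrep_rmap *rr);
STATIC int xrep_rmap_remove_old_tree(struct xrep_rmap *rr);
STATIC void xrep_rmap_teardown(struct xrep_rmap *rr);

static int __maybe_unused
xrep_rmap_sketch_phases(
	struct xrep_rmap	*rr)
{
	int			error;

	/* Set up the inode scan and the shadow copy of the new btree. */
	error = xrep_rmap_setup_scan(rr);
	if (error)
		return error;

	/* Section (I): collect rmaps for file data and non-space metadata. */
	error = xrep_rmap_find_rmaps(rr);
	if (error)
		goto out;

	/*
	 * Sections (II) and (III): reserve space for the new btree, then
	 * bulk load the collected records into it.
	 */
	error = xrep_rmap_build_new_tree(rr);
	if (error)
		goto out;

	/* Section (IV): reap whatever remains of the old rmapbt. */
	error = xrep_rmap_remove_old_tree(rr);
out:
	xrep_rmap_teardown(rr);
	return error;
}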

/* Context for collecting rmaps */
struct xrep_rmap {};

/* Set us up to repair reverse mapping btrees. */
int
xrep_setup_ag_rmapbt(
	struct xfs_scrub	*sc)
{}

/* Make sure there's nothing funny about this mapping. */
STATIC int
xrep_rmap_check_mapping(
	struct xfs_scrub	*sc,
	const struct xfs_rmap_irec *rec)
{}

/* Store a reverse-mapping record. */
static inline int
xrep_rmap_stash(
	struct xrep_rmap	*rr,
	xfs_agblock_t		startblock,
	xfs_extlen_t		blockcount,
	uint64_t		owner,
	uint64_t		offset,
	unsigned int		flags)
{}

struct xrep_rmap_stash_run {};

static int
xrep_rmap_stash_run(
	uint32_t			start,
	uint32_t			len,
	void				*priv)
{}

/*
 * Emit rmaps for every extent of bits set in the bitmap.  Caller must ensure
 * that the ranges are in units of FS blocks.
 */
STATIC int
xrep_rmap_stash_bitmap(
	struct xrep_rmap		*rr,
	struct xagb_bitmap		*bitmap,
	const struct xfs_owner_info	*oinfo)
{}
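
/*
 * Minimal sketch of how the helper above combines with the pattern used
 * throughout section (I): mark every block owned by a btree in an
 * xagb_bitmap, then emit one rmap record per run of set bits.  This
 * assumes xagb_bitmap_set_btblocks(), the scrub helper that sets a bit
 * for each block visited by a btree cursor; the function name here is
 * illustrative.
 */
static int __maybe_unused
xrep_rmap_sketch_stash_btblocks(
	struct xrep_rmap		*rr,
	struct xfs_btree_cur		*cur,
	const struct xfs_owner_info	*oinfo)
{
	struct xagb_bitmap		btblocks;	/* e.g. INOBIT */
	int				error;

	xagb_bitmap_init(&btblocks);

	/* Record every block in the btree, including the root. */
	error = xagb_bitmap_set_btblocks(&btblocks, cur);
	if (error)
		goto out;

	/* Emit one rmap record per extent of set bits. */
	error = xrep_rmap_stash_bitmap(rr, &btblocks, oinfo);
out:
	xagb_bitmap_destroy(&btblocks);
	return error;
}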

/* Section (I): Finding all file and bmbt extents. */

/* Context for accumulating rmaps for an inode fork. */
struct xrep_rmap_ifork {};

/* Stash an rmap that we accumulated while walking an inode fork. */
STATIC int
xrep_rmap_stash_accumulated(
	struct xrep_rmap_ifork	*rf)
{}

/* Accumulate a bmbt record. */
STATIC int
xrep_rmap_visit_bmbt(
	struct xfs_btree_cur	*cur,
	struct xfs_bmbt_irec	*rec,
	void			*priv)
{}
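
/*
 * Sketch of the merging rule behind step 1.4 of section (I): an incoming
 * block mapping may extend the rmap record we are accumulating only if it
 * is contiguous in both physical space and logical file offset and has
 * identical flags.  @agbno, @off, and @flags describe the incoming mapping
 * translated into rmap terms; the predicate name is illustrative.
 */
static bool __maybe_unused
xrep_rmap_sketch_can_merge(
	const struct xfs_rmap_irec	*accum,	/* record being accumulated */
	xfs_agblock_t			agbno,
	xfs_fileoff_t			off,
	unsigned int			flags)
{
	return accum->rm_startblock + accum->rm_blockcount == agbno &&
	       accum->rm_offset + accum->rm_blockcount == off &&
	       accum->rm_flags == flags;
}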

/* Add a btree block to the bitmap. */
STATIC int
xrep_rmap_visit_iroot_btree_block(
	struct xfs_btree_cur	*cur,
	int			level,
	void			*priv)
{}

/*
 * Iterate a metadata btree rooted in an inode to collect rmap records for
 * anything in this fork that matches the AG.
 */
STATIC int
xrep_rmap_scan_iroot_btree(
	struct xrep_rmap_ifork	*rf,
	struct xfs_btree_cur	*cur)
{}

/*
 * Iterate the block mapping btree to collect rmap records for anything in this
 * fork that matches the AG.  Sets @mappings_done to true if we've scanned the
 * block mappings in this fork.
 */
STATIC int
xrep_rmap_scan_bmbt(
	struct xrep_rmap_ifork	*rf,
	struct xfs_inode	*ip,
	bool			*mappings_done)
{}

/*
 * Iterate the in-core extent cache to collect rmap records for anything in
 * this fork that matches the AG.
 */
STATIC int
xrep_rmap_scan_iext(
	struct xrep_rmap_ifork	*rf,
	struct xfs_ifork	*ifp)
{}
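
/*
 * Sketch of the walk above using for_each_xfs_iext() from the inode fork
 * code.  Delalloc reservations own no physical blocks yet, and mappings
 * that land in another AG belong to that AG's rmapbt, so both are skipped.
 * The accumulate-and-stash step is elided; the function name is
 * illustrative.
 */
static int __maybe_unused
xrep_rmap_sketch_scan_iext(
	struct xfs_mount	*mp,
	struct xfs_ifork	*ifp,
	xfs_agnumber_t		agno)
{
	struct xfs_bmbt_irec	rec;
	struct xfs_iext_cursor	icur;

	for_each_xfs_iext(ifp, &icur, &rec) {
		/* Delalloc extents have no disk blocks in any AG yet. */
		if (isnullstartblock(rec.br_startblock))
			continue;
		/* Skip mappings that fall into some other AG. */
		if (XFS_FSB_TO_AGNO(mp, rec.br_startblock) != agno)
			continue;
		/* Accumulate @rec into the pending rmap record (elided). */
	}
	return 0;
}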

/* Find all the extents from a given AG in an inode fork. */
STATIC int
xrep_rmap_scan_ifork(
	struct xrep_rmap	*rr,
	struct xfs_inode	*ip,
	int			whichfork)
{}

/*
 * Take ILOCK on a file that we want to scan.
 *
 * Select ILOCK_EXCL if the file has an unloaded data bmbt or has an unloaded
 * attr bmbt.  Otherwise, take ILOCK_SHARED.
 */
static inline unsigned int
xrep_rmap_scan_ilock(
	struct xfs_inode	*ip)
{}
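
/*
 * Sketch of the decision above, assuming xfs_need_iread_extents() is the
 * test for an unloaded extent map.  Pulling a bmbt into memory modifies
 * the fork, so those cases require ILOCK_EXCL; forks whose mappings are
 * already in memory can be walked under ILOCK_SHARED.  The caller then
 * takes the returned mode with xfs_ilock().
 */
static unsigned int __maybe_unused
xrep_rmap_sketch_ilock_mode(
	struct xfs_inode	*ip)
{
	struct xfs_ifork	*ifp;

	ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	if (ifp->if_format == XFS_DINODE_FMT_BTREE &&
	    xfs_need_iread_extents(ifp))
		return XFS_ILOCK_EXCL;

	ifp = xfs_ifork_ptr(ip, XFS_ATTR_FORK);
	if (ifp && ifp->if_format == XFS_DINODE_FMT_BTREE &&
	    xfs_need_iread_extents(ifp))
		return XFS_ILOCK_EXCL;

	return XFS_ILOCK_SHARED;
}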

/* Record reverse mappings for a file. */
STATIC int
xrep_rmap_scan_inode(
	struct xrep_rmap	*rr,
	struct xfs_inode	*ip)
{}

/* Section (I): Find all AG metadata extents except for free space metadata. */

struct xrep_rmap_inodes {};

/* Record inode btree rmaps. */
STATIC int
xrep_rmap_walk_inobt(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_rec	*rec,
	void				*priv)
{}

/* Collect rmaps for the blocks containing inode btrees and the inode chunks. */
STATIC int
xrep_rmap_find_inode_rmaps(
	struct xrep_rmap	*rr)
{}

/* Record a CoW staging extent. */
STATIC int
xrep_rmap_walk_cowblocks(
	struct xfs_btree_cur		*cur,
	const struct xfs_refcount_irec	*irec,
	void				*priv)
{}

/*
 * Collect rmaps for the blocks containing the refcount btree, and all CoW
 * staging extents.
 */
STATIC int
xrep_rmap_find_refcount_rmaps(
	struct xrep_rmap	*rr)
{}

/* Generate rmaps for the AG headers (SB/AGF/AGI/AGFL). */
STATIC int
xrep_rmap_find_agheader_rmaps(
	struct xrep_rmap	*rr)
{}
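
/*
 * Sketch of the header rmaps, assuming the usual AG layout in which the
 * superblock, AGF, AGI, and AGFL header occupy four contiguous blocks at
 * the start of the AG, all owned by the static fs metadata owner.
 */
static int __maybe_unused
xrep_rmap_sketch_agheader_rmaps(
	struct xrep_rmap	*rr,
	struct xfs_mount	*mp)
{
	/* XFS_SB_BLOCK through XFS_AGFL_BLOCK are contiguous. */
	return xrep_rmap_stash(rr, XFS_SB_BLOCK(mp),
			XFS_AGFL_BLOCK(mp) - XFS_SB_BLOCK(mp) + 1,
			XFS_RMAP_OWN_FS, 0, 0);
}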

/* Generate rmaps for the log, if it's in this AG. */
STATIC int
xrep_rmap_find_log_rmaps(
	struct xrep_rmap	*rr)
{}
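
/*
 * Sketch of the log rmap, assuming a nonzero sb_logstart means the log is
 * internal and sb_logblocks gives its length in filesystem blocks.
 */
static int __maybe_unused
xrep_rmap_sketch_log_rmap(
	struct xrep_rmap	*rr,
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	/* External logs and logs in other AGs contribute no rmaps here. */
	if (mp->m_sb.sb_logstart == 0 ||
	    XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) != agno)
		return 0;

	return xrep_rmap_stash(rr,
			XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart),
			mp->m_sb.sb_logblocks, XFS_RMAP_OWN_LOG, 0, 0);
}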

/* Check and count all the records that we gathered. */
STATIC int
xrep_rmap_check_record(
	struct xfs_btree_cur		*cur,
	const struct xfs_rmap_irec	*rec,
	void				*priv)
{}

/*
 * Generate all the reverse mappings for this AG, a list of the old rmapbt
 * blocks, and the new btreeblks count.  Figure out if we have enough free
 * space to reconstruct the rmap btree.  The caller must clean up the lists
 * if anything goes wrong.  This implements section (I) above.
 */
STATIC int
xrep_rmap_find_rmaps(
	struct xrep_rmap	*rr)
{}

/* Section (II): Reserving space for new rmapbt and setting free space bitmap */

struct xrep_rmap_agfl {};

/* Add an AGFL block to the rmap list. */
STATIC int
xrep_rmap_walk_agfl(
	struct xfs_mount	*mp,
	xfs_agblock_t		agbno,
	void			*priv)
{}

/*
 * Run one round of reserving space for the new rmapbt and recomputing the
 * number of blocks needed to store the previously observed rmapbt records and
 * the ones we'll create for the free space metadata.  When we don't need more
 * blocks, return a bitmap of OWN_AG extents in @freesp_blocks and set @done to
 * true.
 */
STATIC int
xrep_rmap_try_reserve(
	struct xrep_rmap	*rr,
	struct xfs_btree_cur	*rmap_cur,
	struct xagb_bitmap	*freesp_blocks,
	uint64_t		*blocks_reserved,
	bool			*done)
{}

/*
 * Iteratively reserve space for rmap btree while recording OWN_AG rmaps for
 * the free space metadata.  This implements section (II) above.
 */
STATIC int
xrep_rmap_reserve_space(
	struct xrep_rmap	*rr,
	struct xfs_btree_cur	*rmap_cur)
{}
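
/*
 * Sketch of the fixpoint loop above: run rounds of xrep_rmap_try_reserve()
 * until a round finds that the blocks already reserved can hold both the
 * observed rmaps and the OWN_AG rmaps created by the reservations
 * themselves.  Stashing the OWN_AG records afterwards is elided.
 */
static int __maybe_unused
xrep_rmap_sketch_reserve_loop(
	struct xrep_rmap	*rr,
	struct xfs_btree_cur	*rmap_cur)
{
	struct xagb_bitmap	freesp_blocks;	/* AGBIT */
	uint64_t		blocks_reserved = 0;
	bool			done = false;
	int			error = 0;

	xagb_bitmap_init(&freesp_blocks);

	/* Iterate until one round reserves no additional blocks. */
	while (!done) {
		error = xrep_rmap_try_reserve(rr, rmap_cur, &freesp_blocks,
				&blocks_reserved, &done);
		if (error)
			break;
	}

	/* On success, emit OWN_AG rmaps for each extent in AGBIT (elided). */
	xagb_bitmap_destroy(&freesp_blocks);
	return error;
}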

/* Section (III): Building the new rmap btree. */

/* Update the AGF counters. */
STATIC int
xrep_rmap_reset_counters(
	struct xrep_rmap	*rr)
{}

/* Retrieve rmapbt data for bulk load. */
STATIC int
xrep_rmap_get_records(
	struct xfs_btree_cur	*cur,
	unsigned int		idx,
	struct xfs_btree_block	*block,
	unsigned int		nr_wanted,
	void			*priv)
{}

/* Feed one of the new btree blocks to the bulk loader. */
STATIC int
xrep_rmap_claim_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	void			*priv)
{}

/* Custom allocation function for new rmap btrees. */
STATIC int
xrep_rmap_alloc_vextent(
	struct xfs_scrub	*sc,
	struct xfs_alloc_arg	*args,
	xfs_fsblock_t		alloc_hint)
{}

/* Count the records in this btree. */
STATIC int
xrep_rmap_count_records(
	struct xfs_btree_cur	*cur,
	unsigned long long	*nr)
{}

/*
 * Use the collected rmap information to stage a new rmap btree.  If this is
 * successful we'll return with the new btree root information logged to the
 * repair transaction but not yet committed.  This implements section (III)
 * above.
 */
STATIC int
xrep_rmap_build_new_tree(
	struct xrep_rmap	*rr)
{}
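
/*
 * Sketch of the bulk-load core of the function above, using the btree
 * staging API from xfs_btree_staging.h.  Creating the staging cursor,
 * reserving bload.nr_blocks blocks, committing the new root, and error
 * unwinding are all elided; @nr_records is the count from section (I).
 */
static int __maybe_unused
xrep_rmap_sketch_bload(
	struct xrep_rmap	*rr,
	struct xfs_btree_cur	*rmap_cur,	/* staging cursor */
	unsigned long long	nr_records)
{
	struct xfs_btree_bload	bload = {
		.get_records	= xrep_rmap_get_records,
		.claim_block	= xrep_rmap_claim_block,
	};
	int			error;

	/* Compute the shape of the new btree: nr_blocks, height, etc. */
	error = xfs_btree_bload_compute_geometry(rmap_cur, &bload, nr_records);
	if (error)
		return error;

	/* Reserve bload.nr_blocks blocks (elided), then load the records. */
	return xfs_btree_bload(rmap_cur, &bload, rr);
}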

/* Section (IV): Reaping the old btree. */

struct xrep_rmap_find_gaps {};

/* Subtract each free extent in the bnobt from the rmap gaps. */
STATIC int
xrep_rmap_find_freesp(
	struct xfs_btree_cur		*cur,
	const struct xfs_alloc_rec_incore *rec,
	void				*priv)
{}

/* Record the gaps between reverse mappings, as part of cleaning out the btree. */
STATIC int
xrep_rmap_find_gaps(
	struct xfs_btree_cur		*cur,
	const struct xfs_rmap_irec	*rec,
	void				*priv)
{}

/*
 * Reap the old rmapbt blocks.  Now that the rmapbt is fully rebuilt, we make
 * a list of gaps in the rmap records and a list of the extents mentioned in
 * the bnobt.  Any block that's in the new rmapbt gap list but not mentioned
 * in the bnobt is a block from the old rmapbt and can be removed.
 */
STATIC int
xrep_rmap_remove_old_tree(
	struct xrep_rmap	*rr)
{}
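
/*
 * Sketch of the gap computation above.  @priv is assumed to point at the
 * (elided) xrep_rmap_find_gaps context carrying OLDRMBIT; @rmap_cur walks
 * the new rmapbt and @bno_cur walks the bnobt.
 */
static int __maybe_unused
xrep_rmap_sketch_find_old_blocks(
	struct xfs_btree_cur	*rmap_cur,
	struct xfs_btree_cur	*bno_cur,
	void			*priv)
{
	int			error;

	/* Set OLDRMBIT for every gap between records in the new rmapbt. */
	error = xfs_rmap_query_all(rmap_cur, xrep_rmap_find_gaps, priv);
	if (error)
		return error;

	/* Clear OLDRMBIT wherever the bnobt says the space is free. */
	return xfs_alloc_query_all(bno_cur, xrep_rmap_find_freesp, priv);
}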

static inline bool
xrep_rmapbt_want_live_update(
	struct xchk_iscan		*iscan,
	const struct xfs_owner_info	*oi)
{}
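
/*
 * Sketch of the predicate above: a live update is interesting only if it
 * touches an owner whose blocks have already been recorded.  Special
 * (non-inode) owners such as AG metadata and the log are handled
 * separately from the inode scan, so they always qualify; file mappings
 * qualify only once the iscan cursor has visited the owning inode.
 */
static inline bool __maybe_unused
xrep_rmapbt_sketch_want_live_update(
	struct xchk_iscan		*iscan,
	const struct xfs_owner_info	*oi)
{
	/* Non-inode owners are gathered up front and always tracked. */
	if (XFS_RMAP_NON_INODE_OWNER(oi->oi_owner))
		return true;

	/* File blocks matter only if the scan already visited this inode. */
	return xchk_iscan_want_live_update(iscan, oi->oi_owner);
}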

/*
 * Apply an rmapbt update from the regular filesystem into our shadow btree.
 * We're running from the thread that owns the AGF buffer and is generating
 * the update, so we must be careful about which parts of struct xrep_rmap
 * we change.
 */
static int
xrep_rmapbt_live_update(
	struct notifier_block		*nb,
	unsigned long			action,
	void				*data)
{}

/* Set up the filesystem scan components. */
STATIC int
xrep_rmap_setup_scan(
	struct xrep_rmap	*rr)
{}

/* Tear down scan components. */
STATIC void
xrep_rmap_teardown(
	struct xrep_rmap	*rr)
{}

/* Repair the rmap btree for some AG. */
int
xrep_rmapbt(
	struct xfs_scrub	*sc)
{}