// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2018-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <[email protected]> */ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_btree.h" #include "xfs_btree_staging.h" #include "xfs_bit.h" #include "xfs_log_format.h" #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_alloc.h" #include "xfs_alloc_btree.h" #include "xfs_rmap.h" #include "xfs_rmap_btree.h" #include "xfs_inode.h" #include "xfs_refcount.h" #include "xfs_extent_busy.h" #include "xfs_health.h" #include "xfs_bmap.h" #include "xfs_ialloc.h" #include "xfs_ag.h" #include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/btree.h" #include "scrub/trace.h" #include "scrub/repair.h" #include "scrub/bitmap.h" #include "scrub/agb_bitmap.h" #include "scrub/xfile.h" #include "scrub/xfarray.h" #include "scrub/newbt.h" #include "scrub/reap.h" /* * Free Space Btree Repair * ======================= * * The reverse mappings are supposed to record all space usage for the entire * AG. Therefore, we can recreate the free extent records in an AG by looking * for gaps in the physical extents recorded in the rmapbt. These records are * staged in @free_records. Identifying the gaps is more difficult on a * reflink filesystem because rmap records are allowed to overlap. * * Because the final step of building a new index is to free the space used by * the old index, repair needs to find that space. Unfortunately, all * structures that live in the free space (bnobt, cntbt, rmapbt, agfl) share * the same rmapbt owner code (OWN_AG), so this is not straightforward. * * The scan of the reverse mapping information records the space used by OWN_AG * in @old_allocbt_blocks, which (at this stage) is somewhat misnamed. While * walking the rmapbt records, we create a second bitmap @not_allocbt_blocks to * record all visited rmap btree blocks and all blocks owned by the AGFL. * * After that is where the definitions of old_allocbt_blocks shifts. This * expression identifies possible former bnobt/cntbt blocks: * * (OWN_AG blocks) & ~(rmapbt blocks | agfl blocks); * * Substituting from above definitions, that becomes: * * old_allocbt_blocks & ~not_allocbt_blocks * * The OWN_AG bitmap itself isn't needed after this point, so what we really do * instead is: * * old_allocbt_blocks &= ~not_allocbt_blocks; * * After this point, @old_allocbt_blocks is a bitmap of alleged former * bnobt/cntbt blocks. The xagb_bitmap_disunion operation modifies its first * parameter in place to avoid copying records around. * * Next, some of the space described by @free_records are diverted to the newbt * reservation and used to format new btree blocks. The remaining records are * written to the new btree indices. We reconstruct both bnobt and cntbt at * the same time since we've already done all the work. * * We use the prefix 'xrep_abt' here because we regenerate both free space * allocation btrees at the same time. */ struct xrep_abt { … }; /* Set up to repair AG free space btrees. */ int xrep_setup_ag_allocbt( struct xfs_scrub *sc) { … } /* Check for any obvious conflicts in the free extent. */ STATIC int xrep_abt_check_free_ext( struct xfs_scrub *sc, const struct xfs_alloc_rec_incore *rec) { … } /* * Stash a free space record for all the space since the last bno we found * all the way up to @end. */ static int xrep_abt_stash( struct xrep_abt *ra, xfs_agblock_t end) { … } /* Record extents that aren't in use from gaps in the rmap records. */ STATIC int xrep_abt_walk_rmap( struct xfs_btree_cur *cur, const struct xfs_rmap_irec *rec, void *priv) { … } /* Collect an AGFL block for the not-to-release list. */ static int xrep_abt_walk_agfl( struct xfs_mount *mp, xfs_agblock_t agbno, void *priv) { … } /* * Compare two free space extents by block number. We want to sort in order of * increasing block number. */ static int xrep_bnobt_extent_cmp( const void *a, const void *b) { … } /* * Re-sort the free extents by block number so that we can put the records into * the bnobt in the correct order. Make sure the records do not overlap in * physical space. */ STATIC int xrep_bnobt_sort_records( struct xrep_abt *ra) { … } /* * Compare two free space extents by length and then block number. We want * to sort first in order of increasing length and then in order of increasing * block number. */ static int xrep_cntbt_extent_cmp( const void *a, const void *b) { … } /* * Sort the free extents by length so so that we can put the records into the * cntbt in the correct order. Don't let userspace kill us if we're resorting * after allocating btree blocks. */ STATIC int xrep_cntbt_sort_records( struct xrep_abt *ra, bool is_resort) { … } /* * Iterate all reverse mappings to find (1) the gaps between rmap records (all * unowned space), (2) the OWN_AG extents (which encompass the free space * btrees, the rmapbt, and the agfl), (3) the rmapbt blocks, and (4) the AGFL * blocks. The free space is (1) + (2) - (3) - (4). */ STATIC int xrep_abt_find_freespace( struct xrep_abt *ra) { … } /* * We're going to use the observed free space records to reserve blocks for the * new free space btrees, so we play an iterative game where we try to converge * on the number of blocks we need: * * 1. Estimate how many blocks we'll need to store the records. * 2. If the first free record has more blocks than we need, we're done. * We will have to re-sort the records prior to building the cntbt. * 3. If that record has exactly the number of blocks we need, null out the * record. We're done. * 4. Otherwise, we still need more blocks. Null out the record, subtract its * length from the number of blocks we need, and go back to step 1. * * Fortunately, we don't have to do any transaction work to play this game, so * we don't have to tear down the staging cursors. */ STATIC int xrep_abt_reserve_space( struct xrep_abt *ra, struct xfs_btree_cur *bno_cur, struct xfs_btree_cur *cnt_cur, bool *needs_resort) { … } STATIC int xrep_abt_dispose_one( struct xrep_abt *ra, struct xrep_newbt_resv *resv) { … } /* * Deal with all the space we reserved. Blocks that were allocated for the * free space btrees need to have a (deferred) rmap added for the OWN_AG * allocation, and blocks that didn't get used can be freed via the usual * (deferred) means. */ STATIC void xrep_abt_dispose_reservations( struct xrep_abt *ra, int error) { … } /* Retrieve free space data for bulk load. */ STATIC int xrep_abt_get_records( struct xfs_btree_cur *cur, unsigned int idx, struct xfs_btree_block *block, unsigned int nr_wanted, void *priv) { … } /* Feed one of the new btree blocks to the bulk loader. */ STATIC int xrep_abt_claim_block( struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr, void *priv) { … } /* * Reset the AGF counters to reflect the free space btrees that we just * rebuilt, then reinitialize the per-AG data. */ STATIC int xrep_abt_reset_counters( struct xrep_abt *ra) { … } /* * Use the collected free space information to stage new free space btrees. * If this is successful we'll return with the new btree root * information logged to the repair transaction but not yet committed. */ STATIC int xrep_abt_build_new_trees( struct xrep_abt *ra) { … } /* * Now that we've logged the roots of the new btrees, invalidate all of the * old blocks and free them. */ STATIC int xrep_abt_remove_old_trees( struct xrep_abt *ra) { … } /* Repair the freespace btrees for some AG. */ int xrep_allocbt( struct xfs_scrub *sc) { … } /* Make sure both btrees are ok after we've rebuilt them. */ int xrep_revalidate_allocbt( struct xfs_scrub *sc) { … }