// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2018-2023 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <[email protected]>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_icache.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_log.h"
#include "xfs_trans_priv.h"
#include "xfs_error.h"
#include "xfs_health.h"
#include "xfs_ag.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
#include "scrub/agb_bitmap.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/newbt.h"
#include "scrub/reap.h"

/*
 * Inode Btree Repair
 * ==================
 *
 * A quick refresher of inode btrees on a v5 filesystem:
 *
 * - Inode records are read into memory in units of 'inode clusters'.  However
 *   many inodes fit in a cluster buffer is the smallest number of inodes that
 *   can be allocated or freed.  Clusters are never smaller than one fs block
 *   though they can span multiple blocks.  The size (in fs blocks) is
 *   computed with xfs_icluster_size_fsb().  The fs block alignment of a
 *   cluster is computed with xfs_ialloc_cluster_alignment().
 *
 * - Each inode btree record can describe a single 'inode chunk'.  The chunk
 *   size is defined to be 64 inodes.  If sparse inodes are enabled, every
 *   inobt record must be aligned to the chunk size; if not, every record must
 *   be aligned to the start of a cluster.  It is possible to construct an XFS
 *   geometry where one inobt record maps to multiple inode clusters; it is
 *   also possible to construct a geometry where multiple inobt records map to
 *   different parts of one inode cluster.
 *
 * - If sparse inodes are not enabled, the smallest unit of allocation for
 *   inode records is enough to contain one inode chunk's worth of inodes.
 *
 * - If sparse inodes are enabled, the holemask field will be active.  Each
 *   bit of the holemask represents 4 potential inodes; if set, the
 *   corresponding space does *not* contain inodes and must be left alone.
 *   Clusters cannot be smaller than 4 inodes.  The smallest unit of
 *   allocation of inode records is one inode cluster.
 *
 * So what's the rebuild algorithm?
 *
 * Iterate the reverse mapping records looking for OWN_INODES and OWN_INOBT
 * records.  The OWN_INOBT records are the old inode btree blocks and will be
 * cleared out after we've rebuilt the tree.  Each possible inode cluster
 * within an OWN_INODES record will be read in; for each possible inobt record
 * associated with that cluster, compute the freemask from the i_mode data in
 * the inode chunk.  For sparse inodes the holemask will be calculated by
 * creating the properly aligned inobt record and punching out any chunk
 * that's missing.  Inode allocations and frees grab the AGI first, so repair
 * protects itself from concurrent access by locking the AGI.
 *
 * Once we've reconstructed all the inode records, we can create new inode
 * btree roots and reload the btrees.  We rebuild both inode trees at the same
 * time because they have the same rmap owner and it would be more complex to
 * figure out if the other tree isn't in need of a rebuild and which OWN_INOBT
 * blocks it owns.  We have all the data we need to build both, so dump
 * everything and start over.
 *
 * We use the prefix 'xrep_ibt' because we rebuild both inode btrees at once.
 */
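
/*
 * Illustrative sketch, not part of the repair code: how the freemask
 * described above is accumulated.  In struct xfs_inobt_rec_incore, ir_free
 * holds one bit per inode in the 64-inode chunk; a set bit means the inode
 * is free.  This assumes the caller has already decided whether each inode
 * is in use (e.g. via xrep_ibt_check_ifree() below) and that ir_freecount is
 * kept in sync separately.
 */
static inline void
xrep_ibt_sketch_mark_inode(
	struct xfs_inobt_rec_incore	*irec,
	unsigned int			chunk_index,
	bool				inuse)
{
	if (inuse)
		irec->ir_free &= ~XFS_INOBT_MASK(chunk_index);	/* in use */
	else
		irec->ir_free |= XFS_INOBT_MASK(chunk_index);	/* free */
}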

struct xrep_ibt {
	…
};

/*
 * Is this inode in use?  If the inode is in memory we can tell from i_mode,
 * otherwise we have to check di_mode in the on-disk buffer.  We only care
 * that the high (i.e. non-permission) bits of _mode are zero.  This should be
 * safe because repair keeps all AG headers locked until the end, and any
 * process trying to perform an inode allocation/free must lock the AGI.
 *
 * @cluster_ag_base is the inode offset of the cluster within the AG.
 * @cluster_bp is the cluster buffer.
 * @cluster_index is the inode offset within the inode cluster.
 */
STATIC int
xrep_ibt_check_ifree(
	struct xrep_ibt		*ri,
	xfs_agino_t		cluster_ag_base,
	struct xfs_buf		*cluster_bp,
	unsigned int		cluster_index,
	bool			*inuse)
{
	…
}

/* Stash the accumulated inobt record for rebuilding. */
STATIC int
xrep_ibt_stash(
	struct xrep_ibt		*ri)
{
	…
}

/*
 * Given an extent of inodes and an inode cluster buffer, calculate the
 * location of the corresponding inobt record (creating it if necessary),
 * then update the parts of the holemask and freemask of that record that
 * correspond to the inode extent we were given.
 *
 * @cluster_ir_startino is the AG inode number of an inobt record that we're
 * proposing to create for this inode cluster.  If sparse inodes are enabled,
 * we must round down to a chunk boundary to find the actual sparse record.
 * @cluster_bp is the buffer of the inode cluster.
 * @nr_inodes is the number of inodes to check from the cluster.
 */
STATIC int
xrep_ibt_cluster_record(
	struct xrep_ibt		*ri,
	xfs_agino_t		cluster_ir_startino,
	struct xfs_buf		*cluster_bp,
	unsigned int		nr_inodes)
{
	…
}

/*
 * For each inode cluster covering the physical extent recorded by the
 * rmapbt, we must calculate the properly aligned startino of that cluster,
 * then iterate each cluster to fill in used and filled masks appropriately.
 * We then use the (startino, used, filled) information to construct the
 * appropriate inode records.
 */
STATIC int
xrep_ibt_process_cluster(
	struct xrep_ibt		*ri,
	xfs_agblock_t		cluster_bno)
{
	…
}

/* Check for any obvious conflicts in the inode chunk extent. */
STATIC int
xrep_ibt_check_inode_ext(
	struct xfs_scrub	*sc,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len)
{
	…
}

/* Found a fragment of the old inode btrees; dispose of them later. */
STATIC int
xrep_ibt_record_old_btree_blocks(
	struct xrep_ibt			*ri,
	const struct xfs_rmap_irec	*rec)
{
	…
}

/* Record extents that belong to inode cluster blocks. */
STATIC int
xrep_ibt_record_inode_blocks(
	struct xrep_ibt			*ri,
	const struct xfs_rmap_irec	*rec)
{
	…
}

STATIC int
xrep_ibt_walk_rmap(
	struct xfs_btree_cur		*cur,
	const struct xfs_rmap_irec	*rec,
	void				*priv)
{
	…
}

/*
 * Iterate all reverse mappings to find the inodes (OWN_INODES) and the inode
 * btrees (OWN_INOBT).  Figure out if we have enough free space to reconstruct
 * the inode btrees.  The caller must clean up the lists if anything goes
 * wrong.
 */
STATIC int
xrep_ibt_find_inodes(
	struct xrep_ibt		*ri)
{
	…
}

/* Update the AGI counters. */
STATIC int
xrep_ibt_reset_counters(
	struct xrep_ibt		*ri)
{
	…
}
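
/*
 * Illustrative sketch, not part of the repair code: the free inode btree
 * indexes only those chunks that still contain free inodes, so the finobt
 * bulk-load record source can be thought of as the stashed inobt records
 * filtered by this predicate.  The actual getter below presumably walks the
 * stashed record list; this helper only shows the selection criterion.
 */
static inline bool
xrep_fibt_sketch_want_record(
	const struct xfs_inobt_rec_incore	*irec)
{
	return irec->ir_freecount > 0;
}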

/* Retrieve finobt data for bulk load. */
STATIC int
xrep_fibt_get_records(
	struct xfs_btree_cur	*cur,
	unsigned int		idx,
	struct xfs_btree_block	*block,
	unsigned int		nr_wanted,
	void			*priv)
{
	…
}

/* Retrieve inobt data for bulk load. */
STATIC int
xrep_ibt_get_records(
	struct xfs_btree_cur	*cur,
	unsigned int		idx,
	struct xfs_btree_block	*block,
	unsigned int		nr_wanted,
	void			*priv)
{
	…
}

/* Feed one of the new inobt blocks to the bulk loader. */
STATIC int
xrep_ibt_claim_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	void			*priv)
{
	…
}

/* Feed one of the new finobt blocks to the bulk loader. */
STATIC int
xrep_fibt_claim_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	void			*priv)
{
	…
}

/* Make sure the records do not overlap in inumber address space. */
STATIC int
xrep_ibt_check_overlap(
	struct xrep_ibt		*ri)
{
	…
}

/* Build new inode btrees and dispose of the old ones. */
STATIC int
xrep_ibt_build_new_trees(
	struct xrep_ibt		*ri)
{
	…
}

/*
 * Now that we've logged the roots of the new btrees, invalidate all of the
 * old blocks and free them.
 */
STATIC int
xrep_ibt_remove_old_trees(
	struct xrep_ibt		*ri)
{
	…
}

/* Repair both inode btrees. */
int
xrep_iallocbt(
	struct xfs_scrub	*sc)
{
	…
}

/* Make sure both btrees are ok after we've rebuilt them. */
int
xrep_revalidate_iallocbt(
	struct xfs_scrub	*sc)
{
	…
}
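
/*
 * Illustrative sketch, not the actual implementation: the rule that
 * xrep_ibt_check_overlap() above is expected to enforce.  Assuming the
 * stashed records are visited in ir_startino order, each inode chunk spans
 * XFS_INODES_PER_CHUNK inumbers, so a record overlaps its predecessor if it
 * starts before the predecessor's chunk ends.
 */
static inline bool
xrep_ibt_sketch_overlaps(
	const struct xfs_inobt_rec_incore	*prev,
	const struct xfs_inobt_rec_incore	*next)
{
	return next->ir_startino < prev->ir_startino + XFS_INODES_PER_CHUNK;
}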