// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2017-2023 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <[email protected]>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_inode.h"
#include "xfs_icache.h"
#include "xfs_alloc.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_refcount_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_log.h"
#include "xfs_trans_priv.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_dir2_priv.h"
#include "xfs_dir2.h"
#include "xfs_attr.h"
#include "xfs_reflink.h"
#include "xfs_ag.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_exchmaps.h"
#include "xfs_rtbitmap.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/health.h"

/* Common code for the metadata scrubbers. */

/*
 * Handling operational errors.
 *
 * The *_process_error() family of functions are used to process error return
 * codes from functions called as part of a scrub operation.
 *
 * If there's no error, we return true to tell the caller that it's ok
 * to move on to the next check in its list.
 *
 * For non-verifier errors (e.g. ENOMEM) we return false to tell the
 * caller that something bad happened, and we preserve *error so that
 * the caller can return the *error up the stack to userspace.
 *
 * Verifier errors (EFSBADCRC/EFSCORRUPTED) are recorded by setting
 * OFLAG_CORRUPT in sm_flags and the *error is cleared. In other words,
 * we track verifier errors (and failed scrub checks) via OFLAG_CORRUPT,
 * not via return codes. We return false to tell the caller that
 * something bad happened. Since the error has been cleared, the caller
 * will (presumably) return that zero and scrubbing will move on to
 * whatever's next.
 *
 * ftrace can be used to record the precise metadata location and the
 * approximate code location of the failed operation.
 */

/* Check for operational errors. */
static bool
__xchk_process_error(
	struct xfs_scrub	*sc,
	xfs_agnumber_t		agno,
	xfs_agblock_t		bno,
	int			*error,
	__u32			errflag,
	void			*ret_ip)
{ … }

bool
xchk_process_error(
	struct xfs_scrub	*sc,
	xfs_agnumber_t		agno,
	xfs_agblock_t		bno,
	int			*error)
{ … }

bool
xchk_xref_process_error(
	struct xfs_scrub	*sc,
	xfs_agnumber_t		agno,
	xfs_agblock_t		bno,
	int			*error)
{ … }

/* Check for operational errors for a file offset. */
static bool
__xchk_fblock_process_error(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset,
	int			*error,
	__u32			errflag,
	void			*ret_ip)
{ … }

bool
xchk_fblock_process_error(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset,
	int			*error)
{ … }

bool
xchk_fblock_xref_process_error(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset,
	int			*error)
{ … }

/*
 * Handling scrub corruption/optimization/warning checks.
 *
 * The *_set_{corrupt,preen,warning}() family of functions are used to
 * record the presence of metadata that is incorrect (corrupt), could be
 * optimized somehow (preen), or should be flagged for administrative
 * review but is not incorrect (warn).
 *
 * ftrace can be used to record the precise metadata location and
 * approximate code location of the failed check.
 */

/* Record a block which could be optimized. */
void
xchk_block_set_preen(
	struct xfs_scrub	*sc,
	struct xfs_buf		*bp)
{ … }
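/*
 * Illustrative sketch (hypothetical helper, not in the kernel tree):
 * a scrubber typically filters libxfs return codes through
 * xchk_fblock_process_error() and records bad metadata with
 * xchk_fblock_set_corrupt(), per the conventions described above.
 * The helper name and the delalloc test are made up for illustration.
 */
STATIC int
xchk_example_check_mapping(
	struct xfs_scrub	*sc,
	xfs_fileoff_t		offset)
{
	struct xfs_bmbt_irec	irec;
	int			nmaps = 1;
	int			error;

	error = xfs_bmapi_read(sc->ip, offset, 1, &irec, &nmaps, 0);
	if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, offset, &error))
		return error;	/* zero if a verifier error was recorded */

	/* Purely illustrative check: flag a delalloc mapping here. */
	if (nmaps == 1 && isnullstartblock(irec.br_startblock))
		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
	return 0;
}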
/*
 * Record an inode which could be optimized. The trace data will use
 * the block location of the inode record itself.
 */
void
xchk_ino_set_preen(
	struct xfs_scrub	*sc,
	xfs_ino_t		ino)
{ … }

/* Record something being wrong with the filesystem primary superblock. */
void
xchk_set_corrupt(
	struct xfs_scrub	*sc)
{ … }

/* Record a corrupt block. */
void
xchk_block_set_corrupt(
	struct xfs_scrub	*sc,
	struct xfs_buf		*bp)
{ … }

#ifdef CONFIG_XFS_QUOTA
/* Record a corrupt quota counter. */
void
xchk_qcheck_set_corrupt(
	struct xfs_scrub	*sc,
	unsigned int		dqtype,
	xfs_dqid_t		id)
{ … }
#endif

/* Record a corruption while cross-referencing. */
void
xchk_block_xref_set_corrupt(
	struct xfs_scrub	*sc,
	struct xfs_buf		*bp)
{ … }

/*
 * Record a corrupt inode. The trace data will use the block location
 * of the inode record itself.
 */
void
xchk_ino_set_corrupt(
	struct xfs_scrub	*sc,
	xfs_ino_t		ino)
{ … }

/* Record a corruption while cross-referencing with an inode. */
void
xchk_ino_xref_set_corrupt(
	struct xfs_scrub	*sc,
	xfs_ino_t		ino)
{ … }

/* Record corruption in a block indexed by a file fork. */
void
xchk_fblock_set_corrupt(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset)
{ … }

/* Record a corruption while cross-referencing a fork block. */
void
xchk_fblock_xref_set_corrupt(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset)
{ … }

/*
 * Warn about an inode that needs administrative review but is not
 * incorrect.
 */
void
xchk_ino_set_warning(
	struct xfs_scrub	*sc,
	xfs_ino_t		ino)
{ … }

/* Warn about a block indexed by a file fork that needs review. */
void
xchk_fblock_set_warning(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset)
{ … }

/* Signal an incomplete scrub. */
void
xchk_set_incomplete(
	struct xfs_scrub	*sc)
{ … }

/*
 * rmap scrubbing -- compute the number of blocks with a given owner,
 * at least according to the reverse mapping data.
 */

struct xchk_rmap_ownedby_info { … };

STATIC int
xchk_count_rmap_ownedby_irec(
	struct xfs_btree_cur		*cur,
	const struct xfs_rmap_irec	*rec,
	void				*priv)
{ … }

/*
 * Calculate the number of blocks the rmap thinks are owned by something.
 * The caller should pass us an rmapbt cursor.
 */
int
xchk_count_rmap_ownedby_ag(
	struct xfs_scrub		*sc,
	struct xfs_btree_cur		*cur,
	const struct xfs_owner_info	*oinfo,
	xfs_filblks_t			*blocks)
{ … }

/*
 * AG scrubbing
 *
 * These helpers facilitate locking an allocation group's header
 * buffers, setting up cursors for all btrees that are present, and
 * cleaning everything up once we're through.
 */

/* Decide if we want to return an AG header read failure. */
static inline bool
want_ag_read_header_failure(
	struct xfs_scrub	*sc,
	unsigned int		type)
{ … }

/*
 * Grab the AG header buffers for the attached perag structure.
 *
 * The headers should be released by xchk_ag_free, but as a fail safe we attach
 * all the buffers we grab to the scrub transaction so they'll all be freed
 * when we cancel it.
 */
static inline int
xchk_perag_read_headers(
	struct xfs_scrub	*sc,
	struct xchk_ag		*sa)
{ … }

/*
 * Grab the AG headers for the attached perag structure and wait for pending
 * intents to drain.
 */
int
xchk_perag_drain_and_lock(
	struct xfs_scrub	*sc)
{ … }

/*
 * Grab the per-AG structure, grab all AG header buffers, and wait until there
 * aren't any pending intents. Returns -ENOENT if we can't grab the perag
 * structure.
 */
int
xchk_ag_read_headers(
	struct xfs_scrub	*sc,
	xfs_agnumber_t		agno,
	struct xchk_ag		*sa)
{ … }
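/*
 * Illustrative cross-reference sketch (hypothetical helper, not in the
 * kernel tree): count the blocks that the rmapbt thinks belong to the
 * inode btrees with xchk_count_rmap_ownedby_ag() and compare against a
 * caller-computed total. XFS_RMAP_OWN_INOBT and the called helpers are
 * real; the function itself is made up for illustration.
 */
STATIC void
xchk_example_xref_inobt_blocks(
	struct xfs_scrub	*sc,
	xfs_filblks_t		expected)
{
	struct xfs_owner_info	oinfo;
	xfs_filblks_t		blocks;
	int			error;

	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
	error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, &oinfo,
			&blocks);
	if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
		return;
	if (blocks != expected)
		xchk_block_xref_set_corrupt(sc, sc->sa.agf_bp);
}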
/* Release all the AG btree cursors. */
void
xchk_ag_btcur_free(
	struct xchk_ag		*sa)
{ … }

/* Initialize all the btree cursors for an AG. */
void
xchk_ag_btcur_init(
	struct xfs_scrub	*sc,
	struct xchk_ag		*sa)
{ … }

/* Release the AG header context and btree cursors. */
void
xchk_ag_free(
	struct xfs_scrub	*sc,
	struct xchk_ag		*sa)
{ … }

/*
 * For scrub, grab the perag structure, the AGI, and the AGF headers, in that
 * order. Locking order requires us to get the AGI before the AGF. We use the
 * transaction to avoid deadlocking on crosslinked metadata buffers; either the
 * caller passes one in (bmap scrub) or we have to create a transaction
 * ourselves. Returns -ENOENT if the perag struct cannot be grabbed.
 */
int
xchk_ag_init(
	struct xfs_scrub	*sc,
	xfs_agnumber_t		agno,
	struct xchk_ag		*sa)
{ … }

/* Per-scrubber setup functions */

void
xchk_trans_cancel(
	struct xfs_scrub	*sc)
{ … }

int
xchk_trans_alloc_empty(
	struct xfs_scrub	*sc)
{ … }

/*
 * Grab an empty transaction so that we can re-grab locked buffers if
 * one of our btrees turns out to be cyclic.
 *
 * If we're going to repair something, we need to ask for the largest possible
 * log reservation so that we can handle the worst case scenario for metadata
 * updates while rebuilding a metadata item. We also need to reserve as many
 * blocks in the head transaction as we think we're going to need to rebuild
 * the metadata object.
 */
int
xchk_trans_alloc(
	struct xfs_scrub	*sc,
	uint			resblks)
{ … }

/* Set us up with a transaction and an empty context. */
int
xchk_setup_fs(
	struct xfs_scrub	*sc)
{ … }

/* Set us up with AG headers and btree cursors. */
int
xchk_setup_ag_btree(
	struct xfs_scrub	*sc,
	bool			force_log)
{ … }

/* Push everything out of the log onto disk. */
int
xchk_checkpoint_log(
	struct xfs_mount	*mp)
{ … }

/* Verify that an inode is allocated ondisk, then return its cached inode. */
int
xchk_iget(
	struct xfs_scrub	*sc,
	xfs_ino_t		inum,
	struct xfs_inode	**ipp)
{ … }

/*
 * Try to grab an inode in a manner that avoids races with physical inode
 * allocation. If we can't, return the locked AGI buffer so that the caller
 * can single-step the loading process to see where things went wrong.
 * Callers must have a valid scrub transaction.
 *
 * If the iget succeeds, return 0, a NULL AGI, and the inode.
 *
 * If the iget fails, return the error, the locked AGI, and a NULL inode. This
 * can include -EINVAL and -ENOENT for invalid inode numbers or inodes that are
 * no longer allocated; or any other corruption or runtime error.
 *
 * If the AGI read fails, return the error, a NULL AGI, and NULL inode.
 *
 * If a fatal signal is pending, return -EINTR, a NULL AGI, and a NULL inode.
 */
int
xchk_iget_agi(
	struct xfs_scrub	*sc,
	xfs_ino_t		inum,
	struct xfs_buf		**agi_bpp,
	struct xfs_inode	**ipp)
{ … }

#ifdef CONFIG_XFS_QUOTA
/*
 * Try to attach dquots to this inode if we think we might want to repair it.
 * Callers must not hold any ILOCKs. If the dquots are broken and cannot be
 * attached, a quotacheck will be scheduled.
 */
int
xchk_ino_dqattach(
	struct xfs_scrub	*sc)
{ … }
#endif

/* Install an inode that we opened by handle for scrubbing. */
int
xchk_install_handle_inode(
	struct xfs_scrub	*sc,
	struct xfs_inode	*ip)
{ … }
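/*
 * Illustrative caller sketch (hypothetical helper, not in the kernel
 * tree) for the xchk_iget_agi() contract documented above: on success
 * there is no AGI buffer; on failure the locked AGI may be returned so
 * the caller can single-step the inode loading process.
 */
STATIC int
xchk_example_grab_inode(
	struct xfs_scrub	*sc,
	xfs_ino_t		inum)
{
	struct xfs_buf		*agi_bp = NULL;
	struct xfs_inode	*ip = NULL;
	int			error;

	error = xchk_iget_agi(sc, inum, &agi_bp, &ip);
	if (!error) {
		/* Success: ip is valid and no AGI buffer is held. */
		xchk_irele(sc, ip);
		return 0;
	}
	if (agi_bp) {
		/*
		 * The iget failed but we hold the locked AGI; a real
		 * caller would probe the inode btrees here before
		 * letting go of the buffer.
		 */
		xfs_trans_brelse(sc->tp, agi_bp);
	}
	return error;
}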
/*
 * Install an already-referenced inode for scrubbing. Get our own reference to
 * the inode to make disposal simpler. The inode must not be in I_FREEING or
 * I_WILL_FREE state!
 */
int
xchk_install_live_inode(
	struct xfs_scrub	*sc,
	struct xfs_inode	*ip)
{ … }

/*
 * In preparation to scrub metadata structures that hang off of an inode,
 * grab either the inode referenced in the scrub control structure or the
 * inode passed in. If the inumber does not reference an allocated inode
 * record, the function returns -ENOENT to end the scrub early. The inode
 * is not locked.
 */
int
xchk_iget_for_scrubbing(
	struct xfs_scrub	*sc)
{ … }

/* Release an inode, possibly dropping it in the process. */
void
xchk_irele(
	struct xfs_scrub	*sc,
	struct xfs_inode	*ip)
{ … }

/*
 * Set us up to scrub metadata mapped by a file's fork. Callers must not use
 * this to operate on user-accessible regular file data because the MMAPLOCK is
 * not taken.
 */
int
xchk_setup_inode_contents(
	struct xfs_scrub	*sc,
	unsigned int		resblks)
{ … }

void
xchk_ilock(
	struct xfs_scrub	*sc,
	unsigned int		ilock_flags)
{ … }

bool
xchk_ilock_nowait(
	struct xfs_scrub	*sc,
	unsigned int		ilock_flags)
{ … }

void
xchk_iunlock(
	struct xfs_scrub	*sc,
	unsigned int		ilock_flags)
{ … }

/*
 * Predicate that decides if we need to evaluate the cross-reference check.
 * If there was an error accessing the cross-reference btree, just delete
 * the cursor and skip the check.
 */
bool
xchk_should_check_xref(
	struct xfs_scrub	*sc,
	int			*error,
	struct xfs_btree_cur	**curpp)
{ … }

/* Run the structure verifiers on in-memory buffers to detect bad memory. */
void
xchk_buffer_recheck(
	struct xfs_scrub	*sc,
	struct xfs_buf		*bp)
{ … }

static inline int
xchk_metadata_inode_subtype(
	struct xfs_scrub	*sc,
	unsigned int		scrub_type)
{ … }

/*
 * Scrub the attr/data forks of a metadata inode. The metadata inode must be
 * pointed to by sc->ip and the ILOCK must be held.
 */
int
xchk_metadata_inode_forks(
	struct xfs_scrub	*sc)
{ … }

/*
 * Enable filesystem hooks (i.e. runtime code patching) before starting a scrub
 * operation. Callers must not hold any locks that intersect with the CPU
 * hotplug lock (e.g. writeback locks) because code patching must halt the CPUs
 * to change kernel code.
 */
void
xchk_fsgates_enable(
	struct xfs_scrub	*sc,
	unsigned int		scrub_fsgates)
{ … }

/*
 * Decide if this is a cached inode that's also allocated. The caller
 * must hold a reference to an AG and the AGI buffer lock to prevent inodes
 * from being allocated or freed.
 *
 * Look up an inode by number in the given file system. If the inode number
 * is invalid, return -EINVAL. If the inode is not in cache, return -ENODATA.
 * If the inode is being reclaimed, return -ENODATA because we know the inode
 * cache cannot be updating the ondisk metadata.
 *
 * Otherwise, the incore inode is the one we want, and it is either live,
 * somewhere in the inactivation machinery, or reclaimable. The inode is
 * allocated if i_mode is nonzero. In all three cases, the cached inode will
 * be more up to date than the ondisk inode buffer, so we must use the incore
 * i_mode.
 */
int
xchk_inode_is_allocated(
	struct xfs_scrub	*sc,
	xfs_agino_t		agino,
	bool			*inuse)
{ … }
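/*
 * Illustrative locking sketch (hypothetical helper, not in the kernel
 * tree): the xchk_ilock helpers above record the held flags in the
 * scrub context so that teardown can release them. The name and body
 * are made up; a real scrubber would examine sc->ip's metadata while
 * the lock is held.
 */
STATIC void
xchk_example_lock_pattern(
	struct xfs_scrub	*sc)
{
	/* Opportunistic attempt; returns false if the lock is contended. */
	if (!xchk_ilock_nowait(sc, XFS_ILOCK_SHARED))
		xchk_ilock(sc, XFS_ILOCK_SHARED);	/* blocking fallback */

	/* ... check sc->ip's metadata here ... */

	xchk_iunlock(sc, XFS_ILOCK_SHARED);
}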