// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2020-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong <[email protected]> */ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_bit.h" #include "xfs_log_format.h" #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_inode.h" #include "xfs_icache.h" #include "xfs_da_format.h" #include "xfs_da_btree.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_bmap.h" #include "xfs_quota.h" #include "xfs_bmap_btree.h" #include "xfs_trans_space.h" #include "xfs_bmap_util.h" #include "xfs_exchmaps.h" #include "xfs_exchrange.h" #include "xfs_ag.h" #include "xfs_parent.h" #include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" #include "scrub/repair.h" #include "scrub/tempfile.h" #include "scrub/tempexch.h" #include "scrub/xfile.h" #include "scrub/xfarray.h" #include "scrub/xfblob.h" #include "scrub/iscan.h" #include "scrub/readdir.h" #include "scrub/reap.h" #include "scrub/findparent.h" #include "scrub/orphanage.h" #include "scrub/listxattr.h" /* * Directory Repair * ================ * * We repair directories by reading the directory data blocks looking for * directory entries that look salvageable (name passes verifiers, entry points * to a valid allocated inode, etc). Each entry worth salvaging is stashed in * memory, and the stashed entries are periodically replayed into a temporary * directory to constrain memory use. Batching the construction of the * temporary directory in this fashion reduces lock cycling of the directory * being repaired and the temporary directory, and will later become important * for parent pointer scanning. * * If parent pointers are enabled on this filesystem, we instead reconstruct * the directory by visiting each parent pointer of each file in the filesystem * and translating the relevant parent pointer records into dirents. In this * case, it is advantageous to stash all directory entries created from parent * pointers for a single child file before replaying them into the temporary * directory. To save memory, the live filesystem scan reuses the findparent * fields. Directory repair chooses either parent pointer scanning or * directory entry salvaging, but not both. * * Directory entries added to the temporary directory do not elevate the link * counts of the inodes found. When salvaging completes, the remaining stashed * entries are replayed to the temporary directory. An atomic mapping exchange * is used to commit the new directory blocks to the directory being repaired. * This will disrupt readdir cursors. * * Locking Issues * -------------- * * If /a, /a/b, and /c are all directories, the VFS does not take i_rwsem on * /a/b for a "mv /a/b /c/" operation. This means that only b's ILOCK protects * b's dotdot update. This is in contrast to every other dotdot update (link, * remove, mkdir). If the repair code drops the ILOCK, it must either * revalidate the dotdot entry or use dirent hooks to capture updates from * other threads. */ /* Create a dirent in the tempdir. */ #define XREP_DIRENT_ADD … /* Remove a dirent from the tempdir. */ #define XREP_DIRENT_REMOVE … /* Directory entry to be restored in the new directory. */ struct xrep_dirent { … }; /* * Stash up to 8 pages of recovered dirent data in dir_entries and dir_names * before we write them to the temp dir. */ #define XREP_DIR_MAX_STASH_BYTES … struct xrep_dir { … }; /* Tear down all the incore stuff we created. */ static void xrep_dir_teardown( struct xfs_scrub *sc) { … } /* Set up for a directory repair. */ int xrep_setup_directory( struct xfs_scrub *sc) { … } /* * Look up the dotdot entry and confirm that it's really the parent. * Returns NULLFSINO if we don't know what to do. */ static inline xfs_ino_t xrep_dir_lookup_parent( struct xrep_dir *rd) { … } /* * Look up '..' in the dentry cache and confirm that it's really the parent. * Returns NULLFSINO if the dcache misses or if the hit is implausible. */ static inline xfs_ino_t xrep_dir_dcache_parent( struct xrep_dir *rd) { … } /* Try to find the parent of the directory being repaired. */ STATIC int xrep_dir_find_parent( struct xrep_dir *rd) { … } /* * Decide if we want to salvage this entry. We don't bother with oversized * names or the dot entry. */ STATIC int xrep_dir_want_salvage( struct xrep_dir *rd, const char *name, int namelen, xfs_ino_t ino) { … } /* * Remember that we want to create a dirent in the tempdir. These stashed * actions will be replayed later. */ STATIC int xrep_dir_stash_createname( struct xrep_dir *rd, const struct xfs_name *name, xfs_ino_t ino) { … } /* * Remember that we want to remove a dirent from the tempdir. These stashed * actions will be replayed later. */ STATIC int xrep_dir_stash_removename( struct xrep_dir *rd, const struct xfs_name *name, xfs_ino_t ino) { … } /* Allocate an in-core record to hold entries while we rebuild the dir data. */ STATIC int xrep_dir_salvage_entry( struct xrep_dir *rd, unsigned char *name, unsigned int namelen, xfs_ino_t ino) { … } /* Record a shortform directory entry for later reinsertion. */ STATIC int xrep_dir_salvage_sf_entry( struct xrep_dir *rd, struct xfs_dir2_sf_hdr *sfp, struct xfs_dir2_sf_entry *sfep) { … } /* Record a regular directory entry for later reinsertion. */ STATIC int xrep_dir_salvage_data_entry( struct xrep_dir *rd, struct xfs_dir2_data_entry *dep) { … } /* Try to recover block/data format directory entries. */ STATIC int xrep_dir_recover_data( struct xrep_dir *rd, struct xfs_buf *bp) { … } /* Try to recover shortform directory entries. */ STATIC int xrep_dir_recover_sf( struct xrep_dir *rd) { … } /* * Try to figure out the format of this directory from the data fork mappings * and the directory size. If we can be reasonably sure of format, we can be * more aggressive in salvaging directory entries. On return, @magic_guess * will be set to DIR3_BLOCK_MAGIC if we think this is a "block format" * directory; DIR3_DATA_MAGIC if we think this is a "data format" directory, * and 0 if we can't tell. */ STATIC void xrep_dir_guess_format( struct xrep_dir *rd, __be32 *magic_guess) { … } /* Recover directory entries from a specific directory block. */ STATIC int xrep_dir_recover_dirblock( struct xrep_dir *rd, __be32 magic_guess, xfs_dablk_t dabno) { … } static inline void xrep_dir_init_args( struct xrep_dir *rd, struct xfs_inode *dp, const struct xfs_name *name) { … } /* Replay a stashed createname into the temporary directory. */ STATIC int xrep_dir_replay_createname( struct xrep_dir *rd, const struct xfs_name *name, xfs_ino_t inum, xfs_extlen_t total) { … } /* Replay a stashed removename onto the temporary directory. */ STATIC int xrep_dir_replay_removename( struct xrep_dir *rd, const struct xfs_name *name, xfs_extlen_t total) { … } /* * Add this stashed incore directory entry to the temporary directory. * The caller must hold the tempdir's IOLOCK, must not hold any ILOCKs, and * must not be in transaction context. */ STATIC int xrep_dir_replay_update( struct xrep_dir *rd, const struct xfs_name *xname, const struct xrep_dirent *dirent) { … } /* * Flush stashed incore dirent updates that have been recorded by the scanner. * This is done to reduce the memory requirements of the directory rebuild, * since directories can contain up to 32GB of directory data. * * Caller must not hold transactions or ILOCKs. Caller must hold the tempdir * IOLOCK. */ STATIC int xrep_dir_replay_updates( struct xrep_dir *rd) { … } /* * Periodically flush stashed directory entries to the temporary dir. This * is done to reduce the memory requirements of the directory rebuild, since * directories can contain up to 32GB of directory data. */ STATIC int xrep_dir_flush_stashed( struct xrep_dir *rd) { … } /* Decide if we've stashed too much dirent data in memory. */ static inline bool xrep_dir_want_flush_stashed( struct xrep_dir *rd) { … } /* Extract as many directory entries as we can. */ STATIC int xrep_dir_recover( struct xrep_dir *rd) { … } /* * Find all the directory entries for this inode by scraping them out of the * directory leaf blocks by hand, and flushing them into the temp dir. */ STATIC int xrep_dir_find_entries( struct xrep_dir *rd) { … } /* Scan all files in the filesystem for dirents. */ STATIC int xrep_dir_salvage_entries( struct xrep_dir *rd) { … } /* * Examine a parent pointer of a file. If it leads us back to the directory * that we're rebuilding, create an incore dirent from the parent pointer and * stash it. */ STATIC int xrep_dir_scan_pptr( struct xfs_scrub *sc, struct xfs_inode *ip, unsigned int attr_flags, const unsigned char *name, unsigned int namelen, const void *value, unsigned int valuelen, void *priv) { … } /* * If this child dirent points to the directory being repaired, remember that * fact so that we can reset the dotdot entry if necessary. */ STATIC int xrep_dir_scan_dirent( struct xfs_scrub *sc, struct xfs_inode *dp, xfs_dir2_dataptr_t dapos, const struct xfs_name *name, xfs_ino_t ino, void *priv) { … } /* * Decide if we want to look for child dirents or parent pointers in this file. * Skip the dir being repaired and any files being used to stage repairs. */ static inline bool xrep_dir_want_scan( struct xrep_dir *rd, const struct xfs_inode *ip) { … } /* * Take ILOCK on a file that we want to scan. * * Select ILOCK_EXCL if the file is a directory with an unloaded data bmbt or * has an unloaded attr bmbt. Otherwise, take ILOCK_SHARED. */ static inline unsigned int xrep_dir_scan_ilock( struct xrep_dir *rd, struct xfs_inode *ip) { … } /* * Scan this file for relevant child dirents or parent pointers that point to * the directory we're rebuilding. */ STATIC int xrep_dir_scan_file( struct xrep_dir *rd, struct xfs_inode *ip) { … } /* * Scan all files in the filesystem for parent pointers that we can turn into * replacement dirents, and a dirent that we can use to set the dotdot pointer. */ STATIC int xrep_dir_scan_dirtree( struct xrep_dir *rd) { … } /* * Capture dirent updates being made by other threads which are relevant to the * directory being repaired. */ STATIC int xrep_dir_live_update( struct notifier_block *nb, unsigned long action, void *data) { … } /* * Free all the directory blocks and reset the data fork. The caller must * join the inode to the transaction. This function returns with the inode * joined to a clean scrub transaction. */ STATIC int xrep_dir_reset_fork( struct xrep_dir *rd, xfs_ino_t parent_ino) { … } /* * Prepare both inodes' directory forks for exchanging mappings. Promote the * tempfile from short format to leaf format, and if the file being repaired * has a short format data fork, turn it into an empty extent list. */ STATIC int xrep_dir_swap_prep( struct xfs_scrub *sc, bool temp_local, bool ip_local) { … } /* * Replace the inode number of a directory entry. */ static int xrep_dir_replace( struct xrep_dir *rd, struct xfs_inode *dp, const struct xfs_name *name, xfs_ino_t inum, xfs_extlen_t total) { … } /* * Reset the link count of this directory and adjust the unlinked list pointers * as needed. */ STATIC int xrep_dir_set_nlink( struct xrep_dir *rd) { … } /* * Finish replaying stashed dirent updates, allocate a transaction for * exchanging data fork mappings, and take the ILOCKs of both directories * before we commit the new directory structure. */ STATIC int xrep_dir_finalize_tempdir( struct xrep_dir *rd) { … } /* Exchange the temporary directory's data fork with the one being repaired. */ STATIC int xrep_dir_swap( struct xrep_dir *rd) { … } /* * Exchange the new directory contents (which we created in the tempfile) with * the directory being repaired. */ STATIC int xrep_dir_rebuild_tree( struct xrep_dir *rd) { … } /* Set up the filesystem scan so we can regenerate directory entries. */ STATIC int xrep_dir_setup_scan( struct xrep_dir *rd) { … } /* * Move the current file to the orphanage. * * Caller must hold IOLOCK_EXCL on @sc->ip, and no other inode locks. Upon * successful return, the scrub transaction will have enough extra reservation * to make the move; it will hold IOLOCK_EXCL and ILOCK_EXCL of @sc->ip and the * orphanage; and both inodes will be ijoined. */ STATIC int xrep_dir_move_to_orphanage( struct xrep_dir *rd) { … } /* * Repair the directory metadata. * * XXX: Directory entry buffers can be multiple fsblocks in size. The buffer * cache in XFS can't handle aliased multiblock buffers, so this might * misbehave if the directory blocks are crosslinked with other filesystem * metadata. * * XXX: Is it necessary to check the dcache for this directory to make sure * that we always recreate every cached entry? */ int xrep_directory( struct xfs_scrub *sc) { … }