// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (c) 2020-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <[email protected]>
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_icache.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_dir2.h"
#include "xfs_bmap_btree.h"
#include "xfs_dir2_priv.h"
#include "xfs_trans_space.h"
#include "xfs_health.h"
#include "xfs_exchmaps.h"
#include "xfs_parent.h"
#include "xfs_attr.h"
#include "xfs_bmap.h"
#include "xfs_ag.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/iscan.h"
#include "scrub/findparent.h"
#include "scrub/readdir.h"
#include "scrub/tempfile.h"
#include "scrub/tempexch.h"
#include "scrub/orphanage.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/xfblob.h"
#include "scrub/attr_repair.h"
#include "scrub/listxattr.h"
/*
* Repairing The Directory Parent Pointer
* ======================================
*
* Currently, only directories support parent pointers (in the form of '..'
* entries), so we simply scan the filesystem and update the '..' entry.
*
* Note that because the only parent pointer is the dotdot entry, we won't
* touch an unhealthy directory, since the directory repair code is perfectly
* capable of rebuilding a directory with the proper parent inode.
*
* See the section on locking issues in dir_repair.c for more information about
* conflicts with the VFS. The findparent code will keep our incore parent
* inode up to date.
*
* If parent pointers are enabled, we instead reconstruct the parent pointer
* information by visiting every directory entry of every directory in the
* system and translating the relevant dirents into parent pointers. In this
* case, it is advantageous to stash all parent pointers created from dirents
* from a single parent file before replaying them into the temporary file. To
* save memory, the live filesystem scan reuses the findparent object. Parent
* pointer repair chooses either directory scanning or findparent, but not
* both.
*
* When salvaging completes, the remaining stashed entries are replayed to the
* temporary file. All non-parent pointer extended attributes are copied to
* the temporary file's extended attributes. An atomic file mapping exchange
* is used to commit the new xattr blocks to the file being repaired. This
* will disrupt attrmulti cursors.
*/
/*
 * Values for xrep_pptr.action.  They select what replaying a stashed update
 * does to the tempfile (see xrep_parent_replay_update).
 */
/* Create a parent pointer in the tempfile. */
#define XREP_PPTR_ADD (1)
/* Remove a parent pointer from the tempfile. */
#define XREP_PPTR_REMOVE (2)
/*
 * A stashed parent pointer update.  Records of this type are appended to
 * xrep_parent.pptr_recs; the name bytes live in xrep_parent.pptr_names and
 * are found again through @name_cookie.
 */
struct xrep_pptr {
/* Cookie for retrieval of the pptr name. */
xfblob_cookie name_cookie;
/* Parent pointer record. */
struct xfs_parent_rec pptr_rec;
/* Length of the pptr name. */
uint8_t namelen;
/* XREP_PPTR_{ADD,REMOVE} */
uint8_t action;
};
/*
 * Stash up to 8 pages of recovered parent pointers in pptr_recs and
 * pptr_names before we write them to the temp file.  The directory tree scan
 * compares the combined byte usage of both stashes against this limit after
 * each scanned file and replays the stash once the limit is exceeded.
 */
#define XREP_PARENT_MAX_STASH_BYTES (PAGE_SIZE * 8)
/*
 * In-core state for one parent (pointer) repair.  xrep_setup_parent allocates
 * this zeroed and attaches it to sc->buf.
 */
struct xrep_parent {
/* Scrub context that this repair belongs to. */
struct xfs_scrub *sc;
/* Fixed-size array of xrep_pptr structures. */
struct xfarray *pptr_recs;
/* Blobs containing parent pointer names. */
struct xfblob *pptr_names;
/* xattr keys */
struct xfarray *xattr_records;
/* xattr values */
struct xfblob *xattr_blobs;
/*
 * Scratch buffers for saving extended attributes.  xattr_name is written at
 * index XATTR_NAME_MAX when attrs are inserted, so it presumably holds at
 * least XATTR_NAME_MAX + 1 bytes (allocation not shown here -- confirm).
 * xattr_value_sz is the allocated size of xattr_value in bytes and is
 * maintained by xrep_parent_alloc_xattr_value.
 */
unsigned char *xattr_name;
void *xattr_value;
unsigned int xattr_value_sz;
/*
 * Information used to exchange the attr fork mappings, if the fs
 * supports parent pointers.
 */
struct xrep_tempexch tx;
/*
 * Information used to scan the filesystem to find the inumber of the
 * dotdot entry for this directory. On filesystems without parent
 * pointers, we use the findparent_* functions on this object and
 * access only the parent_ino field directly.
 *
 * When parent pointers are enabled, the directory entry scanner uses
 * the iscan, hooks, and lock fields of this object directly.
 * @pscan.lock coordinates access to pptr_recs, pptr_names, pptr, and
 * pptr_scratch. This reduces the memory requirements of this
 * structure.
 *
 * The lock also controls access to xattr_records and xattr_blobs(?)
 */
struct xrep_parent_scan_info pscan;
/* Orphanage reparenting request. */
struct xrep_adoption adoption;
/*
 * Directory entry name, plus the trailing null.  xname.name is pointed at
 * namebuf by xrep_setup_parent.
 */
struct xfs_name xname;
unsigned char namebuf[MAXNAMELEN];
/* Scratch buffer for scanning pptr xattrs */
struct xfs_da_args pptr_args;
/*
 * Have we seen any live updates of parent pointers recently?  Set by the
 * live update hook and cleared at the start of the xattr copy, both under
 * pscan.lock.
 */
bool saw_pptr_updates;
/* Number of parents we found after all other repairs */
unsigned long long parents;
};
/*
 * A stashed non-parent-pointer xattr.  Records of this type are appended to
 * xrep_parent.xattr_records; the name and value bytes are both stored in
 * xrep_parent.xattr_blobs and retrieved through the two cookies.
 */
struct xrep_parent_xattr {
/* Cookie for retrieval of the xattr name. */
xfblob_cookie name_cookie;
/* Cookie for retrieval of the xattr value. */
xfblob_cookie value_cookie;
/* XFS_ATTR_* flags (stored masked with XFS_ATTR_NSP_ONDISK_MASK) */
int flags;
/* Length of the value and name. */
uint32_t valuelen;
uint16_t namelen;
};
/*
 * Stash up to 8 pages of attrs in xattr_records/xattr_blobs before we write
 * them to the temp file.  The combined byte usage of both structures is
 * compared against this limit by xrep_parent_want_flush_xattrs.
 */
#define XREP_PARENT_XATTR_MAX_STASH_BYTES (PAGE_SIZE * 8)
/*
 * Release every in-core resource hanging off the repair context.  Each
 * pointer is reset to NULL once its object has been released.
 */
static void
xrep_parent_teardown(
	struct xrep_parent	*rp)
{
	xrep_findparent_scan_teardown(&rp->pscan);

	/* Scratch buffers for xattr names and values. */
	kvfree(rp->xattr_name);
	rp->xattr_name = NULL;

	kvfree(rp->xattr_value);
	rp->xattr_value = NULL;

	/* Stashed xattrs: blobs first, then the record array. */
	if (rp->xattr_blobs) {
		xfblob_destroy(rp->xattr_blobs);
		rp->xattr_blobs = NULL;
	}
	if (rp->xattr_records) {
		xfarray_destroy(rp->xattr_records);
		rp->xattr_records = NULL;
	}

	/* Stashed parent pointers: names first, then the record array. */
	if (rp->pptr_names) {
		xfblob_destroy(rp->pptr_names);
		rp->pptr_names = NULL;
	}
	if (rp->pptr_recs) {
		xfarray_destroy(rp->pptr_recs);
		rp->pptr_recs = NULL;
	}
}
/* Set up for a parent repair. */
int
xrep_setup_parent(
struct xfs_scrub *sc)
{
struct xrep_parent *rp;
int error;
xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS);
rp = kvzalloc(sizeof(struct xrep_parent), XCHK_GFP_FLAGS);
if (!rp)
return -ENOMEM;
rp->sc = sc;
rp->xname.name = rp->namebuf;
sc->buf = rp;
error = xrep_tempfile_create(sc, S_IFREG);
if (error)
return error;
return xrep_orphanage_try_create(sc);
}
/*
* Scan all files in the filesystem for a child dirent that we can turn into
* the dotdot entry for this directory.
*/
STATIC int
xrep_parent_find_dotdot(
struct xrep_parent *rp)
{
struct xfs_scrub *sc = rp->sc;
xfs_ino_t ino;
unsigned int sick, checked;
int error;
/*
* Avoid sick directories. There shouldn't be anyone else clearing the
* directory's sick status.
*/
xfs_inode_measure_sickness(sc->ip, &sick, &checked);
if (sick & XFS_SICK_INO_DIR)
return -EFSCORRUPTED;
ino = xrep_findparent_self_reference(sc);
if (ino != NULLFSINO) {
xrep_findparent_scan_finish_early(&rp->pscan, ino);
return 0;
}
/*
* Drop the ILOCK on this directory so that we can scan for the dotdot
* entry. Figure out who is going to be the parent of this directory,
* then retake the ILOCK so that we can salvage directory entries.
*/
xchk_iunlock(sc, XFS_ILOCK_EXCL);
/* Does the VFS dcache have an answer for us? */
ino = xrep_findparent_from_dcache(sc);
if (ino != NULLFSINO) {
error = xrep_findparent_confirm(sc, &ino);
if (!error && ino != NULLFSINO) {
xrep_findparent_scan_finish_early(&rp->pscan, ino);
goto out_relock;
}
}
/* Scan the entire filesystem for a parent. */
error = xrep_findparent_scan(&rp->pscan);
out_relock:
xchk_ilock(sc, XFS_ILOCK_EXCL);
return error;
}
/*
 * Apply one stashed parent pointer update to the temporary file, either
 * setting or unsetting the pointer according to pptr->action.
 *
 * The caller must hold the tempdir's IOLOCK, must not hold any ILOCKs, and
 * must not be in transaction context.
 *
 * Returns the result of the set/unset call, or -EIO for an unknown action.
 */
STATIC int
xrep_parent_replay_update(
	struct xrep_parent	*rp,
	const struct xfs_name	*xname,
	struct xrep_pptr	*pptr)
{
	struct xfs_scrub	*sc = rp->sc;

	if (pptr->action == XREP_PPTR_ADD) {
		/* Create parent pointer. */
		trace_xrep_parent_replay_parentadd(sc->tempip, xname,
				&pptr->pptr_rec);
		return xfs_parent_set(sc->tempip, sc->ip->i_ino, xname,
				&pptr->pptr_rec, &rp->pptr_args);
	}

	if (pptr->action == XREP_PPTR_REMOVE) {
		/* Remove parent pointer. */
		trace_xrep_parent_replay_parentremove(sc->tempip, xname,
				&pptr->pptr_rec);
		return xfs_parent_unset(sc->tempip, sc->ip->i_ino, xname,
				&pptr->pptr_rec, &rp->pptr_args);
	}

	/* No other action is ever stashed. */
	ASSERT(0);
	return -EIO;
}
/*
 * Flush stashed parent pointer updates that have been recorded by the scanner.
 * This is done to reduce the memory requirements of the parent pointer
 * rebuild, since files can have a lot of hardlinks and the fs can be busy.
 *
 * Caller must not hold transactions or ILOCKs. Caller must hold the tempfile
 * IOLOCK.
 *
 * Returns 0 once every stashed record has been replayed and both stashes have
 * been truncated.  On the first failure the negative errno is returned and
 * the stashes are not truncated.  pscan.lock is not held on return.
 */
STATIC int
xrep_parent_replay_updates(
struct xrep_parent *rp)
{
xfarray_idx_t array_cur;
int error;
/*
 * pscan.lock guards the stashes against concurrent appends from the
 * directory scanner and the live update hook.
 */
mutex_lock(&rp->pscan.lock);
foreach_xfarray_idx(rp->pptr_recs, array_cur) {
struct xrep_pptr pptr;
/* Copy the record and its name out of the stash while locked. */
error = xfarray_load(rp->pptr_recs, array_cur, &pptr);
if (error)
goto out_unlock;
error = xfblob_loadname(rp->pptr_names, pptr.name_cookie,
&rp->xname, pptr.namelen);
if (error)
goto out_unlock;
rp->xname.len = pptr.namelen;
/*
 * Replay against the tempfile without the lock; only the local copy
 * in pptr and the name loaded into rp->xname are used from here on.
 */
mutex_unlock(&rp->pscan.lock);
error = xrep_parent_replay_update(rp, &rp->xname, &pptr);
if (error)
/* The lock was already dropped above, so return directly. */
return error;
/* Retake the lock before the iterator looks at the array again. */
mutex_lock(&rp->pscan.lock);
}
/* Empty out both arrays now that we've added the entries. */
xfarray_truncate(rp->pptr_recs);
xfblob_truncate(rp->pptr_names);
mutex_unlock(&rp->pscan.lock);
return 0;
out_unlock:
mutex_unlock(&rp->pscan.lock);
return error;
}
/*
* Remember that we want to create a parent pointer in the tempfile. These
* stashed actions will be replayed later.
*/
STATIC int
xrep_parent_stash_parentadd(
struct xrep_parent *rp,
const struct xfs_name *name,
const struct xfs_inode *dp)
{
struct xrep_pptr pptr = {
.action = XREP_PPTR_ADD,
.namelen = name->len,
};
int error;
trace_xrep_parent_stash_parentadd(rp->sc->tempip, dp, name);
xfs_inode_to_parent_rec(&pptr.pptr_rec, dp);
error = xfblob_storename(rp->pptr_names, &pptr.name_cookie, name);
if (error)
return error;
return xfarray_append(rp->pptr_recs, &pptr);
}
/*
* Remember that we want to remove a parent pointer from the tempfile. These
* stashed actions will be replayed later.
*/
STATIC int
xrep_parent_stash_parentremove(
struct xrep_parent *rp,
const struct xfs_name *name,
const struct xfs_inode *dp)
{
struct xrep_pptr pptr = {
.action = XREP_PPTR_REMOVE,
.namelen = name->len,
};
int error;
trace_xrep_parent_stash_parentremove(rp->sc->tempip, dp, name);
xfs_inode_to_parent_rec(&pptr.pptr_rec, dp);
error = xfblob_storename(rp->pptr_names, &pptr.name_cookie, name);
if (error)
return error;
return xfarray_append(rp->pptr_recs, &pptr);
}
/*
 * Directory walk callback.  When the dirent (@name, @ino) in directory @dp
 * points at the file whose parent pointers are being rebuilt, validate it
 * and stash an "add parent pointer" action for the temporary file.
 *
 * Returns 0 when the dirent is irrelevant or was stashed, -EFSCORRUPTED when
 * a matching dirent has a bad name or the wrong file type, or a negative
 * errno from stashing.
 */
STATIC int
xrep_parent_scan_dirent(
	struct xfs_scrub	*sc,
	struct xfs_inode	*dp,
	xfs_dir2_dataptr_t	dapos,
	const struct xfs_name	*name,
	xfs_ino_t		ino,
	void			*priv)
{
	struct xrep_parent	*rp = priv;
	int			ret;

	/* Only dirents that point at the file under repair matter. */
	if (rp->sc->ip->i_ino != ino)
		return 0;

	/* Reject empty names and names that fail the directory name check. */
	if (!name->len)
		return -EFSCORRUPTED;
	if (!xfs_dir2_namecheck(name->name, name->len))
		return -EFSCORRUPTED;

	/* The dirent's ftype has to agree with the file's mode. */
	if (xfs_mode_to_ftype(VFS_I(sc->ip)->i_mode) != name->type)
		return -EFSCORRUPTED;

	/* Dot and dotdot are not child dirents, so skip them. */
	if (xfs_dir2_samename(name, &xfs_name_dot))
		return 0;
	if (xfs_dir2_samename(name, &xfs_name_dotdot))
		return 0;

	/* Turn the dirent into a parent pointer and queue it for replay. */
	mutex_lock(&rp->pscan.lock);
	ret = xrep_parent_stash_parentadd(rp, name, dp);
	mutex_unlock(&rp->pscan.lock);

	return ret;
}
/*
* Decide if we want to look for dirents in this directory. Skip the file
* being repaired and any files being used to stage repairs.
*/
static inline bool
xrep_parent_want_scan(
struct xrep_parent *rp,
const struct xfs_inode *ip)
{
return ip != rp->sc->ip && !xrep_is_tempfile(ip);
}
/*
 * Lock @ip for scanning and report which ILOCK mode was taken.
 *
 * ILOCK_EXCL is chosen only for a directory that we intend to scan and whose
 * data fork extents still need to be read in.  Every other file, including
 * one we will not scan, gets ILOCK_SHARED, because the shared lock is still
 * needed to advance the iscan cursor.
 */
static inline unsigned int
xrep_parent_scan_ilock(
	struct xrep_parent	*rp,
	struct xfs_inode	*ip)
{
	uint			mode = XFS_ILOCK_SHARED;

	if (xrep_parent_want_scan(rp, ip) &&
	    S_ISDIR(VFS_I(ip)->i_mode) &&
	    xfs_need_iread_extents(&ip->i_df))
		mode = XFS_ILOCK_EXCL;

	xfs_ilock(ip, mode);
	return mode;
}
/*
 * Scan one file for child dirents that point to the file whose parent
 * pointers are being rebuilt.  Only directories that pass
 * xrep_parent_want_scan are walked.  The inode is marked visited in the iscan
 * and unlocked on every path, including failures.
 *
 * Returns 0, -EBUSY if the directory looks zapped, or the error from the
 * directory walk.
 */
STATIC int
xrep_parent_scan_file(
	struct xrep_parent	*rp,
	struct xfs_inode	*ip)
{
	unsigned int		ilock_mode;
	int			ret = 0;

	ilock_mode = xrep_parent_scan_ilock(rp, ip);

	if (xrep_parent_want_scan(rp, ip) && S_ISDIR(VFS_I(ip)->i_mode)) {
		/*
		 * A directory that looks as though it was zapped by the
		 * inode record repair code cannot be scanned for child
		 * dirents.
		 */
		if (xchk_dir_looks_zapped(ip))
			ret = -EBUSY;
		else
			ret = xchk_dir_walk(rp->sc, ip,
					xrep_parent_scan_dirent, rp);
	}

	xchk_iscan_mark_visited(&rp->pscan.iscan, ip);
	xfs_iunlock(ip, ilock_mode);
	return ret;
}
/* Decide if we've stashed too much pptr data in memory. */
static inline bool
xrep_parent_want_flush_stashed(
struct xrep_parent *rp)
{
unsigned long long bytes;
bytes = xfarray_bytes(rp->pptr_recs) + xfblob_bytes(rp->pptr_names);
return bytes > XREP_PARENT_MAX_STASH_BYTES;
}
/*
 * Scan all directories in the filesystem to look for dirents that we can turn
 * into parent pointers.
 *
 * The scrub transaction is cancelled and any ILOCK held on sc->ip is dropped
 * for the duration of the scan.  On success (return 0) the function ends with
 * an empty transaction and ILOCK_EXCL on sc->ip.  On failure it returns a
 * negative errno (-EBUSY is translated to -ECANCELED) without retaking the
 * ILOCK.
 */
STATIC int
xrep_parent_scan_dirtree(
struct xrep_parent *rp)
{
struct xfs_scrub *sc = rp->sc;
struct xfs_inode *ip;
int error;
/*
 * Filesystem scans are time consuming. Drop the file ILOCK and all
 * other resources for the duration of the scan and hope for the best.
 * The live update hooks will keep our scan information up to date.
 */
xchk_trans_cancel(sc);
if (sc->ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL))
xchk_iunlock(sc, sc->ilock_flags & (XFS_ILOCK_SHARED |
XFS_ILOCK_EXCL));
error = xchk_trans_alloc_empty(sc);
if (error)
return error;
/*
 * The loop runs for as long as the iterator returns 1 and hands us an inode
 * in @ip; the body releases that inode again with xchk_irele.
 */
while ((error = xchk_iscan_iter(&rp->pscan.iscan, &ip)) == 1) {
bool flush;
error = xrep_parent_scan_file(rp, ip);
xchk_irele(sc, ip);
if (error)
break;
/* Flush stashed pptr updates to constrain memory usage. */
mutex_lock(&rp->pscan.lock);
flush = xrep_parent_want_flush_stashed(rp);
mutex_unlock(&rp->pscan.lock);
if (flush) {
/*
 * Replaying must not happen in transaction context, so cancel the
 * empty transaction, replay under the tempfile IOLOCK, and then
 * allocate a fresh empty transaction to continue the scan.
 */
xchk_trans_cancel(sc);
error = xrep_tempfile_iolock_polled(sc);
if (error)
break;
error = xrep_parent_replay_updates(rp);
/* The tempfile IOLOCK is dropped whether or not the replay worked. */
xrep_tempfile_iounlock(sc);
if (error)
break;
error = xchk_trans_alloc_empty(sc);
if (error)
break;
}
if (xchk_should_terminate(sc, &error))
break;
}
xchk_iscan_iter_finish(&rp->pscan.iscan);
if (error) {
/*
 * If we couldn't grab an inode that was busy with a state
 * change, change the error code so that we exit to userspace
 * as quickly as possible.
 */
if (error == -EBUSY)
return -ECANCELED;
return error;
}
/*
 * Retake sc->ip's ILOCK now that we're done flushing stashed parent
 * pointers. We end this function with an empty transaction and the
 * ILOCK.
 */
xchk_ilock(rp->sc, XFS_ILOCK_EXCL);
return 0;
}
/*
 * Notifier callback for dirent updates made by other threads.  An update is
 * stashed for replay against the temporary file when it concerns the file
 * being repaired and the iscan wants live updates for the parent directory.
 * If stashing fails, the iscan is aborted.
 *
 * Always returns NOTIFY_DONE.
 */
STATIC int
xrep_parent_live_update(
	struct notifier_block		*nb,
	unsigned long			action,
	void				*data)
{
	struct xfs_dir_update_params	*p = data;
	struct xrep_parent		*rp;
	struct xfs_scrub		*sc;
	int				ret;

	rp = container_of(nb, struct xrep_parent, pscan.dhook.dirent_hook.nb);
	sc = rp->sc;

	/* Ignore updates to dirents that point at some other file. */
	if (p->ip->i_ino != sc->ip->i_ino)
		return NOTIFY_DONE;

	/* Ignore parent directories the scan does not want updates for. */
	if (!xchk_iscan_want_live_update(&rp->pscan.iscan, p->dp->i_ino))
		return NOTIFY_DONE;

	/*
	 * This thread updated a dirent that points to the file that we're
	 * repairing.  A positive delta stashes an add; anything else stashes
	 * a removal.
	 */
	mutex_lock(&rp->pscan.lock);
	if (p->delta > 0)
		ret = xrep_parent_stash_parentadd(rp, p->name, p->dp);
	else
		ret = xrep_parent_stash_parentremove(rp, p->name, p->dp);
	if (ret == 0)
		rp->saw_pptr_updates = true;
	mutex_unlock(&rp->pscan.lock);

	/* The hook has no way to report an error, so abort the scan. */
	if (ret)
		xchk_iscan_abort(&rp->pscan.iscan);

	return NOTIFY_DONE;
}
/* Reset a directory's dotdot entry, if needed. */
STATIC int
xrep_parent_reset_dotdot(
struct xrep_parent *rp)
{
struct xfs_scrub *sc = rp->sc;
xfs_ino_t ino;
unsigned int spaceres;
int error = 0;
ASSERT(sc->ilock_flags & XFS_ILOCK_EXCL);
error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot, &ino);
if (error || ino == rp->pscan.parent_ino)
return error;
xfs_trans_ijoin(sc->tp, sc->ip, 0);
trace_xrep_parent_reset_dotdot(sc->ip, rp->pscan.parent_ino);
/*
* Reserve more space just in case we have to expand the dir. We're
* allowed to exceed quota to repair inconsistent metadata.
*/
spaceres = xfs_rename_space_res(sc->mp, 0, false, xfs_name_dotdot.len,
false);
error = xfs_trans_reserve_more_inode(sc->tp, sc->ip, spaceres, 0,
true);
if (error)
return error;
error = xfs_dir_replace(sc->tp, sc->ip, &xfs_name_dotdot,
rp->pscan.parent_ino, spaceres);
if (error)
return error;
/*
* Roll transaction to detach the inode from the transaction but retain
* ILOCK_EXCL.
*/
return xfs_trans_roll(&sc->tp);
}
/*
 * Xattr walk callback: pass back the parent inumber if this attr is a parent
 * pointer.  @priv points to the xfs_ino_t that receives the result.
 *
 * Returns 0 to keep walking when the attr is not a parent pointer,
 * -ECANCELED to stop the walk once a parent has been stored, or a negative
 * errno if the attr could not be decoded.
 */
STATIC int
xrep_parent_lookup_pptr(
	struct xfs_scrub	*sc,
	struct xfs_inode	*ip,
	unsigned int		attr_flags,
	const unsigned char	*name,
	unsigned int		namelen,
	const void		*value,
	unsigned int		valuelen,
	void			*priv)
{
	xfs_ino_t		*result = priv;
	xfs_ino_t		found;
	int			ret;

	if ((attr_flags & XFS_ATTR_PARENT) == 0)
		return 0;

	ret = xfs_parent_from_attr(sc->mp, attr_flags, name, namelen, value,
			valuelen, &found, NULL);
	if (ret)
		return ret;

	/* Only publish the inumber after a successful decode. */
	*result = found;
	return -ECANCELED;
}
/*
 * Find the first parent of the scrub target by walking its parent pointers,
 * for the purpose of deciding if we're going to move it to the orphanage.
 * We don't care if the attr fork is zapped.
 *
 * *inop is set to NULLFSINO up front and overwritten if a parent pointer is
 * found.  Returns 0 in both cases, or a negative errno from the walk.
 */
STATIC int
xrep_parent_lookup_pptrs(
	struct xfs_scrub	*sc,
	xfs_ino_t		*inop)
{
	int			ret;

	*inop = NULLFSINO;

	ret = xchk_xattr_walk(sc, sc->ip, xrep_parent_lookup_pptr, NULL,
			inop);

	/* -ECANCELED is how the callback says it found a parent. */
	if (ret == -ECANCELED)
		return 0;
	return ret;
}
/*
 * Move the current file to the orphanage.
 *
 * Caller must hold IOLOCK_EXCL on @sc->ip, and no other inode locks. Upon
 * successful return, the scrub transaction will have enough extra reservation
 * to make the move; it will hold IOLOCK_EXCL and ILOCK_EXCL of @sc->ip and the
 * orphanage; and both inodes will be ijoined.
 *
 * NOTE(review): the body commits a transaction and drops ILOCK_EXCL on sc->ip
 * before doing anything else, and it releases the orphanage's ILOCK and
 * IOLOCK before returning 0, so the lock state described above does not
 * obviously match the code -- confirm against the callers.
 *
 * Returns 0 or a negative errno.  The move itself is skipped (still
 * returning 0) if the parent changed or the file was unlinked while sc->ip
 * was unlocked.
 */
STATIC int
xrep_parent_move_to_orphanage(
struct xrep_parent *rp)
{
struct xfs_scrub *sc = rp->sc;
xfs_ino_t orig_parent, new_parent;
int error;
if (S_ISDIR(VFS_I(sc->ip)->i_mode)) {
/*
 * We are about to drop the ILOCK on sc->ip to lock the
 * orphanage and prepare for the adoption. Therefore, look up
 * the old dotdot entry for sc->ip so that we can compare it
 * after we re-lock sc->ip.
 */
error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot,
&orig_parent);
if (error)
return error;
} else {
/*
 * We haven't dropped the ILOCK since we committed the new
 * xattr structure (and hence the new parent pointer records),
 * which means that the file cannot have been moved in the
 * directory tree, and there are no parents.
 */
orig_parent = NULLFSINO;
}
/*
 * Drop the ILOCK on the scrub target and commit the transaction.
 * Adoption computes its own resource requirements and gathers the
 * necessary components.
 */
error = xrep_trans_commit(sc);
if (error)
return error;
xchk_iunlock(sc, XFS_ILOCK_EXCL);
/*
 * If we can take the orphanage's iolock then we're ready to move.
 * Otherwise drop every lock still held on sc->ip and let
 * xrep_orphanage_iolock_two acquire the IOLOCKs (presumably of both
 * files -- confirm in orphanage.c).
 */
if (!xrep_orphanage_ilock_nowait(sc, XFS_IOLOCK_EXCL)) {
xchk_iunlock(sc, sc->ilock_flags);
error = xrep_orphanage_iolock_two(sc);
if (error)
return error;
}
/* Grab transaction and ILOCK the two files. */
error = xrep_adoption_trans_alloc(sc, &rp->adoption);
if (error)
return error;
/* rp->xname receives the name computed for the adoption. */
error = xrep_adoption_compute_name(&rp->adoption, &rp->xname);
if (error)
return error;
/*
 * Now that we've reacquired the ILOCK on sc->ip, look up the dotdot
 * entry again. If the parent changed or the child was unlinked while
 * the child directory was unlocked, we don't need to move the child to
 * the orphanage after all. For a non-directory, we have to scan for
 * the first parent pointer to see if one has been added.
 */
if (S_ISDIR(VFS_I(sc->ip)->i_mode))
error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot,
&new_parent);
else
error = xrep_parent_lookup_pptrs(sc, &new_parent);
if (error)
return error;
/*
 * Attach to the orphanage if we still have a linked directory and it
 * hasn't been moved.
 */
if (orig_parent == new_parent && VFS_I(sc->ip)->i_nlink > 0) {
error = xrep_adoption_move(&rp->adoption);
if (error)
return error;
}
/*
 * Launder the scrub transaction so we can drop the orphanage ILOCK
 * and IOLOCK. Return holding the scrub target's ILOCK and IOLOCK.
 */
error = xrep_adoption_trans_roll(&rp->adoption);
if (error)
return error;
xrep_orphanage_iunlock(sc, XFS_ILOCK_EXCL);
xrep_orphanage_iunlock(sc, XFS_IOLOCK_EXCL);
return 0;
}
/*
 * Make sure rp->xattr_value can hold at least @bufsize bytes.  A buffer that
 * is already large enough is kept; otherwise the old buffer is freed (its
 * contents are not preserved) and a new one is allocated.
 *
 * Returns 0 on success or -ENOMEM, in which case rp->xattr_value is NULL if a
 * previous buffer had to be released.
 */
STATIC int
xrep_parent_alloc_xattr_value(
	struct xrep_parent	*rp,
	size_t			bufsize)
{
	void			*buf;

	if (bufsize <= rp->xattr_value_sz)
		return 0;

	/* Release the undersized buffer before asking for a bigger one. */
	if (rp->xattr_value != NULL) {
		kvfree(rp->xattr_value);
		rp->xattr_value_sz = 0;
		rp->xattr_value = NULL;
	}

	buf = kvmalloc(bufsize, XCHK_GFP_FLAGS);
	if (buf == NULL)
		return -ENOMEM;

	rp->xattr_value_sz = bufsize;
	rp->xattr_value = buf;
	return 0;
}
/*
 * Read the value of a non-pptr xattr of @ip into rp->xattr_value, growing
 * that buffer to @valuelen bytes first if necessary.  Called when the xattr
 * walk did not supply the value itself.
 *
 * Returns 0 on success, -EDEADLOCK if the value buffer could not be
 * allocated (so that the caller can try harder), or another negative errno.
 */
STATIC int
xrep_parent_fetch_xattr_remote(
	struct xrep_parent	*rp,
	struct xfs_inode	*ip,
	unsigned int		attr_flags,
	const unsigned char	*name,
	unsigned int		namelen,
	unsigned int		valuelen)
{
	struct xfs_scrub	*sc = rp->sc;
	struct xfs_da_args	args = {
		.dp		= ip,
		.owner		= ip->i_ino,
		.trans		= sc->tp,
		.geo		= sc->mp->m_attr_geo,
		.whichfork	= XFS_ATTR_FORK,
		.attr_filter	= attr_flags & XFS_ATTR_NSP_ONDISK_MASK,
		.name		= name,
		.namelen	= namelen,
		.valuelen	= valuelen,
	};
	int			error;

	/*
	 * If we need a larger value buffer, try to allocate one.  If that
	 * fails, return with -EDEADLOCK to try harder.
	 */
	error = xrep_parent_alloc_xattr_value(rp, valuelen);
	switch (error) {
	case 0:
		break;
	case -ENOMEM:
		return -EDEADLOCK;
	default:
		return error;
	}

	/* The buffer may have been reallocated, so set the pointer late. */
	args.value = rp->xattr_value;

	xfs_attr_sethash(&args);
	return xfs_attr_get_ilocked(&args);
}
/* Stash non-pptr attributes for later replay into the temporary file. */
STATIC int
xrep_parent_stash_xattr(
struct xfs_scrub *sc,
struct xfs_inode *ip,
unsigned int attr_flags,
const unsigned char *name,
unsigned int namelen,
const void *value,
unsigned int valuelen,
void *priv)
{
struct xrep_parent_xattr key = {
.valuelen = valuelen,
.namelen = namelen,
.flags = attr_flags & XFS_ATTR_NSP_ONDISK_MASK,
};
struct xrep_parent *rp = priv;
int error;
if (attr_flags & (XFS_ATTR_INCOMPLETE | XFS_ATTR_PARENT))
return 0;
if (!value) {
error = xrep_parent_fetch_xattr_remote(rp, ip, attr_flags,
name, namelen, valuelen);
if (error)
return error;
value = rp->xattr_value;
}
trace_xrep_parent_stash_xattr(rp->sc->tempip, key.flags, (void *)name,
key.namelen, key.valuelen);
error = xfblob_store(rp->xattr_blobs, &key.name_cookie, name,
key.namelen);
if (error)
return error;
error = xfblob_store(rp->xattr_blobs, &key.value_cookie, value,
key.valuelen);
if (error)
return error;
return xfarray_append(rp->xattr_records, &key);
}
/*
 * Insert one xattr key/value.
 *
 * Loads the stashed name and value described by @key from xattr_blobs into
 * the rp->xattr_name and rp->xattr_value scratch buffers, frees both blobs,
 * and upserts the attr into the temporary file.  Must not be given a parent
 * pointer record (asserted).
 *
 * Returns 0 or a negative errno.
 */
STATIC int
xrep_parent_insert_xattr(
struct xrep_parent *rp,
const struct xrep_parent_xattr *key)
{
/* The attr goes into the tempfile but uses sc->ip's inumber as the owner. */
struct xfs_da_args args = {
.dp = rp->sc->tempip,
.attr_filter = key->flags,
.namelen = key->namelen,
.valuelen = key->valuelen,
.owner = rp->sc->ip->i_ino,
.geo = rp->sc->mp->m_attr_geo,
.whichfork = XFS_ATTR_FORK,
.op_flags = XFS_DA_OP_OKNOENT,
};
int error;
ASSERT(!(key->flags & XFS_ATTR_PARENT));
/*
 * Grab pointers to the scrub buffer so that we can use them to insert
 * attrs into the temp file.
 */
args.name = rp->xattr_name;
args.value = rp->xattr_value;
/*
 * The attribute name is stored near the end of the in-core buffer,
 * though we reserve one more byte to ensure null termination.
 */
rp->xattr_name[XATTR_NAME_MAX] = 0;
/* Each blob is freed right after it has been copied out. */
error = xfblob_load(rp->xattr_blobs, key->name_cookie, rp->xattr_name,
key->namelen);
if (error)
return error;
error = xfblob_free(rp->xattr_blobs, key->name_cookie);
if (error)
return error;
/*
 * NOTE(review): this assumes rp->xattr_value holds at least
 * key->valuelen bytes; the sizing for values that were not fetched
 * through xrep_parent_fetch_xattr_remote is not visible here -- confirm.
 */
error = xfblob_load(rp->xattr_blobs, key->value_cookie, args.value,
key->valuelen);
if (error)
return error;
error = xfblob_free(rp->xattr_blobs, key->value_cookie);
if (error)
return error;
/* Terminate the name at its real length; this is the string that is traced. */
rp->xattr_name[key->namelen] = 0;
trace_xrep_parent_insert_xattr(rp->sc->tempip, key->flags,
rp->xattr_name, key->namelen, key->valuelen);
xfs_attr_sethash(&args);
return xfs_attr_set(&args, XFS_ATTRUPDATE_UPSERT, false);
}
/*
 * Periodically flush salvaged attributes to the temporary file. This is done
 * to reduce the memory requirements of the xattr rebuild because files can
 * contain millions of attributes.
 *
 * On success (return 0) both xattr stashes are empty and the caller again has
 * an empty scrub transaction and ILOCK_EXCL on the inode being repaired.  On
 * failure a negative errno is returned and neither the transaction nor the
 * ILOCK has been re-established.
 */
STATIC int
xrep_parent_flush_xattrs(
struct xrep_parent *rp)
{
xfarray_idx_t array_cur;
int error;
/*
 * Entering this function, the scrub context has a reference to the
 * inode being repaired, the temporary file, and the empty scrub
 * transaction that we created for the xattr scan. We hold ILOCK_EXCL
 * on the inode being repaired.
 *
 * To constrain kernel memory use, we occasionally flush salvaged
 * xattrs from the xfarray and xfblob structures into the temporary
 * file in preparation for exchanging the xattr structures at the end.
 * Updating the temporary file requires a transaction, so we cancel the
 * empty scrub transaction and drop the ILOCK so that xfs_attr_set can
 * allocate whatever transaction it wants.
 *
 * We still hold IOLOCK_EXCL on the inode being repaired, which
 * prevents anyone from adding xattrs (or parent pointers) while we're
 * flushing.
 */
xchk_trans_cancel(rp->sc);
xchk_iunlock(rp->sc, XFS_ILOCK_EXCL);
/*
 * Take the IOLOCK of the temporary file while we modify xattrs. This
 * isn't strictly required because the temporary file is never revealed
 * to userspace, but we follow the same locking rules. We still hold
 * sc->ip's IOLOCK.
 */
error = xrep_tempfile_iolock_polled(rp->sc);
if (error)
return error;
/*
 * Add all the salvaged attrs to the temporary file.
 *
 * NOTE(review): the error returns inside this loop leave the tempfile
 * IOLOCK held; presumably scrub teardown releases it -- confirm.
 */
foreach_xfarray_idx(rp->xattr_records, array_cur) {
struct xrep_parent_xattr key;
error = xfarray_load(rp->xattr_records, array_cur, &key);
if (error)
return error;
error = xrep_parent_insert_xattr(rp, &key);
if (error)
return error;
}
/* Empty out both arrays now that we've added the entries. */
xfarray_truncate(rp->xattr_records);
xfblob_truncate(rp->xattr_blobs);
xrep_tempfile_iounlock(rp->sc);
/* Recreate the empty transaction and relock the inode. */
error = xchk_trans_alloc_empty(rp->sc);
if (error)
return error;
xchk_ilock(rp->sc, XFS_ILOCK_EXCL);
return 0;
}
/* Decide if we've stashed too much xattr data in memory. */
static inline bool
xrep_parent_want_flush_xattrs(
struct xrep_parent *rp)
{
unsigned long long bytes;
bytes = xfarray_bytes(rp->xattr_records) +
xfblob_bytes(rp->xattr_blobs);
return bytes > XREP_PARENT_XATTR_MAX_STASH_BYTES;
}
/* Flush staged attributes to the temporary file if we're over the limit. */
STATIC int
xrep_parent_try_flush_xattrs(
struct xfs_scrub *sc,
void *priv)
{
struct xrep_parent *rp = priv;
int error;
if (!xrep_parent_want_flush_xattrs(rp))
return 0;
error = xrep_parent_flush_xattrs(rp);
if (error)
return error;
/*
* If there were any parent pointer updates to the xattr structure
* while we dropped the ILOCK, the xattr structure is now stale.
* Signal to the attr copy process that we need to start over, but
* this time without opportunistic attr flushing.
*
* This is unlikely to happen, so we're ok with restarting the copy.
*/
mutex_lock(&rp->pscan.lock);
if (rp->saw_pptr_updates)
error = -ESTALE;
mutex_unlock(&rp->pscan.lock);
return error;
}
/* Copy all the non-pptr extended attributes into the temporary file. */
STATIC int
xrep_parent_copy_xattrs(
struct xrep_parent *rp)
{
struct xfs_scrub *sc = rp->sc;
int error;
/*
* Clear the pptr updates flag. We hold sc->ip ILOCKed, so there
* can't be any parent pointer updates in progress.
*/
mutex_lock(&rp->pscan.lock);
rp->saw_pptr_updates = false;
mutex_unlock(&rp->pscan.lock);
/* Copy xattrs, stopping periodically to flush the incore buffers. */
error = xchk_xattr_walk(sc, sc->ip, xrep_parent_stash_xattr,
xrep_parent_try_flush_xattrs, rp);
if (error && error != -ESTALE)
return error;
if (error == -ESTALE) {
/*
* The xattr copy collided with a parent pointer update.
* Restart the copy, but this time hold the ILOCK all the way
* to the end to lock out any directory parent pointer updates.
*/
error = xchk_xattr_walk(sc, sc->ip, xrep_parent_stash_xattr,
NULL, rp);
if (error)
return error;
}
/* Flush any remaining stashed xattrs to the temporary file. */
if (xfarray_bytes(rp->xattr_records) == 0)
return 0;
return xrep_parent_flush_xattrs(rp);
}
/*
* Ensure that @sc->ip and @sc->tempip both have attribute forks before we head
* into the attr fork exchange transaction. All files on a filesystem with
* parent pointers must have an attr fork because the parent pointer code does
* not itself add attribute forks.
*
* Note: Unlinkable unlinked files don't need one, but the overhead of having
* an unnecessary attr fork is not justified by the additional code complexity
* that would be needed to track that state correctly.
*/
STATIC int
xrep_parent_ensure_attr_fork(
struct xrep_parent *rp)
{
struct xfs_scrub *sc = rp->sc;
int error;
error = xfs_attr_add_fork(sc->tempip,
sizeof(struct xfs_attr_sf_hdr), 1);
if (error)
return error;
return xfs_attr_add_fork(sc->ip, sizeof(struct xfs_attr_sf_hdr), 1);
}
/*
 * Finish replaying stashed parent pointer updates, allocate a transaction for
 * exchanging extent mappings, and take the ILOCKs of both files before we
 * commit the new attribute structure.
 *
 * Returns 0 with the exchange transaction allocated once a pass finds the
 * pptr stash empty after that allocation.  Returns a negative errno if any
 * step fails or if the scrub should terminate; on the terminate path the
 * transaction has been cancelled and both ILOCKs dropped.
 */
STATIC int
xrep_parent_finalize_tempfile(
struct xrep_parent *rp)
{
struct xfs_scrub *sc = rp->sc;
int error;
/*
 * Repair relies on the ILOCK to quiesce all possible xattr updates.
 * Replay all queued parent pointer updates into the tempfile before
 * exchanging the contents, even if that means dropping the ILOCKs and
 * the transaction.
 */
do {
error = xrep_parent_replay_updates(rp);
if (error)
return error;
error = xrep_parent_ensure_attr_fork(rp);
if (error)
return error;
error = xrep_tempexch_trans_alloc(sc, XFS_ATTR_FORK, &rp->tx);
if (error)
return error;
/*
 * If nothing was stashed while we were setting up, we are done.  error
 * is zero here because the allocation above succeeded.
 */
if (xfarray_length(rp->pptr_recs) == 0)
break;
/*
 * More updates were stashed in the meantime.  Back out of the
 * transaction and both ILOCKs so that the next pass can replay them.
 */
xchk_trans_cancel(sc);
xrep_tempfile_iunlock_both(sc);
} while (!xchk_should_terminate(sc, &error));
return error;
}
/*
 * Rebuild the parent pointers of the file being repaired: copy its non-pptr
 * xattrs into the temporary file, replay every stashed parent pointer into
 * the temporary file, and atomically exchange the attr fork contents of the
 * two files.  Afterwards, report one parent (if any exists) to the parent
 * scan so the caller can decide whether the file needs adopting.
 */
STATIC int
xrep_parent_rebuild_pptrs(
	struct xrep_parent	*rp)
{
	struct xfs_scrub	*sc = rp->sc;
	xfs_ino_t		parent_ino = NULLFSINO;
	int			error;

	/*
	 * Copy the non-pptr xattrs of the file being repaired into the
	 * temporary file's xattr structure.  Holding sc->ip's IOLOCK keeps
	 * setxattr/removexattr callers out, but renames update parent
	 * pointers without taking the IOLOCK.  If stale attr structures are
	 * detected, the scan is restarted and flushing is left until the end.
	 */
	error = xrep_parent_copy_xattrs(rp);
	if (error)
		return error;

	/*
	 * The empty transaction used for walking and copying attrs is no
	 * longer needed.  Cancel it and release the ILOCK so the temporary
	 * file's IOLOCK can be taken; sc->ip's IOLOCK stays held.
	 */
	xchk_trans_cancel(sc);
	xchk_iunlock(sc, XFS_ILOCK_EXCL);

	error = xrep_tempfile_iolock_polled(sc);
	if (error)
		return error;

	/*
	 * Allocate the transaction, lock the inodes, and make sure every
	 * stashed pptr update has been replayed into the tempfile.  Once this
	 * succeeds we are ready to exchange the attr fork mappings.
	 */
	error = xrep_parent_finalize_tempfile(rp);
	if (error)
		return error;

	/* Final opportunity to back out before pptr fixes get committed. */
	if (xchk_should_terminate(sc, &error))
		return error;

	if (xchk_iscan_aborted(&rp->pscan.iscan))
		return -ECANCELED;

	/*
	 * Exchange the attr fork contents, then throw away the old attr fork
	 * contents, which now live in the tempfile.
	 */
	error = xrep_xattr_swap(sc, &rp->tx);
	if (error)
		return error;

	error = xrep_xattr_reset_tempfile_fork(sc);
	if (error)
		return error;

	/*
	 * Roll to obtain a transaction that has no inodes joined to it, so
	 * that the tempfile's ILOCK and IOLOCK can be dropped before further
	 * work on the scrub target file.
	 */
	error = xfs_trans_roll(&sc->tp);
	if (error)
		return error;

	xrep_tempfile_iunlock(sc);
	xrep_tempfile_iounlock(sc);

	/*
	 * The new parent pointers are committed.  Pick out at least one
	 * parent so that the caller can tell whether this file must move to
	 * the orphanage.  A root directory counts as its own parent here.
	 */
	if (sc->ip != sc->mp->m_rootip) {
		error = xrep_parent_lookup_pptrs(sc, &parent_ino);
		if (error)
			return error;

		/* No parent pointer turned up; nothing to report. */
		if (parent_ino == NULLFSINO)
			return 0;
	} else {
		parent_ino = sc->ip->i_ino;
	}

	xrep_findparent_scan_found(&rp->pscan, parent_ino);
	return 0;
}
/*
 * Commit the new parent information to the file being repaired.  On
 * filesystems with parent pointers the pptr structure is rebuilt first.  A
 * file left without any parent is handed to the orphanage if adoption is
 * possible (-EFSCORRUPTED otherwise); a directory with a parent gets its
 * dotdot entry reset.
 */
STATIC int
xrep_parent_rebuild_tree(
	struct xrep_parent	*rp)
{
	int			error;

	if (xfs_has_parent(rp->sc->mp)) {
		error = xrep_parent_rebuild_pptrs(rp);
		if (error)
			return error;
	}

	/* A parent was found: only directories have a dotdot to fix. */
	if (rp->pscan.parent_ino != NULLFSINO) {
		if (!S_ISDIR(VFS_I(rp->sc->ip)->i_mode))
			return 0;
		return xrep_parent_reset_dotdot(rp);
	}

	/* No parent at all: adopt the file, or give up. */
	if (!xrep_orphanage_can_adopt(rp->sc))
		return -EFSCORRUPTED;
	return xrep_parent_move_to_orphanage(rp);
}
/*
 * Xattr walk callback: bump rp->parents for every attr flagged as a parent
 * pointer that xfs_parent_from_attr accepts.  Attrs without XFS_ATTR_PARENT
 * are skipped; an error from xfs_parent_from_attr is passed back.
 */
STATIC int
xrep_parent_count_pptr(
	struct xfs_scrub	*sc,
	struct xfs_inode	*ip,
	unsigned int		attr_flags,
	const unsigned char	*name,
	unsigned int		namelen,
	const void		*value,
	unsigned int		valuelen,
	void			*priv)
{
	struct xrep_parent	*rp = priv;
	int			ret;

	if (attr_flags & XFS_ATTR_PARENT) {
		ret = xfs_parent_from_attr(sc->mp, attr_flags, name, namelen,
				value, valuelen, NULL, NULL);
		if (ret)
			return ret;

		rp->parents++;
	}

	return 0;
}
/*
 * Once all parent pointer rebuilding and adoption work has finished, reset
 * the link count of this nondirectory from the number of parent pointers it
 * now carries, and move it on or off the unlinked list to match.
 */
STATIC int
xrep_parent_set_nondir_nlink(
	struct xrep_parent	*rp)
{
	struct xfs_scrub	*sc = rp->sc;
	struct xfs_inode	*ip = sc->ip;
	struct xfs_perag	*pag;
	bool			on_unlinked;
	bool			joined = false;
	int			error;

	/* Tally the parent pointers; the total becomes the link count. */
	rp->parents = 0;
	error = xchk_xattr_walk(sc, ip, xrep_parent_count_pptr, NULL, rp);
	if (error)
		return error;

	on_unlinked = xfs_inode_on_unlinked_list(ip);
	if (rp->parents > 0 && on_unlinked) {
		/*
		 * Parents were found for a file that sits on the unlinked
		 * list, so take it off that list.
		 */
		xfs_trans_ijoin(sc->tp, ip, 0);
		joined = true;

		pag = xfs_perag_get(sc->mp, XFS_INO_TO_AGNO(sc->mp, ip->i_ino));
		if (!pag) {
			ASSERT(0);
			return -EFSCORRUPTED;
		}

		error = xfs_iunlink_remove(sc->tp, pag, ip);
		xfs_perag_put(pag);
		if (error)
			return error;
	} else if (rp->parents == 0 && !on_unlinked) {
		/*
		 * No parents were found for a file that is not on the
		 * unlinked list, so put it there.
		 */
		xfs_trans_ijoin(sc->tp, ip, 0);
		joined = true;

		error = xfs_iunlink(sc->tp, ip);
		if (error)
			return error;
	}

	/* Fix the link count if it disagrees with the pptr tally. */
	if (VFS_I(ip)->i_nlink != rp->parents) {
		if (!joined)
			xfs_trans_ijoin(sc->tp, ip, 0);
		joined = true;

		set_nlink(VFS_I(ip), min_t(unsigned long long, rp->parents,
					   XFS_NLINK_PINNED));
	}

	/* Nothing was joined, so nothing was dirtied. */
	if (!joined)
		return 0;

	/* Log the inode to keep it moving forward. */
	xfs_trans_log_inode(sc->tp, ip, XFS_ILOG_CORE);
	return 0;
}
/*
 * Set up the filesystem scan used to look for parents.  Without the parent
 * pointer feature this only starts the findparent scan.  With it, the name
 * and value buffers plus the four staging stores (pptr records, pptr names,
 * retained xattr records, retained xattr values) are allocated first; if a
 * later step fails, everything allocated so far is released again.
 */
STATIC int
xrep_parent_setup_scan(
	struct xrep_parent	*rp)
{
	struct xfs_scrub	*sc = rp->sc;
	char			*descr;
	int			max_len;
	int			error;

	if (!xfs_has_parent(sc->mp))
		return xrep_findparent_scan_start(sc, &rp->pscan);

	/* Name buffer for copying non-pptr attrs to the tempfile. */
	rp->xattr_name = kvmalloc(XATTR_NAME_MAX + 1, XCHK_GFP_FLAGS);
	if (!rp->xattr_name)
		return -ENOMEM;

	/*
	 * The value buffer has to be big enough to load local attr values
	 * from the xfblob data while stashed attrs are flushed to the
	 * temporary file.  It is only reallocated when salvaging remote attr
	 * values, so under TRY_HARDER we go straight to the maximal attr
	 * value size.
	 */
	max_len = XATTR_SIZE_MAX;
	if (!(sc->flags & XCHK_TRY_HARDER))
		max_len = xfs_attr_leaf_entsize_local_max(
				sc->mp->m_attr_geo->blksize);

	error = xrep_parent_alloc_xattr_value(rp, max_len);
	if (error)
		goto free_xattr_name;

	/* Staging memory for logging parent pointer updates. */
	descr = xchk_xfile_ino_descr(sc, "parent pointer entries");
	error = xfarray_create(descr, 0, sizeof(struct xrep_pptr),
			&rp->pptr_recs);
	kfree(descr);
	if (error)
		goto free_xattr_value;

	descr = xchk_xfile_ino_descr(sc, "parent pointer names");
	error = xfblob_create(descr, &rp->pptr_names);
	kfree(descr);
	if (error)
		goto destroy_pptr_recs;

	/* Storage for attrs copied ahead of the mapping exchange. */
	descr = xchk_xfile_ino_descr(sc,
				"parent pointer retained xattr entries");
	error = xfarray_create(descr, 0, sizeof(struct xrep_parent_xattr),
			&rp->xattr_records);
	kfree(descr);
	if (error)
		goto destroy_pptr_names;

	descr = xchk_xfile_ino_descr(sc,
				"parent pointer retained xattr values");
	error = xfblob_create(descr, &rp->xattr_blobs);
	kfree(descr);
	if (error)
		goto destroy_xattr_records;

	error = __xrep_findparent_scan_start(sc, &rp->pscan,
			xrep_parent_live_update);
	if (error)
		goto destroy_xattr_blobs;

	return 0;

	/* Unwind in the reverse order of setup. */
destroy_xattr_blobs:
	xfblob_destroy(rp->xattr_blobs);
	rp->xattr_blobs = NULL;
destroy_xattr_records:
	xfarray_destroy(rp->xattr_records);
	rp->xattr_records = NULL;
destroy_pptr_names:
	xfblob_destroy(rp->pptr_names);
	rp->pptr_names = NULL;
destroy_pptr_recs:
	xfarray_destroy(rp->pptr_recs);
	rp->pptr_recs = NULL;
free_xattr_value:
	kvfree(rp->xattr_value);
	rp->xattr_value = NULL;
free_xattr_name:
	kvfree(rp->xattr_name);
	rp->xattr_name = NULL;
	return error;
}
/*
 * Repair the parent information of the file being scrubbed: set up and run
 * the scan for parents, commit the rebuilt parent structure, fix the link
 * count of nondirectories when parent pointers are enabled, and finish any
 * deferred work.  The scan state is torn down on every path after setup
 * succeeds.
 */
int
xrep_parent(
	struct xfs_scrub	*sc)
{
	struct xrep_parent	*rp = sc->buf;
	int			error;

	/*
	 * With the parent pointers feature enabled, a repair is committed by
	 * atomically committing a new xattr structure and reaping the old
	 * attr fork.  Reaping needs both rmap and exchange-range.
	 */
	if (xfs_has_parent(sc->mp) &&
	    (!xfs_has_rmapbt(sc->mp) || !xfs_has_exchange_range(sc->mp)))
		return -EOPNOTSUPP;

	error = xrep_parent_setup_scan(rp);
	if (error)
		return error;

	if (!xfs_has_parent(sc->mp))
		error = xrep_parent_find_dotdot(rp);
	else
		error = xrep_parent_scan_dirtree(rp);
	if (error)
		goto teardown;

	/* Final opportunity to back out before dotdot fixes get committed. */
	if (xchk_should_terminate(sc, &error))
		goto teardown;

	error = xrep_parent_rebuild_tree(rp);
	if (error)
		goto teardown;

	/* Nondirectories get their link count reset from the pptr count. */
	if (xfs_has_parent(sc->mp) && !S_ISDIR(VFS_I(sc->ip)->i_mode)) {
		error = xrep_parent_set_nondir_nlink(rp);
		if (error)
			goto teardown;
	}

	error = xrep_defer_finish(sc);

teardown:
	xrep_parent_teardown(rp);
	return error;
}