linux/fs/xfs/scrub/dabtree.c

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2017-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <[email protected]>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_inode.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_attr_leaf.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/dabtree.h"

/* Directory/Attribute Btree */

/*
 * Check for da btree operation errors.  See the section about handling
 * operational errors in common.c.
 */
bool
xchk_da_process_error(
	struct xchk_da_btree	*ds,
	int			level,
	int			*error)
{
	struct xfs_scrub	*sc = ds->sc;

	if (*error == 0)
		return true;

	switch (*error) {
	case -EDEADLOCK:
	case -ECHRNG:
		/* Used to restart an op with deadlock avoidance. */
		trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
		break;
	case -EFSBADCRC:
	case -EFSCORRUPTED:
		/* Note the badness but don't abort. */
		sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
		*error = 0;
		fallthrough;
	default:
		trace_xchk_file_op_error(sc, ds->dargs.whichfork,
				xfs_dir2_da_to_db(ds->dargs.geo,
					ds->state->path.blk[level].blkno),
				*error, __return_address);
		break;
	}
	return false;
}

/*
 * Check for da btree corruption.  See the section about handling
 * operational errors in common.c.
 */
void
xchk_da_set_corrupt(
	struct xchk_da_btree	*ds,
	int			level)
{
	struct xfs_scrub	*sc = ds->sc;

	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;

	trace_xchk_fblock_error(sc, ds->dargs.whichfork,
			xfs_dir2_da_to_db(ds->dargs.geo,
				ds->state->path.blk[level].blkno),
			__return_address);
}

/* Flag a da btree node in need of optimization. */
void
xchk_da_set_preen(
	struct xchk_da_btree	*ds,
	int			level)
{
	struct xfs_scrub	*sc = ds->sc;

	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
	trace_xchk_fblock_preen(sc, ds->dargs.whichfork,
			xfs_dir2_da_to_db(ds->dargs.geo,
				ds->state->path.blk[level].blkno),
			__return_address);
}

/* Find an entry at a certain level in a da btree. */
static struct xfs_da_node_entry *
xchk_da_btree_node_entry(
	struct xchk_da_btree		*ds,
	int				level)
{
	struct xfs_da_state_blk		*blk = &ds->state->path.blk[level];
	struct xfs_da3_icnode_hdr	hdr;

	ASSERT(blk->magic == XFS_DA_NODE_MAGIC);

	xfs_da3_node_hdr_from_disk(ds->sc->mp, &hdr, blk->bp->b_addr);
	return hdr.btree + blk->index;
}

/* Scrub a da btree hash (key). */
int
xchk_da_btree_hash(
	struct xchk_da_btree		*ds,
	int				level,
	__be32				*hashp)
{
	struct xfs_da_node_entry	*entry;
	xfs_dahash_t			hash;
	xfs_dahash_t			parent_hash;

	/* Is this hash in order? */
	hash = be32_to_cpu(*hashp);
	if (hash < ds->hashes[level])
		xchk_da_set_corrupt(ds, level);
	ds->hashes[level] = hash;

	if (level == 0)
		return 0;

	/* Is this hash no larger than the parent hash? */
	entry = xchk_da_btree_node_entry(ds, level - 1);
	parent_hash = be32_to_cpu(entry->hashval);
	if (parent_hash < hash)
		xchk_da_set_corrupt(ds, level);

	return 0;
}

/*
 * Check a da btree pointer.  Returns true if it's ok to use this
 * pointer.
 */
STATIC bool
xchk_da_btree_ptr_ok(
	struct xchk_da_btree	*ds,
	int			level,
	xfs_dablk_t		blkno)
{
	if (blkno < ds->lowest || (ds->highest != 0 && blkno >= ds->highest)) {
		xchk_da_set_corrupt(ds, level);
		return false;
	}

	return true;
}

/*
 * The da btree scrubber can handle leaf1 blocks as a degenerate
 * form of leafn blocks.  Since the regular da code doesn't handle
 * leaf1, we must multiplex the verifiers.
 */
static void
xchk_da_btree_read_verify(
	struct xfs_buf		*bp)
{
	struct xfs_da_blkinfo	*info = bp->b_addr;

	switch (be16_to_cpu(info->magic)) {
	case XFS_DIR2_LEAF1_MAGIC:
	case XFS_DIR3_LEAF1_MAGIC:
		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
		bp->b_ops->verify_read(bp);
		return;
	default:
		/*
		 * xfs_da3_node_buf_ops already know how to handle
		 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
		 */
		bp->b_ops = &xfs_da3_node_buf_ops;
		bp->b_ops->verify_read(bp);
		return;
	}
}
static void
xchk_da_btree_write_verify(
	struct xfs_buf		*bp)
{
	struct xfs_da_blkinfo	*info = bp->b_addr;

	switch (be16_to_cpu(info->magic)) {
	case XFS_DIR2_LEAF1_MAGIC:
	case XFS_DIR3_LEAF1_MAGIC:
		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
		bp->b_ops->verify_write(bp);
		return;
	default:
		/*
		 * xfs_da3_node_buf_ops already know how to handle
		 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
		 */
		bp->b_ops = &xfs_da3_node_buf_ops;
		bp->b_ops->verify_write(bp);
		return;
	}
}
static void *
xchk_da_btree_verify(
	struct xfs_buf		*bp)
{
	struct xfs_da_blkinfo	*info = bp->b_addr;

	switch (be16_to_cpu(info->magic)) {
	case XFS_DIR2_LEAF1_MAGIC:
	case XFS_DIR3_LEAF1_MAGIC:
		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
		return bp->b_ops->verify_struct(bp);
	default:
		bp->b_ops = &xfs_da3_node_buf_ops;
		return bp->b_ops->verify_struct(bp);
	}
}

static const struct xfs_buf_ops xchk_da_btree_buf_ops = {
	.name = "xchk_da_btree",
	.verify_read = xchk_da_btree_read_verify,
	.verify_write = xchk_da_btree_write_verify,
	.verify_struct = xchk_da_btree_verify,
};

/* Check a block's sibling. */
STATIC int
xchk_da_btree_block_check_sibling(
	struct xchk_da_btree	*ds,
	int			level,
	int			direction,
	xfs_dablk_t		sibling)
{
	struct xfs_da_state_path *path = &ds->state->path;
	struct xfs_da_state_path *altpath = &ds->state->altpath;
	int			retval;
	int			plevel;
	int			error;

	memcpy(altpath, path, sizeof(ds->state->altpath));

	/*
	 * If the pointer is null, we shouldn't be able to move the upper
	 * level pointer anywhere.
	 */
	if (sibling == 0) {
		error = xfs_da3_path_shift(ds->state, altpath, direction,
				false, &retval);
		if (error == 0 && retval == 0)
			xchk_da_set_corrupt(ds, level);
		error = 0;
		goto out;
	}

	/* Move the alternate cursor one block in the direction given. */
	error = xfs_da3_path_shift(ds->state, altpath, direction, false,
			&retval);
	if (!xchk_da_process_error(ds, level, &error))
		goto out;
	if (retval) {
		xchk_da_set_corrupt(ds, level);
		goto out;
	}
	if (altpath->blk[level].bp)
		xchk_buffer_recheck(ds->sc, altpath->blk[level].bp);

	/* Compare upper level pointer to sibling pointer. */
	if (altpath->blk[level].blkno != sibling)
		xchk_da_set_corrupt(ds, level);

out:
	/* Free all buffers in the altpath that aren't referenced from path. */
	for (plevel = 0; plevel < altpath->active; plevel++) {
		if (altpath->blk[plevel].bp == NULL ||
		    (plevel < path->active &&
		     altpath->blk[plevel].bp == path->blk[plevel].bp))
			continue;

		xfs_trans_brelse(ds->dargs.trans, altpath->blk[plevel].bp);
		altpath->blk[plevel].bp = NULL;
	}

	return error;
}

/* Check a block's sibling pointers. */
STATIC int
xchk_da_btree_block_check_siblings(
	struct xchk_da_btree	*ds,
	int			level,
	struct xfs_da_blkinfo	*hdr)
{
	xfs_dablk_t		forw;
	xfs_dablk_t		back;
	int			error = 0;

	forw = be32_to_cpu(hdr->forw);
	back = be32_to_cpu(hdr->back);

	/* Top level blocks should not have sibling pointers. */
	if (level == 0) {
		if (forw != 0 || back != 0)
			xchk_da_set_corrupt(ds, level);
		return 0;
	}

	/*
	 * Check back (left) and forw (right) pointers.  These functions
	 * absorb error codes for us.
	 */
	error = xchk_da_btree_block_check_sibling(ds, level, 0, back);
	if (error)
		goto out;
	error = xchk_da_btree_block_check_sibling(ds, level, 1, forw);

out:
	memset(&ds->state->altpath, 0, sizeof(ds->state->altpath));
	return error;
}

/* Load a dir/attribute block from a btree. */
STATIC int
xchk_da_btree_block(
	struct xchk_da_btree		*ds,
	int				level,
	xfs_dablk_t			blkno)
{
	struct xfs_da_state_blk		*blk;
	struct xfs_da_intnode		*node;
	struct xfs_da_node_entry	*btree;
	struct xfs_da3_blkinfo		*hdr3;
	struct xfs_da_args		*dargs = &ds->dargs;
	struct xfs_inode		*ip = ds->dargs.dp;
	xfs_failaddr_t			fa;
	xfs_ino_t			owner;
	int				*pmaxrecs;
	struct xfs_da3_icnode_hdr	nodehdr;
	int				error = 0;

	blk = &ds->state->path.blk[level];
	ds->state->path.active = level + 1;

	/* Release old block. */
	if (blk->bp) {
		xfs_trans_brelse(dargs->trans, blk->bp);
		blk->bp = NULL;
	}

	/* Check the pointer. */
	blk->blkno = blkno;
	if (!xchk_da_btree_ptr_ok(ds, level, blkno))
		goto out_nobuf;

	/* Read the buffer. */
	error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno,
			XFS_DABUF_MAP_HOLE_OK, &blk->bp, dargs->whichfork,
			&xchk_da_btree_buf_ops);
	if (!xchk_da_process_error(ds, level, &error))
		goto out_nobuf;
	if (blk->bp)
		xchk_buffer_recheck(ds->sc, blk->bp);

	/*
	 * We didn't find a dir btree root block, which means that
	 * there's no LEAF1/LEAFN tree (at least not where it's supposed
	 * to be), so jump out now.
	 */
	if (ds->dargs.whichfork == XFS_DATA_FORK && level == 0 &&
			blk->bp == NULL)
		goto out_nobuf;

	/* It's /not/ ok for attr trees not to have a da btree. */
	if (blk->bp == NULL) {
		xchk_da_set_corrupt(ds, level);
		goto out_nobuf;
	}

	hdr3 = blk->bp->b_addr;
	blk->magic = be16_to_cpu(hdr3->hdr.magic);
	pmaxrecs = &ds->maxrecs[level];

	/* We only started zeroing the header on v5 filesystems. */
	if (xfs_has_crc(ds->sc->mp) && hdr3->hdr.pad)
		xchk_da_set_corrupt(ds, level);

	/* Check the owner. */
	if (xfs_has_crc(ip->i_mount)) {
		owner = be64_to_cpu(hdr3->owner);
		if (owner != ip->i_ino)
			xchk_da_set_corrupt(ds, level);
	}

	/* Check the siblings. */
	error = xchk_da_btree_block_check_siblings(ds, level, &hdr3->hdr);
	if (error)
		goto out;

	/* Interpret the buffer. */
	switch (blk->magic) {
	case XFS_ATTR_LEAF_MAGIC:
	case XFS_ATTR3_LEAF_MAGIC:
		xfs_trans_buf_set_type(dargs->trans, blk->bp,
				XFS_BLFT_ATTR_LEAF_BUF);
		blk->magic = XFS_ATTR_LEAF_MAGIC;
		blk->hashval = xfs_attr_leaf_lasthash(blk->bp, pmaxrecs);
		if (ds->tree_level != 0)
			xchk_da_set_corrupt(ds, level);
		break;
	case XFS_DIR2_LEAFN_MAGIC:
	case XFS_DIR3_LEAFN_MAGIC:
		xfs_trans_buf_set_type(dargs->trans, blk->bp,
				XFS_BLFT_DIR_LEAFN_BUF);
		blk->magic = XFS_DIR2_LEAFN_MAGIC;
		blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
		if (ds->tree_level != 0)
			xchk_da_set_corrupt(ds, level);
		break;
	case XFS_DIR2_LEAF1_MAGIC:
	case XFS_DIR3_LEAF1_MAGIC:
		xfs_trans_buf_set_type(dargs->trans, blk->bp,
				XFS_BLFT_DIR_LEAF1_BUF);
		blk->magic = XFS_DIR2_LEAF1_MAGIC;
		blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
		if (ds->tree_level != 0)
			xchk_da_set_corrupt(ds, level);
		break;
	case XFS_DA_NODE_MAGIC:
	case XFS_DA3_NODE_MAGIC:
		xfs_trans_buf_set_type(dargs->trans, blk->bp,
				XFS_BLFT_DA_NODE_BUF);
		blk->magic = XFS_DA_NODE_MAGIC;
		node = blk->bp->b_addr;
		xfs_da3_node_hdr_from_disk(ip->i_mount, &nodehdr, node);
		btree = nodehdr.btree;
		*pmaxrecs = nodehdr.count;
		blk->hashval = be32_to_cpu(btree[*pmaxrecs - 1].hashval);
		if (level == 0) {
			if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
				xchk_da_set_corrupt(ds, level);
				goto out_freebp;
			}
			ds->tree_level = nodehdr.level;
		} else {
			if (ds->tree_level != nodehdr.level) {
				xchk_da_set_corrupt(ds, level);
				goto out_freebp;
			}
		}

		/* XXX: Check hdr3.pad32 once we know how to fix it. */
		break;
	default:
		xchk_da_set_corrupt(ds, level);
		goto out_freebp;
	}

	fa = xfs_da3_header_check(blk->bp, dargs->owner);
	if (fa) {
		xchk_da_set_corrupt(ds, level);
		goto out_freebp;
	}

	/*
	 * If we've been handed a block that is below the dabtree root, does
	 * its hashval match what the parent block expected to see?
	 */
	if (level > 0) {
		struct xfs_da_node_entry	*key;

		key = xchk_da_btree_node_entry(ds, level - 1);
		if (be32_to_cpu(key->hashval) != blk->hashval) {
			xchk_da_set_corrupt(ds, level);
			goto out_freebp;
		}
	}

out:
	return error;
out_freebp:
	xfs_trans_brelse(dargs->trans, blk->bp);
	blk->bp = NULL;
out_nobuf:
	blk->blkno = 0;
	return error;
}

/* Visit all nodes and leaves of a da btree. */
int
xchk_da_btree(
	struct xfs_scrub		*sc,
	int				whichfork,
	xchk_da_btree_rec_fn		scrub_fn,
	void				*private)
{
	struct xchk_da_btree		*ds;
	struct xfs_mount		*mp = sc->mp;
	struct xfs_da_state_blk		*blks;
	struct xfs_da_node_entry	*key;
	xfs_dablk_t			blkno;
	int				level;
	int				error;

	/* Skip short format data structures; no btree to scan. */
	if (!xfs_ifork_has_extents(xfs_ifork_ptr(sc->ip, whichfork)))
		return 0;

	/* Set up initial da state. */
	ds = kzalloc(sizeof(struct xchk_da_btree), XCHK_GFP_FLAGS);
	if (!ds)
		return -ENOMEM;
	ds->dargs.dp = sc->ip;
	ds->dargs.whichfork = whichfork;
	ds->dargs.trans = sc->tp;
	ds->dargs.op_flags = XFS_DA_OP_OKNOENT;
	ds->dargs.owner = sc->ip->i_ino;
	ds->state = xfs_da_state_alloc(&ds->dargs);
	ds->sc = sc;
	ds->private = private;
	if (whichfork == XFS_ATTR_FORK) {
		ds->dargs.geo = mp->m_attr_geo;
		ds->lowest = 0;
		ds->highest = 0;
	} else {
		ds->dargs.geo = mp->m_dir_geo;
		ds->lowest = ds->dargs.geo->leafblk;
		ds->highest = ds->dargs.geo->freeblk;
	}
	blkno = ds->lowest;
	level = 0;

	/* Find the root of the da tree, if present. */
	blks = ds->state->path.blk;
	error = xchk_da_btree_block(ds, level, blkno);
	if (error)
		goto out_state;
	/*
	 * We didn't find a block at ds->lowest, which means that there's
	 * no LEAF1/LEAFN tree (at least not where it's supposed to be),
	 * so jump out now.
	 */
	if (blks[level].bp == NULL)
		goto out_state;

	blks[level].index = 0;
	while (level >= 0 && level < XFS_DA_NODE_MAXDEPTH) {
		/* Handle leaf block. */
		if (blks[level].magic != XFS_DA_NODE_MAGIC) {
			/* End of leaf, pop back towards the root. */
			if (blks[level].index >= ds->maxrecs[level]) {
				if (level > 0)
					blks[level - 1].index++;
				ds->tree_level++;
				level--;
				continue;
			}

			/* Dispatch record scrubbing. */
			error = scrub_fn(ds, level);
			if (error)
				break;
			if (xchk_should_terminate(sc, &error) ||
			    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
				break;

			blks[level].index++;
			continue;
		}


		/* End of node, pop back towards the root. */
		if (blks[level].index >= ds->maxrecs[level]) {
			if (level > 0)
				blks[level - 1].index++;
			ds->tree_level++;
			level--;
			continue;
		}

		/* Hashes in order for scrub? */
		key = xchk_da_btree_node_entry(ds, level);
		error = xchk_da_btree_hash(ds, level, &key->hashval);
		if (error)
			goto out;

		/* Drill another level deeper. */
		blkno = be32_to_cpu(key->before);
		level++;
		if (level >= XFS_DA_NODE_MAXDEPTH) {
			/* Too deep! */
			xchk_da_set_corrupt(ds, level - 1);
			break;
		}
		ds->tree_level--;
		error = xchk_da_btree_block(ds, level, blkno);
		if (error)
			goto out;
		if (blks[level].bp == NULL)
			goto out;

		blks[level].index = 0;
	}

out:
	/* Release all the buffers we're tracking. */
	for (level = 0; level < XFS_DA_NODE_MAXDEPTH; level++) {
		if (blks[level].bp == NULL)
			continue;
		xfs_trans_brelse(sc->tp, blks[level].bp);
		blks[level].bp = NULL;
	}

out_state:
	xfs_da_state_free(ds->state);
	kfree(ds);
	return error;
}