linux/fs/xfs/xfs_health.c

// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2019 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <[email protected]>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trace.h"
#include "xfs_health.h"
#include "xfs_ag.h"
#include "xfs_btree.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_quota_defs.h"

/*
 * Complain about metadata corruption that was noticed during this mount but
 * never repaired, and make sure we do not leave behind any health state that
 * would get in the way of recovery.
 */
void
xfs_health_unmount(
	struct xfs_mount	*mp)
{
	struct xfs_perag	*pag;
	xfs_agnumber_t		agno;
	unsigned int		sick_mask = 0;
	unsigned int		checked_mask = 0;
	bool			found_unfixed = false;

	/* A filesystem that has already shut down gets no health report. */
	if (xfs_is_shutdown(mp))
		return;

	/* Sample each AG in turn; any sick AG is enough to warn. */
	for_each_perag(mp, agno, pag) {
		xfs_ag_measure_sickness(pag, &sick_mask, &checked_mask);
		if (!sick_mask)
			continue;

		trace_xfs_ag_unfixed_corruption(mp, agno, sick_mask);
		found_unfixed = true;
	}

	/* Sample the realtime volume. */
	xfs_rt_measure_sickness(mp, &sick_mask, &checked_mask);
	if (sick_mask) {
		trace_xfs_rt_unfixed_corruption(mp, sick_mask);
		found_unfixed = true;
	}

	/*
	 * Sample the fs-wide state last: sick_mask must still hold this
	 * sample when we reach the FS_COUNTERS handling at the bottom.  Sick
	 * summary counters on their own do not trigger the warning; the
	 * comment further down explains why.
	 */
	xfs_fs_measure_sickness(mp, &sick_mask, &checked_mask);
	if (sick_mask & ~XFS_SICK_FS_COUNTERS) {
		trace_xfs_fs_unfixed_corruption(mp, sick_mask);
		found_unfixed = true;
	}

	if (!found_unfixed)
		return;

	xfs_warn(mp,
"Uncorrected metadata errors detected; please run xfs_repair.");

	/*
	 * Metadata problems were found at some point during this mount and
	 * never corrected, so we have just told the administrator to run
	 * xfs_repair once the unmount completes.
	 *
	 * There is a catch.  When FSCOUNTERS is flagged unhealthy, the
	 * unmount procedure skips writing the clean unmount record to the log
	 * so that the next mount runs recovery and recomputes the summary
	 * counters.  In other words, the log is deliberately left dirty to
	 * get the counters fixed.
	 *
	 * xfs_repair cannot recover a dirty log.  An administrator who takes
	 * our advice would therefore have to zap the log before repairing
	 * anything, which we would rather not encourage.  Mark FSCOUNTERS
	 * healthy here so that the subsequent repair run sees a clean log.
	 */
	if (sick_mask & XFS_SICK_FS_COUNTERS)
		xfs_fs_mark_healthy(mp, XFS_SICK_FS_COUNTERS);
}

/*
 * Mark unhealthy per-fs metadata.
 *
 * @mask names the XFS_SICK_FS_* flags to set; we assert that no bit outside
 * XFS_SICK_FS_ALL is passed in.  Only the sick mask is touched here -- the
 * checked mask is left alone.
 */
void
xfs_fs_mark_sick(
	struct xfs_mount	*mp,
	unsigned int		mask)
{
	ASSERT(!(mask & ~XFS_SICK_FS_ALL));
	trace_xfs_fs_mark_sick(mp, mask);

	/* The fs-wide sick mask is updated under m_sb_lock. */
	spin_lock(&mp->m_sb_lock);
	mp->m_fs_sick |= mask;
	spin_unlock(&mp->m_sb_lock);
}

/*
 * Record that fsck checked the per-fs metadata in @mask and found it
 * unhealthy: the flags are set in both the checked mask and the sick mask.
 */
void
xfs_fs_mark_corrupt(
	struct xfs_mount	*mp,
	unsigned int		mask)
{
	ASSERT(!(mask & ~XFS_SICK_FS_ALL));
	trace_xfs_fs_mark_corrupt(mp, mask);

	/* Both masks change under a single hold of m_sb_lock. */
	spin_lock(&mp->m_sb_lock);
	mp->m_fs_checked |= mask;
	mp->m_fs_sick |= mask;
	spin_unlock(&mp->m_sb_lock);
}

/*
 * Mark the per-fs metadata in @mask as healed and as having been checked.
 * If that leaves no XFS_SICK_FS_PRIMARY flag set, the XFS_SICK_FS_SECONDARY
 * flags are dropped as well.
 */
void
xfs_fs_mark_healthy(
	struct xfs_mount	*mp,
	unsigned int		mask)
{
	unsigned int		still_sick;

	ASSERT(!(mask & ~XFS_SICK_FS_ALL));
	trace_xfs_fs_mark_healthy(mp, mask);

	spin_lock(&mp->m_sb_lock);

	/* Work out the new sick mask first, then store it in one go. */
	still_sick = mp->m_fs_sick & ~mask;
	if (!(still_sick & XFS_SICK_FS_PRIMARY))
		still_sick &= ~XFS_SICK_FS_SECONDARY;
	mp->m_fs_sick = still_sick;

	mp->m_fs_checked |= mask;

	spin_unlock(&mp->m_sb_lock);
}

/*
 * Sample the per-fs health state: return the current sick mask in @sick and
 * the current checked mask in @checked.
 */
void
xfs_fs_measure_sickness(
	struct xfs_mount	*mp,
	unsigned int		*sick,
	unsigned int		*checked)
{
	unsigned int		cur_sick;
	unsigned int		cur_checked;

	/* Read both masks under one lock hold so that they are consistent. */
	spin_lock(&mp->m_sb_lock);
	cur_sick = mp->m_fs_sick;
	cur_checked = mp->m_fs_checked;
	spin_unlock(&mp->m_sb_lock);

	*sick = cur_sick;
	*checked = cur_checked;
}

/*
 * Mark unhealthy realtime metadata.
 *
 * @mask names the XFS_SICK_RT_* flags to set; we assert that no bit outside
 * XFS_SICK_RT_ALL is passed in.  Only the sick mask is touched here -- the
 * checked mask is left alone.
 */
void
xfs_rt_mark_sick(
	struct xfs_mount	*mp,
	unsigned int		mask)
{
	ASSERT(!(mask & ~XFS_SICK_RT_ALL));
	trace_xfs_rt_mark_sick(mp, mask);

	/* The realtime sick mask is updated under m_sb_lock. */
	spin_lock(&mp->m_sb_lock);
	mp->m_rt_sick |= mask;
	spin_unlock(&mp->m_sb_lock);
}

/*
 * Record that fsck checked the realtime metadata in @mask and found it
 * unhealthy: the flags are set in both the checked mask and the sick mask.
 */
void
xfs_rt_mark_corrupt(
	struct xfs_mount	*mp,
	unsigned int		mask)
{
	ASSERT(!(mask & ~XFS_SICK_RT_ALL));
	trace_xfs_rt_mark_corrupt(mp, mask);

	/* Both masks change under a single hold of m_sb_lock. */
	spin_lock(&mp->m_sb_lock);
	mp->m_rt_checked |= mask;
	mp->m_rt_sick |= mask;
	spin_unlock(&mp->m_sb_lock);
}

/*
 * Mark the realtime metadata in @mask as healed and as having been checked.
 * If that leaves no XFS_SICK_RT_PRIMARY flag set, the XFS_SICK_RT_SECONDARY
 * flags are dropped as well.
 */
void
xfs_rt_mark_healthy(
	struct xfs_mount	*mp,
	unsigned int		mask)
{
	unsigned int		still_sick;

	ASSERT(!(mask & ~XFS_SICK_RT_ALL));
	trace_xfs_rt_mark_healthy(mp, mask);

	spin_lock(&mp->m_sb_lock);

	/* Work out the new sick mask first, then store it in one go. */
	still_sick = mp->m_rt_sick & ~mask;
	if (!(still_sick & XFS_SICK_RT_PRIMARY))
		still_sick &= ~XFS_SICK_RT_SECONDARY;
	mp->m_rt_sick = still_sick;

	mp->m_rt_checked |= mask;

	spin_unlock(&mp->m_sb_lock);
}

/*
 * Sample the realtime health state: return the current sick mask in @sick
 * and the current checked mask in @checked.
 */
void
xfs_rt_measure_sickness(
	struct xfs_mount	*mp,
	unsigned int		*sick,
	unsigned int		*checked)
{
	unsigned int		cur_sick;
	unsigned int		cur_checked;

	/* Read both masks under one lock hold so that they are consistent. */
	spin_lock(&mp->m_sb_lock);
	cur_sick = mp->m_rt_sick;
	cur_checked = mp->m_rt_checked;
	spin_unlock(&mp->m_sb_lock);

	*sick = cur_sick;
	*checked = cur_checked;
}

/*
 * Mark unhealthy per-ag metadata when the caller only has a raw AG number.
 * Looks up the per-ag structure for @agno, marks @mask sick on it and drops
 * the reference again.  Does nothing if the lookup returns NULL.
 */
void
xfs_agno_mark_sick(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	unsigned int		mask)
{
	struct xfs_perag	*pag;

	pag = xfs_perag_get(mp, agno);
	if (pag) {
		xfs_ag_mark_sick(pag, mask);
		xfs_perag_put(pag);
	}

	/* No perag means the per-ag structure is not set up yet; do nothing. */
}

/*
 * Mark the per-ag metadata in @mask as sick.  Only the sick mask of @pag is
 * touched; the checked mask is left alone.
 */
void
xfs_ag_mark_sick(
	struct xfs_perag	*pag,
	unsigned int		mask)
{
	struct xfs_mount	*mp = pag->pag_mount;

	ASSERT(!(mask & ~XFS_SICK_AG_ALL));
	trace_xfs_ag_mark_sick(mp, pag->pag_agno, mask);

	/* The per-ag sick mask is updated under pag_state_lock. */
	spin_lock(&pag->pag_state_lock);
	pag->pag_sick |= mask;
	spin_unlock(&pag->pag_state_lock);
}

/*
 * Record that fsck checked the per-ag metadata in @mask and found it
 * unhealthy: the flags are set in both the checked mask and the sick mask.
 */
void
xfs_ag_mark_corrupt(
	struct xfs_perag	*pag,
	unsigned int		mask)
{
	struct xfs_mount	*mp = pag->pag_mount;

	ASSERT(!(mask & ~XFS_SICK_AG_ALL));
	trace_xfs_ag_mark_corrupt(mp, pag->pag_agno, mask);

	/* Both masks change under a single hold of pag_state_lock. */
	spin_lock(&pag->pag_state_lock);
	pag->pag_checked |= mask;
	pag->pag_sick |= mask;
	spin_unlock(&pag->pag_state_lock);
}

/*
 * Mark the per-ag metadata in @mask as ok and as having been checked.  If
 * that leaves no XFS_SICK_AG_PRIMARY flag set, the XFS_SICK_AG_SECONDARY
 * flags are dropped as well.
 */
void
xfs_ag_mark_healthy(
	struct xfs_perag	*pag,
	unsigned int		mask)
{
	unsigned int		still_sick;

	ASSERT(!(mask & ~XFS_SICK_AG_ALL));
	trace_xfs_ag_mark_healthy(pag->pag_mount, pag->pag_agno, mask);

	spin_lock(&pag->pag_state_lock);

	/* Work out the new sick mask first, then store it in one go. */
	still_sick = pag->pag_sick & ~mask;
	if (!(still_sick & XFS_SICK_AG_PRIMARY))
		still_sick &= ~XFS_SICK_AG_SECONDARY;
	pag->pag_sick = still_sick;

	pag->pag_checked |= mask;

	spin_unlock(&pag->pag_state_lock);
}

/*
 * Sample the health state of one AG: return the current sick mask in @sick
 * and the current checked mask in @checked.
 */
void
xfs_ag_measure_sickness(
	struct xfs_perag	*pag,
	unsigned int		*sick,
	unsigned int		*checked)
{
	unsigned int		cur_sick;
	unsigned int		cur_checked;

	/* Read both masks under one lock hold so that they are consistent. */
	spin_lock(&pag->pag_state_lock);
	cur_sick = pag->pag_sick;
	cur_checked = pag->pag_checked;
	spin_unlock(&pag->pag_state_lock);

	*sick = cur_sick;
	*checked = cur_checked;
}

/* Mark the unhealthy parts of an inode. */
void
xfs_inode_mark_sick(
	struct xfs_inode	*ip,
	unsigned int		mask)
{
	ASSERT(!(mask & ~XFS_SICK_INO_ALL));
	trace_xfs_inode_mark_sick(ip, mask);

	spin_lock(&ip->i_flags_lock);
	ip->i_sick |= mask;
	spin_unlock(&ip->i_flags_lock);

	/*
	 * Keep this inode around so we don't lose the sickness report.  Scrub
	 * grabs inodes with DONTCACHE assuming that most inode are ok, which
	 * is not the case here.
	 */
	spin_lock(&VFS_I(ip)->i_lock);
	VFS_I(ip)->i_state &= ~I_DONTCACHE;
	spin_unlock(&VFS_I(ip)->i_lock);
}

/* Mark inode metadata as having been checked and found unhealthy by fsck. */
void
xfs_inode_mark_corrupt(
	struct xfs_inode	*ip,
	unsigned int		mask)
{
	ASSERT(!(mask & ~XFS_SICK_INO_ALL));
	trace_xfs_inode_mark_corrupt(ip, mask);

	spin_lock(&ip->i_flags_lock);
	ip->i_sick |= mask;
	ip->i_checked |= mask;
	spin_unlock(&ip->i_flags_lock);

	/*
	 * Keep this inode around so we don't lose the sickness report.  Scrub
	 * grabs inodes with DONTCACHE assuming that most inode are ok, which
	 * is not the case here.
	 */
	spin_lock(&VFS_I(ip)->i_lock);
	VFS_I(ip)->i_state &= ~I_DONTCACHE;
	spin_unlock(&VFS_I(ip)->i_lock);
}

/*
 * Mark the parts of an inode named in @mask as healed and as having been
 * checked.  If that leaves no XFS_SICK_INO_PRIMARY flag set, the
 * XFS_SICK_INO_SECONDARY flags are dropped as well.
 */
void
xfs_inode_mark_healthy(
	struct xfs_inode	*ip,
	unsigned int		mask)
{
	unsigned int		still_sick;

	ASSERT(!(mask & ~XFS_SICK_INO_ALL));
	trace_xfs_inode_mark_healthy(ip, mask);

	spin_lock(&ip->i_flags_lock);

	/* Work out the new sick mask first, then store it in one go. */
	still_sick = ip->i_sick & ~mask;
	if (!(still_sick & XFS_SICK_INO_PRIMARY))
		still_sick &= ~XFS_SICK_INO_SECONDARY;
	ip->i_sick = still_sick;

	ip->i_checked |= mask;

	spin_unlock(&ip->i_flags_lock);
}

/*
 * Sample the health state of one inode: return the current sick mask in
 * @sick and the current checked mask in @checked.
 */
void
xfs_inode_measure_sickness(
	struct xfs_inode	*ip,
	unsigned int		*sick,
	unsigned int		*checked)
{
	unsigned int		cur_sick;
	unsigned int		cur_checked;

	/* Read both masks under one lock hold so that they are consistent. */
	spin_lock(&ip->i_flags_lock);
	cur_sick = ip->i_sick;
	cur_checked = ip->i_checked;
	spin_unlock(&ip->i_flags_lock);

	*sick = cur_sick;
	*checked = cur_checked;
}

/* Mappings between internal sick masks and ioctl sick masks. */

/*
 * One translation table entry.  The tables built from this are terminated by
 * an entry whose sick_mask is zero.
 */
struct ioctl_sick_map {
	unsigned int		sick_mask;	/* internal XFS_SICK_* flag */
	unsigned int		ioctl_mask;	/* flag reported via ioctl */
};

/*
 * Per-fs health flags (XFS_SICK_FS_*) and the XFS_FSOP_GEOM_SICK_* flag each
 * one is reported as in the fs geometry structure.  The all-zero entry ends
 * the table.
 */
static const struct ioctl_sick_map fs_map[] = {
	{ XFS_SICK_FS_COUNTERS,	XFS_FSOP_GEOM_SICK_COUNTERS},
	{ XFS_SICK_FS_UQUOTA,	XFS_FSOP_GEOM_SICK_UQUOTA },
	{ XFS_SICK_FS_GQUOTA,	XFS_FSOP_GEOM_SICK_GQUOTA },
	{ XFS_SICK_FS_PQUOTA,	XFS_FSOP_GEOM_SICK_PQUOTA },
	{ XFS_SICK_FS_QUOTACHECK, XFS_FSOP_GEOM_SICK_QUOTACHECK },
	{ XFS_SICK_FS_NLINKS,	XFS_FSOP_GEOM_SICK_NLINKS },
	{ 0, 0 },
};

/*
 * Realtime health flags (XFS_SICK_RT_*) and the XFS_FSOP_GEOM_SICK_RT_* flag
 * each one is reported as in the fs geometry structure.  The all-zero entry
 * ends the table.
 */
static const struct ioctl_sick_map rt_map[] = {
	{ XFS_SICK_RT_BITMAP,	XFS_FSOP_GEOM_SICK_RT_BITMAP },
	{ XFS_SICK_RT_SUMMARY,	XFS_FSOP_GEOM_SICK_RT_SUMMARY },
	{ 0, 0 },
};

/*
 * Apply one mapping entry to the fs geometry health fields: if the entry's
 * internal flag is set in the sampled @sick (or @checked) mask, set the
 * matching ioctl flag in geo->sick (or geo->checked).
 */
static inline void
xfgeo_health_tick(
	struct xfs_fsop_geom		*geo,
	unsigned int			sick,
	unsigned int			checked,
	const struct ioctl_sick_map	*m)
{
	const unsigned int		internal = m->sick_mask;
	const unsigned int		reported = m->ioctl_mask;

	if (sick & internal)
		geo->sick |= reported;
	if (checked & internal)
		geo->checked |= reported;
}

/*
 * Fill out the health fields of the fs geometry structure.  geo->sick and
 * geo->checked are reset and then rebuilt from the per-fs and the realtime
 * health state, translated through fs_map and rt_map respectively.
 */
void
xfs_fsop_geom_health(
	struct xfs_mount		*mp,
	struct xfs_fsop_geom		*geo)
{
	const struct ioctl_sick_map	*entry;
	unsigned int			sick;
	unsigned int			checked;

	geo->sick = 0;
	geo->checked = 0;

	/* Per-fs metadata. */
	xfs_fs_measure_sickness(mp, &sick, &checked);
	entry = fs_map;
	while (entry->sick_mask) {
		xfgeo_health_tick(geo, sick, checked, entry);
		entry++;
	}

	/* Realtime metadata; reported through the same two fields. */
	xfs_rt_measure_sickness(mp, &sick, &checked);
	entry = rt_map;
	while (entry->sick_mask) {
		xfgeo_health_tick(geo, sick, checked, entry);
		entry++;
	}
}

/*
 * Per-ag health flags (XFS_SICK_AG_*) and the XFS_AG_GEOM_SICK_* flag each
 * one is reported as in the AG geometry structure.  The all-zero entry ends
 * the table.
 */
static const struct ioctl_sick_map ag_map[] = {
	{ XFS_SICK_AG_SB,	XFS_AG_GEOM_SICK_SB },
	{ XFS_SICK_AG_AGF,	XFS_AG_GEOM_SICK_AGF },
	{ XFS_SICK_AG_AGFL,	XFS_AG_GEOM_SICK_AGFL },
	{ XFS_SICK_AG_AGI,	XFS_AG_GEOM_SICK_AGI },
	{ XFS_SICK_AG_BNOBT,	XFS_AG_GEOM_SICK_BNOBT },
	{ XFS_SICK_AG_CNTBT,	XFS_AG_GEOM_SICK_CNTBT },
	{ XFS_SICK_AG_INOBT,	XFS_AG_GEOM_SICK_INOBT },
	{ XFS_SICK_AG_FINOBT,	XFS_AG_GEOM_SICK_FINOBT },
	{ XFS_SICK_AG_RMAPBT,	XFS_AG_GEOM_SICK_RMAPBT },
	{ XFS_SICK_AG_REFCNTBT,	XFS_AG_GEOM_SICK_REFCNTBT },
	{ XFS_SICK_AG_INODES,	XFS_AG_GEOM_SICK_INODES },
	{ 0, 0 },
};

/*
 * Fill out the health fields of the AG geometry structure.  ageo->ag_sick
 * and ageo->ag_checked are reset and then rebuilt from the health state of
 * @pag, translated through ag_map.
 */
void
xfs_ag_geom_health(
	struct xfs_perag		*pag,
	struct xfs_ag_geometry		*ageo)
{
	const struct ioctl_sick_map	*entry = ag_map;
	unsigned int			sick;
	unsigned int			checked;

	ageo->ag_sick = 0;
	ageo->ag_checked = 0;

	xfs_ag_measure_sickness(pag, &sick, &checked);

	/* Walk the table up to its zero terminator. */
	while (entry->sick_mask) {
		if (sick & entry->sick_mask)
			ageo->ag_sick |= entry->ioctl_mask;
		if (checked & entry->sick_mask)
			ageo->ag_checked |= entry->ioctl_mask;
		entry++;
	}
}

/*
 * Inode health flags (XFS_SICK_INO_*) and the XFS_BS_SICK_* flag each one is
 * reported as in bulkstat output.  The mapping is many-to-one: each *_ZAPPED
 * flag is reported through the same bulkstat flag as its non-zapped
 * counterpart.  The all-zero entry ends the table.
 */
static const struct ioctl_sick_map ino_map[] = {
	{ XFS_SICK_INO_CORE,	XFS_BS_SICK_INODE },
	{ XFS_SICK_INO_BMBTD,	XFS_BS_SICK_BMBTD },
	{ XFS_SICK_INO_BMBTA,	XFS_BS_SICK_BMBTA },
	{ XFS_SICK_INO_BMBTC,	XFS_BS_SICK_BMBTC },
	{ XFS_SICK_INO_DIR,	XFS_BS_SICK_DIR },
	{ XFS_SICK_INO_XATTR,	XFS_BS_SICK_XATTR },
	{ XFS_SICK_INO_SYMLINK,	XFS_BS_SICK_SYMLINK },
	{ XFS_SICK_INO_PARENT,	XFS_BS_SICK_PARENT },
	/* Zapped forks/structures share the bit of the structure itself. */
	{ XFS_SICK_INO_BMBTD_ZAPPED,	XFS_BS_SICK_BMBTD },
	{ XFS_SICK_INO_BMBTA_ZAPPED,	XFS_BS_SICK_BMBTA },
	{ XFS_SICK_INO_DIR_ZAPPED,	XFS_BS_SICK_DIR },
	{ XFS_SICK_INO_SYMLINK_ZAPPED,	XFS_BS_SICK_SYMLINK },
	{ XFS_SICK_INO_DIRTREE,	XFS_BS_SICK_DIRTREE },
	{ 0, 0 },
};

/*
 * Fill out the health fields of a bulkstat record.  bs->bs_sick and
 * bs->bs_checked are reset and then rebuilt from the health state of @ip,
 * translated through ino_map.
 */
void
xfs_bulkstat_health(
	struct xfs_inode		*ip,
	struct xfs_bulkstat		*bs)
{
	const struct ioctl_sick_map	*entry = ino_map;
	unsigned int			sick;
	unsigned int			checked;

	bs->bs_sick = 0;
	bs->bs_checked = 0;

	xfs_inode_measure_sickness(ip, &sick, &checked);

	/* Walk the table up to its zero terminator. */
	while (entry->sick_mask) {
		if (sick & entry->sick_mask)
			bs->bs_sick |= entry->ioctl_mask;
		if (checked & entry->sick_mask)
			bs->bs_checked |= entry->ioctl_mask;
		entry++;
	}
}

/*
 * Mark the block mapping of one inode fork sick.  @whichfork selects the
 * data, attr or CoW fork; any other value trips an ASSERT and marks nothing.
 */
void
xfs_bmap_mark_sick(
	struct xfs_inode	*ip,
	int			whichfork)
{
	switch (whichfork) {
	case XFS_DATA_FORK:
		xfs_inode_mark_sick(ip, XFS_SICK_INO_BMBTD);
		return;
	case XFS_ATTR_FORK:
		xfs_inode_mark_sick(ip, XFS_SICK_INO_BMBTA);
		return;
	case XFS_COW_FORK:
		xfs_inode_mark_sick(ip, XFS_SICK_INO_BMBTC);
		return;
	default:
		/* Not a fork we know how to report. */
		ASSERT(0);
		return;
	}
}

/* Record observations of btree corruption with the health tracking system. */
void
xfs_btree_mark_sick(
	struct xfs_btree_cur		*cur)
{
	switch (cur->bc_ops->type) {
	case XFS_BTREE_TYPE_MEM:
		/* no health state tracking for ephemeral btrees */
		return;
	case XFS_BTREE_TYPE_AG:
		ASSERT(cur->bc_ops->sick_mask);
		xfs_ag_mark_sick(cur->bc_ag.pag, cur->bc_ops->sick_mask);
		return;
	case XFS_BTREE_TYPE_INODE:
		if (xfs_btree_is_bmap(cur->bc_ops)) {
			xfs_bmap_mark_sick(cur->bc_ino.ip,
					   cur->bc_ino.whichfork);
			return;
		}
		fallthrough;
	default:
		ASSERT(0);
		return;
	}
}

/*
 * Report dir/attr btree corruption to the health tracking system.  A problem
 * in the data fork marks the inode's directory sick, one in the attr fork
 * marks its extended attributes sick.  Any other @whichfork trips an ASSERT
 * and marks nothing.
 */
void
xfs_dirattr_mark_sick(
	struct xfs_inode	*ip,
	int			whichfork)
{
	unsigned int		mask;

	if (whichfork == XFS_DATA_FORK) {
		mask = XFS_SICK_INO_DIR;
	} else if (whichfork == XFS_ATTR_FORK) {
		mask = XFS_SICK_INO_XATTR;
	} else {
		ASSERT(0);
		return;
	}

	xfs_inode_mark_sick(ip, mask);
}

/*
 * Report dir/attr btree corruption for the operation described by @args.
 * This is xfs_dirattr_mark_sick() applied to the inode (args->dp) and fork
 * (args->whichfork) taken from @args.
 */
void
xfs_da_mark_sick(
	struct xfs_da_args	*args)
{
	struct xfs_inode	*dp = args->dp;
	int			whichfork = args->whichfork;

	xfs_dirattr_mark_sick(dp, whichfork);
}