xfs_inode_item.c | Explore in Territory

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_trace.h"
#include "xfs_trans_priv.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
#include "xfs_error.h"
#include "xfs_rtbitmap.h"

#include <linux/iversion.h>

struct kmem_cache	*xfs_ili_cache;		/* inode log item */

static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip)
{ … }

static uint64_t
xfs_inode_item_sort(
	struct xfs_log_item	*lip)
{ … }

#ifdef DEBUG_EXPENSIVE
static void
xfs_inode_item_precommit_check(
	struct xfs_inode	*ip)
{ … }
#else
#define xfs_inode_item_precommit_check …
#endif

/*
 * Prior to finally logging the inode, we have to ensure that all the
 * per-modification inode state changes are applied. This includes VFS inode
 * state updates, format conversions, verifier state synchronisation and
 * ensuring the inode buffer remains in memory whilst the inode is dirty.
 *
 * We have to be careful when we grab the inode cluster buffer due to lock
 * ordering constraints. The unlinked inode modifications (xfs_iunlink_item)
 * require AGI -> inode cluster buffer lock order. The inode cluster buffer is
 * not locked until ->precommit, so it happens after everything else has been
 * modified.
 *
 * Further, we have AGI -> AGF lock ordering, and with O_TMPFILE handling we
 * have AGI -> AGF -> iunlink item -> inode cluster buffer lock order. Hence we
 * cannot safely lock the inode cluster buffer in xfs_trans_log_inode() because
 * it can be called on a inode (e.g. via bumplink/droplink) before we take the
 * AGF lock modifying directory blocks.
 *
 * Rather than force a complete rework of all the transactions to call
 * xfs_trans_log_inode() once and once only at the end of every transaction, we
 * move the pinning of the inode cluster buffer to a ->precommit operation. This
 * matches how the xfs_iunlink_item locks the inode cluster buffer, and it
 * ensures that the inode cluster buffer locking is always done last in a
 * transaction. i.e. we ensure the lock order is always AGI -> AGF -> inode
 * cluster buffer.
 *
 * If we return the inode number as the precommit sort key then we'll also
 * guarantee that the order all inode cluster buffer locking is the same all the
 * inodes and unlink items in the transaction.
 */
static int
xfs_inode_item_precommit(
	struct xfs_trans	*tp,
	struct xfs_log_item	*lip)
{ … }

/*
 * The logged size of an inode fork is always the current size of the inode
 * fork. This means that when an inode fork is relogged, the size of the logged
 * region is determined by the current state, not the combination of the
 * previously logged state + the current state. This is different relogging
 * behaviour to most other log items which will retain the size of the
 * previously logged changes when smaller regions are relogged.
 *
 * Hence operations that remove data from the inode fork (e.g. shortform
 * dir/attr remove, extent form extent removal, etc), the size of the relogged
 * inode gets -smaller- rather than stays the same size as the previously logged
 * size and this can result in the committing transaction reducing the amount of
 * space being consumed by the CIL.
 */
STATIC void
xfs_inode_item_data_fork_size(
	struct xfs_inode_log_item *iip,
	int			*nvecs,
	int			*nbytes)
{ … }

STATIC void
xfs_inode_item_attr_fork_size(
	struct xfs_inode_log_item *iip,
	int			*nvecs,
	int			*nbytes)
{ … }

/*
 * This returns the number of iovecs needed to log the given inode item.
 *
 * We need one iovec for the inode log format structure, one for the
 * inode core, and possibly one for the inode data/extents/b-tree root
 * and one for the inode attribute data/extents/b-tree root.
 */
STATIC void
xfs_inode_item_size(
	struct xfs_log_item	*lip,
	int			*nvecs,
	int			*nbytes)
{ … }

STATIC void
xfs_inode_item_format_data_fork(
	struct xfs_inode_log_item *iip,
	struct xfs_inode_log_format *ilf,
	struct xfs_log_vec	*lv,
	struct xfs_log_iovec	**vecp)
{ … }

STATIC void
xfs_inode_item_format_attr_fork(
	struct xfs_inode_log_item *iip,
	struct xfs_inode_log_format *ilf,
	struct xfs_log_vec	*lv,
	struct xfs_log_iovec	**vecp)
{ … }

/*
 * Convert an incore timestamp to a log timestamp.  Note that the log format
 * specifies host endian format!
 */
static inline xfs_log_timestamp_t
xfs_inode_to_log_dinode_ts(
	struct xfs_inode		*ip,
	const struct timespec64		tv)
{ … }

/*
 * The legacy DMAPI fields are only present in the on-disk and in-log inodes,
 * but not in the in-memory one.  But we are guaranteed to have an inode buffer
 * in memory when logging an inode, so we can just copy it from the on-disk
 * inode to the in-log inode here so that recovery of file system with these
 * fields set to non-zero values doesn't lose them.  For all other cases we zero
 * the fields.
 */
static void
xfs_copy_dm_fields_to_log_dinode(
	struct xfs_inode	*ip,
	struct xfs_log_dinode	*to)
{ … }

static inline void
xfs_inode_to_log_dinode_iext_counters(
	struct xfs_inode	*ip,
	struct xfs_log_dinode	*to)
{ … }

static void
xfs_inode_to_log_dinode(
	struct xfs_inode	*ip,
	struct xfs_log_dinode	*to,
	xfs_lsn_t		lsn)
{ … }

/*
 * Format the inode core. Current timestamp data is only in the VFS inode
 * fields, so we need to grab them from there. Hence rather than just copying
 * the XFS inode core structure, format the fields directly into the iovec.
 */
static void
xfs_inode_item_format_core(
	struct xfs_inode	*ip,
	struct xfs_log_vec	*lv,
	struct xfs_log_iovec	**vecp)
{ … }

/*
 * This is called to fill in the vector of log iovecs for the given inode
 * log item.  It fills the first item with an inode log format structure,
 * the second with the on-disk inode structure, and a possible third and/or
 * fourth with the inode data/extents/b-tree root and inode attributes
 * data/extents/b-tree root.
 *
 * Note: Always use the 64 bit inode log format structure so we don't
 * leave an uninitialised hole in the format item on 64 bit systems. Log
 * recovery on 32 bit systems handles this just fine, so there's no reason
 * for not using an initialising the properly padded structure all the time.
 */
STATIC void
xfs_inode_item_format(
	struct xfs_log_item	*lip,
	struct xfs_log_vec	*lv)
{ … }

/*
 * This is called to pin the inode associated with the inode log
 * item in memory so it cannot be written out.
 */
STATIC void
xfs_inode_item_pin(
	struct xfs_log_item	*lip)
{ … }


/*
 * This is called to unpin the inode associated with the inode log
 * item which was previously pinned with a call to xfs_inode_item_pin().
 *
 * Also wake up anyone in xfs_iunpin_wait() if the count goes to 0.
 *
 * Note that unpin can race with inode cluster buffer freeing marking the buffer
 * stale. In that case, flush completions are run from the buffer unpin call,
 * which may happen before the inode is unpinned. If we lose the race, there
 * will be no buffer attached to the log item, but the inode will be marked
 * XFS_ISTALE.
 */
STATIC void
xfs_inode_item_unpin(
	struct xfs_log_item	*lip,
	int			remove)
{ … }

STATIC uint
xfs_inode_item_push(
	struct xfs_log_item	*lip,
	struct list_head	*buffer_list)
		__releases(&lip->li_ailp->ail_lock)
		__acquires(&lip->li_ailp->ail_lock)
{ … }

/*
 * Unlock the inode associated with the inode log item.
 */
STATIC void
xfs_inode_item_release(
	struct xfs_log_item	*lip)
{ … }

/*
 * This is called to find out where the oldest active copy of the inode log
 * item in the on disk log resides now that the last log write of it completed
 * at the given lsn.  Since we always re-log all dirty data in an inode, the
 * latest copy in the on disk log is the only one that matters.  Therefore,
 * simply return the given lsn.
 *
 * If the inode has been marked stale because the cluster is being freed, we
 * don't want to (re-)insert this inode into the AIL. There is a race condition
 * where the cluster buffer may be unpinned before the inode is inserted into
 * the AIL during transaction committed processing. If the buffer is unpinned
 * before the inode item has been committed and inserted, then it is possible
 * for the buffer to be written and IO completes before the inode is inserted
 * into the AIL. In that case, we'd be inserting a clean, stale inode into the
 * AIL which will never get removed. It will, however, get reclaimed which
 * triggers an assert in xfs_inode_free() complaining about freein an inode
 * still in the AIL.
 *
 * To avoid this, just unpin the inode directly and return a LSN of -1 so the
 * transaction committed code knows that it does not need to do any further
 * processing on the item.
 */
STATIC xfs_lsn_t
xfs_inode_item_committed(
	struct xfs_log_item	*lip,
	xfs_lsn_t		lsn)
{ … }

STATIC void
xfs_inode_item_committing(
	struct xfs_log_item	*lip,
	xfs_csn_t		seq)
{ … }

static const struct xfs_item_ops xfs_inode_item_ops = …;


/*
 * Initialize the inode log item for a newly allocated (in-core) inode.
 */
void
xfs_inode_item_init(
	struct xfs_inode	*ip,
	struct xfs_mount	*mp)
{ … }

/*
 * Free the inode log item and any memory hanging off of it.
 */
void
xfs_inode_item_destroy(
	struct xfs_inode	*ip)
{ … }


/*
 * We only want to pull the item from the AIL if it is actually there
 * and its location in the log has not changed since we started the
 * flush.  Thus, we only bother if the inode's lsn has not changed.
 */
static void
xfs_iflush_ail_updates(
	struct xfs_ail		*ailp,
	struct list_head	*list)
{ … }

/*
 * Walk the list of inodes that have completed their IOs. If they are clean
 * remove them from the list and dissociate them from the buffer. Buffers that
 * are still dirty remain linked to the buffer and on the list. Caller must
 * handle them appropriately.
 */
static void
xfs_iflush_finish(
	struct xfs_buf		*bp,
	struct list_head	*list)
{ … }

/*
 * Inode buffer IO completion routine.  It is responsible for removing inodes
 * attached to the buffer from the AIL if they have not been re-logged and
 * completing the inode flush.
 */
void
xfs_buf_inode_iodone(
	struct xfs_buf		*bp)
{ … }

void
xfs_buf_inode_io_fail(
	struct xfs_buf		*bp)
{ … }

/*
 * Clear the inode logging fields so no more flushes are attempted.  If we are
 * on a buffer list, it is now safe to remove it because the buffer is
 * guaranteed to be locked. The caller will drop the reference to the buffer
 * the log item held.
 */
static void
xfs_iflush_abort_clean(
	struct xfs_inode_log_item *iip)
{ … }

/*
 * Abort flushing the inode from a context holding the cluster buffer locked.
 *
 * This is the normal runtime method of aborting writeback of an inode that is
 * attached to a cluster buffer. It occurs when the inode and the backing
 * cluster buffer have been freed (i.e. inode is XFS_ISTALE), or when cluster
 * flushing or buffer IO completion encounters a log shutdown situation.
 *
 * If we need to abort inode writeback and we don't already hold the buffer
 * locked, call xfs_iflush_shutdown_abort() instead as this should only ever be
 * necessary in a shutdown situation.
 */
void
xfs_iflush_abort(
	struct xfs_inode	*ip)
{ … }

/*
 * Abort an inode flush in the case of a shutdown filesystem. This can be called
 * from anywhere with just an inode reference and does not require holding the
 * inode cluster buffer locked. If the inode is attached to a cluster buffer,
 * it will grab and lock it safely, then abort the inode flush.
 */
void
xfs_iflush_shutdown_abort(
	struct xfs_inode	*ip)
{ … }


/*
 * convert an xfs_inode_log_format struct from the old 32 bit version
 * (which can have different field alignments) to the native 64 bit version
 */
int
xfs_inode_item_format_convert(
	struct xfs_log_iovec		*buf,
	struct xfs_inode_log_format	*in_f)
{ … }
linux/fs/xfs/xfs_inode_item.c