xfs_log_recover.c | Explore in Territory

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
#include "xfs_log_recover.h"
#include "xfs_trans_priv.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_error.h"
#include "xfs_buf_item.h"
#include "xfs_ag.h"
#include "xfs_quota.h"
#include "xfs_reflink.h"

#define BLK_AVG(blk1, blk2) …

STATIC int
xlog_find_zeroed(
	struct xlog	*,
	xfs_daddr_t	*);
STATIC int
xlog_clear_stale_blocks(
	struct xlog	*,
	xfs_lsn_t);
STATIC int
xlog_do_recovery_pass(
        struct xlog *, xfs_daddr_t, xfs_daddr_t, int, xfs_daddr_t *);

/*
 * Sector aligned buffer routines for buffer create/read/write/access
 */

/*
 * Verify the log-relative block number and length in basic blocks are valid for
 * an operation involving the given XFS log buffer. Returns true if the fields
 * are valid, false otherwise.
 */
static inline bool
xlog_verify_bno(
	struct xlog	*log,
	xfs_daddr_t	blk_no,
	int		bbcount)
{ … }

/*
 * Allocate a buffer to hold log data.  The buffer needs to be able to map to
 * a range of nbblks basic blocks at any valid offset within the log.
 */
static char *
xlog_alloc_buffer(
	struct xlog	*log,
	int		nbblks)
{ … }

/*
 * Return the address of the start of the given block number's data
 * in a log buffer.  The buffer covers a log sector-aligned region.
 */
static inline unsigned int
xlog_align(
	struct xlog	*log,
	xfs_daddr_t	blk_no)
{ … }

static int
xlog_do_io(
	struct xlog		*log,
	xfs_daddr_t		blk_no,
	unsigned int		nbblks,
	char			*data,
	enum req_op		op)
{ … }

STATIC int
xlog_bread_noalign(
	struct xlog	*log,
	xfs_daddr_t	blk_no,
	int		nbblks,
	char		*data)
{ … }

STATIC int
xlog_bread(
	struct xlog	*log,
	xfs_daddr_t	blk_no,
	int		nbblks,
	char		*data,
	char		**offset)
{ … }

STATIC int
xlog_bwrite(
	struct xlog	*log,
	xfs_daddr_t	blk_no,
	int		nbblks,
	char		*data)
{ … }

#ifdef DEBUG
/*
 * dump debug superblock and log record information
 */
STATIC void
xlog_header_check_dump(
	xfs_mount_t		*mp,
	xlog_rec_header_t	*head)
{ … }
#else
#define xlog_header_check_dump …
#endif

/*
 * check log record header for recovery
 */
STATIC int
xlog_header_check_recover(
	xfs_mount_t		*mp,
	xlog_rec_header_t	*head)
{ … }

/*
 * read the head block of the log and check the header
 */
STATIC int
xlog_header_check_mount(
	xfs_mount_t		*mp,
	xlog_rec_header_t	*head)
{ … }

/*
 * This routine finds (to an approximation) the first block in the physical
 * log which contains the given cycle.  It uses a binary search algorithm.
 * Note that the algorithm can not be perfect because the disk will not
 * necessarily be perfect.
 */
STATIC int
xlog_find_cycle_start(
	struct xlog	*log,
	char		*buffer,
	xfs_daddr_t	first_blk,
	xfs_daddr_t	*last_blk,
	uint		cycle)
{ … }

/*
 * Check that a range of blocks does not contain stop_on_cycle_no.
 * Fill in *new_blk with the block offset where such a block is
 * found, or with -1 (an invalid block number) if there is no such
 * block in the range.  The scan needs to occur from front to back
 * and the pointer into the region must be updated since a later
 * routine will need to perform another test.
 */
STATIC int
xlog_find_verify_cycle(
	struct xlog	*log,
	xfs_daddr_t	start_blk,
	int		nbblks,
	uint		stop_on_cycle_no,
	xfs_daddr_t	*new_blk)
{ … }

static inline int
xlog_logrec_hblks(struct xlog *log, struct xlog_rec_header *rh)
{ … }

/*
 * Potentially backup over partial log record write.
 *
 * In the typical case, last_blk is the number of the block directly after
 * a good log record.  Therefore, we subtract one to get the block number
 * of the last block in the given buffer.  extra_bblks contains the number
 * of blocks we would have read on a previous read.  This happens when the
 * last log record is split over the end of the physical log.
 *
 * extra_bblks is the number of blocks potentially verified on a previous
 * call to this routine.
 */
STATIC int
xlog_find_verify_log_record(
	struct xlog		*log,
	xfs_daddr_t		start_blk,
	xfs_daddr_t		*last_blk,
	int			extra_bblks)
{ … }

/*
 * Head is defined to be the point of the log where the next log write
 * could go.  This means that incomplete LR writes at the end are
 * eliminated when calculating the head.  We aren't guaranteed that previous
 * LR have complete transactions.  We only know that a cycle number of
 * current cycle number -1 won't be present in the log if we start writing
 * from our current block number.
 *
 * last_blk contains the block number of the first block with a given
 * cycle number.
 *
 * Return: zero if normal, non-zero if error.
 */
STATIC int
xlog_find_head(
	struct xlog	*log,
	xfs_daddr_t	*return_head_blk)
{ … }

/*
 * Seek backwards in the log for log record headers.
 *
 * Given a starting log block, walk backwards until we find the provided number
 * of records or hit the provided tail block. The return value is the number of
 * records encountered or a negative error code. The log block and buffer
 * pointer of the last record seen are returned in rblk and rhead respectively.
 */
STATIC int
xlog_rseek_logrec_hdr(
	struct xlog		*log,
	xfs_daddr_t		head_blk,
	xfs_daddr_t		tail_blk,
	int			count,
	char			*buffer,
	xfs_daddr_t		*rblk,
	struct xlog_rec_header	**rhead,
	bool			*wrapped)
{ … }

/*
 * Seek forward in the log for log record headers.
 *
 * Given head and tail blocks, walk forward from the tail block until we find
 * the provided number of records or hit the head block. The return value is the
 * number of records encountered or a negative error code. The log block and
 * buffer pointer of the last record seen are returned in rblk and rhead
 * respectively.
 */
STATIC int
xlog_seek_logrec_hdr(
	struct xlog		*log,
	xfs_daddr_t		head_blk,
	xfs_daddr_t		tail_blk,
	int			count,
	char			*buffer,
	xfs_daddr_t		*rblk,
	struct xlog_rec_header	**rhead,
	bool			*wrapped)
{ … }

/*
 * Calculate distance from head to tail (i.e., unused space in the log).
 */
static inline int
xlog_tail_distance(
	struct xlog	*log,
	xfs_daddr_t	head_blk,
	xfs_daddr_t	tail_blk)
{ … }

/*
 * Verify the log tail. This is particularly important when torn or incomplete
 * writes have been detected near the front of the log and the head has been
 * walked back accordingly.
 *
 * We also have to handle the case where the tail was pinned and the head
 * blocked behind the tail right before a crash. If the tail had been pushed
 * immediately prior to the crash and the subsequent checkpoint was only
 * partially written, it's possible it overwrote the last referenced tail in the
 * log with garbage. This is not a coherency problem because the tail must have
 * been pushed before it can be overwritten, but appears as log corruption to
 * recovery because we have no way to know the tail was updated if the
 * subsequent checkpoint didn't write successfully.
 *
 * Therefore, CRC check the log from tail to head. If a failure occurs and the
 * offending record is within max iclog bufs from the head, walk the tail
 * forward and retry until a valid tail is found or corruption is detected out
 * of the range of a possible overwrite.
 */
STATIC int
xlog_verify_tail(
	struct xlog		*log,
	xfs_daddr_t		head_blk,
	xfs_daddr_t		*tail_blk,
	int			hsize)
{ … }

/*
 * Detect and trim torn writes from the head of the log.
 *
 * Storage without sector atomicity guarantees can result in torn writes in the
 * log in the event of a crash. Our only means to detect this scenario is via
 * CRC verification. While we can't always be certain that CRC verification
 * failure is due to a torn write vs. an unrelated corruption, we do know that
 * only a certain number (XLOG_MAX_ICLOGS) of log records can be written out at
 * one time. Therefore, CRC verify up to XLOG_MAX_ICLOGS records at the head of
 * the log and treat failures in this range as torn writes as a matter of
 * policy. In the event of CRC failure, the head is walked back to the last good
 * record in the log and the tail is updated from that record and verified.
 */
STATIC int
xlog_verify_head(
	struct xlog		*log,
	xfs_daddr_t		*head_blk,	/* in/out: unverified head */
	xfs_daddr_t		*tail_blk,	/* out: tail block */
	char			*buffer,
	xfs_daddr_t		*rhead_blk,	/* start blk of last record */
	struct xlog_rec_header	**rhead,	/* ptr to last record */
	bool			*wrapped)	/* last rec. wraps phys. log */
{ … }

/*
 * We need to make sure we handle log wrapping properly, so we can't use the
 * calculated logbno directly. Make sure it wraps to the correct bno inside the
 * log.
 *
 * The log is limited to 32 bit sizes, so we use the appropriate modulus
 * operation here and cast it back to a 64 bit daddr on return.
 */
static inline xfs_daddr_t
xlog_wrap_logbno(
	struct xlog		*log,
	xfs_daddr_t		bno)
{ … }

/*
 * Check whether the head of the log points to an unmount record. In other
 * words, determine whether the log is clean. If so, update the in-core state
 * appropriately.
 */
static int
xlog_check_unmount_rec(
	struct xlog		*log,
	xfs_daddr_t		*head_blk,
	xfs_daddr_t		*tail_blk,
	struct xlog_rec_header	*rhead,
	xfs_daddr_t		rhead_blk,
	char			*buffer,
	bool			*clean)
{ … }

static void
xlog_set_state(
	struct xlog		*log,
	xfs_daddr_t		head_blk,
	struct xlog_rec_header	*rhead,
	xfs_daddr_t		rhead_blk,
	bool			bump_cycle)
{ … }

/*
 * Find the sync block number or the tail of the log.
 *
 * This will be the block number of the last record to have its
 * associated buffers synced to disk.  Every log record header has
 * a sync lsn embedded in it.  LSNs hold block numbers, so it is easy
 * to get a sync block number.  The only concern is to figure out which
 * log record header to believe.
 *
 * The following algorithm uses the log record header with the largest
 * lsn.  The entire log record does not need to be valid.  We only care
 * that the header is valid.
 *
 * We could speed up search by using current head_blk buffer, but it is not
 * available.
 */
STATIC int
xlog_find_tail(
	struct xlog		*log,
	xfs_daddr_t		*head_blk,
	xfs_daddr_t		*tail_blk)
{ … }

/*
 * Is the log zeroed at all?
 *
 * The last binary search should be changed to perform an X block read
 * once X becomes small enough.  You can then search linearly through
 * the X blocks.  This will cut down on the number of reads we need to do.
 *
 * If the log is partially zeroed, this routine will pass back the blkno
 * of the first block with cycle number 0.  It won't have a complete LR
 * preceding it.
 *
 * Return:
 *	0  => the log is completely written to
 *	1 => use *blk_no as the first block of the log
 *	<0 => error has occurred
 */
STATIC int
xlog_find_zeroed(
	struct xlog	*log,
	xfs_daddr_t	*blk_no)
{ … }

/*
 * These are simple subroutines used by xlog_clear_stale_blocks() below
 * to initialize a buffer full of empty log record headers and write
 * them into the log.
 */
STATIC void
xlog_add_record(
	struct xlog		*log,
	char			*buf,
	int			cycle,
	int			block,
	int			tail_cycle,
	int			tail_block)
{ … }

STATIC int
xlog_write_log_records(
	struct xlog	*log,
	int		cycle,
	int		start_block,
	int		blocks,
	int		tail_cycle,
	int		tail_block)
{ … }

/*
 * This routine is called to blow away any incomplete log writes out
 * in front of the log head.  We do this so that we won't become confused
 * if we come up, write only a little bit more, and then crash again.
 * If we leave the partial log records out there, this situation could
 * cause us to think those partial writes are valid blocks since they
 * have the current cycle number.  We get rid of them by overwriting them
 * with empty log records with the old cycle number rather than the
 * current one.
 *
 * The tail lsn is passed in rather than taken from
 * the log so that we will not write over the unmount record after a
 * clean unmount in a 512 block log.  Doing so would leave the log without
 * any valid log records in it until a new one was written.  If we crashed
 * during that time we would not be able to recover.
 */
STATIC int
xlog_clear_stale_blocks(
	struct xlog	*log,
	xfs_lsn_t	tail_lsn)
{ … }

/*
 * Release the recovered intent item in the AIL that matches the given intent
 * type and intent id.
 */
void
xlog_recover_release_intent(
	struct xlog			*log,
	unsigned short			intent_type,
	uint64_t			intent_id)
{ … }

int
xlog_recover_iget(
	struct xfs_mount	*mp,
	xfs_ino_t		ino,
	struct xfs_inode	**ipp)
{ … }

/*
 * Get an inode so that we can recover a log operation.
 *
 * Log intent items that target inodes effectively contain a file handle.
 * Check that the generation number matches the intent item like we do for
 * other file handles.  Log intent items defined after this validation weakness
 * was identified must use this function.
 */
int
xlog_recover_iget_handle(
	struct xfs_mount	*mp,
	xfs_ino_t		ino,
	uint32_t		gen,
	struct xfs_inode	**ipp)
{ … }

/******************************************************************************
 *
 *		Log recover routines
 *
 ******************************************************************************
 */
static const struct xlog_recover_item_ops *xlog_recover_item_ops[] = …;

static const struct xlog_recover_item_ops *
xlog_find_item_ops(
	struct xlog_recover_item		*item)
{ … }

/*
 * Sort the log items in the transaction.
 *
 * The ordering constraints are defined by the inode allocation and unlink
 * behaviour. The rules are:
 *
 *	1. Every item is only logged once in a given transaction. Hence it
 *	   represents the last logged state of the item. Hence ordering is
 *	   dependent on the order in which operations need to be performed so
 *	   required initial conditions are always met.
 *
 *	2. Cancelled buffers are recorded in pass 1 in a separate table and
 *	   there's nothing to replay from them so we can simply cull them
 *	   from the transaction. However, we can't do that until after we've
 *	   replayed all the other items because they may be dependent on the
 *	   cancelled buffer and replaying the cancelled buffer can remove it
 *	   form the cancelled buffer table. Hence they have tobe done last.
 *
 *	3. Inode allocation buffers must be replayed before inode items that
 *	   read the buffer and replay changes into it. For filesystems using the
 *	   ICREATE transactions, this means XFS_LI_ICREATE objects need to get
 *	   treated the same as inode allocation buffers as they create and
 *	   initialise the buffers directly.
 *
 *	4. Inode unlink buffers must be replayed after inode items are replayed.
 *	   This ensures that inodes are completely flushed to the inode buffer
 *	   in a "free" state before we remove the unlinked inode list pointer.
 *
 * Hence the ordering needs to be inode allocation buffers first, inode items
 * second, inode unlink buffers third and cancelled buffers last.
 *
 * But there's a problem with that - we can't tell an inode allocation buffer
 * apart from a regular buffer, so we can't separate them. We can, however,
 * tell an inode unlink buffer from the others, and so we can separate them out
 * from all the other buffers and move them to last.
 *
 * Hence, 4 lists, in order from head to tail:
 *	- buffer_list for all buffers except cancelled/inode unlink buffers
 *	- item_list for all non-buffer items
 *	- inode_buffer_list for inode unlink buffers
 *	- cancel_list for the cancelled buffers
 *
 * Note that we add objects to the tail of the lists so that first-to-last
 * ordering is preserved within the lists. Adding objects to the head of the
 * list means when we traverse from the head we walk them in last-to-first
 * order. For cancelled buffers and inode unlink buffers this doesn't matter,
 * but for all other items there may be specific ordering that we need to
 * preserve.
 */
STATIC int
xlog_recover_reorder_trans(
	struct xlog		*log,
	struct xlog_recover	*trans,
	int			pass)
{ … }

void
xlog_buf_readahead(
	struct xlog		*log,
	xfs_daddr_t		blkno,
	uint			len,
	const struct xfs_buf_ops *ops)
{ … }

/*
 * Create a deferred work structure for resuming and tracking the progress of a
 * log intent item that was found during recovery.
 */
void
xlog_recover_intent_item(
	struct xlog			*log,
	struct xfs_log_item		*lip,
	xfs_lsn_t			lsn,
	const struct xfs_defer_op_type	*ops)
{ … }

STATIC int
xlog_recover_items_pass2(
	struct xlog                     *log,
	struct xlog_recover             *trans,
	struct list_head                *buffer_list,
	struct list_head                *item_list)
{ … }

/*
 * Perform the transaction.
 *
 * If the transaction modifies a buffer or inode, do it now.  Otherwise,
 * EFIs and EFDs get queued up by adding entries into the AIL for them.
 */
STATIC int
xlog_recover_commit_trans(
	struct xlog		*log,
	struct xlog_recover	*trans,
	int			pass,
	struct list_head	*buffer_list)
{ … }

STATIC void
xlog_recover_add_item(
	struct list_head	*head)
{ … }

STATIC int
xlog_recover_add_to_cont_trans(
	struct xlog		*log,
	struct xlog_recover	*trans,
	char			*dp,
	int			len)
{ … }

/*
 * The next region to add is the start of a new region.  It could be
 * a whole region or it could be the first part of a new region.  Because
 * of this, the assumption here is that the type and size fields of all
 * format structures fit into the first 32 bits of the structure.
 *
 * This works because all regions must be 32 bit aligned.  Therefore, we
 * either have both fields or we have neither field.  In the case we have
 * neither field, the data part of the region is zero length.  We only have
 * a log_op_header and can throw away the header since a new one will appear
 * later.  If we have at least 4 bytes, then we can determine how many regions
 * will appear in the current log item.
 */
STATIC int
xlog_recover_add_to_trans(
	struct xlog		*log,
	struct xlog_recover	*trans,
	char			*dp,
	int			len)
{ … }

/*
 * Free up any resources allocated by the transaction
 *
 * Remember that EFIs, EFDs, and IUNLINKs are handled later.
 */
STATIC void
xlog_recover_free_trans(
	struct xlog_recover	*trans)
{ … }

/*
 * On error or completion, trans is freed.
 */
STATIC int
xlog_recovery_process_trans(
	struct xlog		*log,
	struct xlog_recover	*trans,
	char			*dp,
	unsigned int		len,
	unsigned int		flags,
	int			pass,
	struct list_head	*buffer_list)
{ … }

/*
 * Lookup the transaction recovery structure associated with the ID in the
 * current ophdr. If the transaction doesn't exist and the start flag is set in
 * the ophdr, then allocate a new transaction for future ID matches to find.
 * Either way, return what we found during the lookup - an existing transaction
 * or nothing.
 */
STATIC struct xlog_recover *
xlog_recover_ophdr_to_trans(
	struct hlist_head	rhash[],
	struct xlog_rec_header	*rhead,
	struct xlog_op_header	*ohead)
{ … }

STATIC int
xlog_recover_process_ophdr(
	struct xlog		*log,
	struct hlist_head	rhash[],
	struct xlog_rec_header	*rhead,
	struct xlog_op_header	*ohead,
	char			*dp,
	char			*end,
	int			pass,
	struct list_head	*buffer_list)
{ … }

/*
 * There are two valid states of the r_state field.  0 indicates that the
 * transaction structure is in a normal state.  We have either seen the
 * start of the transaction or the last operation we added was not a partial
 * operation.  If the last operation we added to the transaction was a
 * partial operation, we need to mark r_state with XLOG_WAS_CONT_TRANS.
 *
 * NOTE: skip LRs with 0 data length.
 */
STATIC int
xlog_recover_process_data(
	struct xlog		*log,
	struct hlist_head	rhash[],
	struct xlog_rec_header	*rhead,
	char			*dp,
	int			pass,
	struct list_head	*buffer_list)
{ … }

/* Take all the collected deferred ops and finish them in order. */
static int
xlog_finish_defer_ops(
	struct xfs_mount	*mp,
	struct list_head	*capture_list)
{ … }

/* Release all the captured defer ops and capture structures in this list. */
static void
xlog_abort_defer_ops(
	struct xfs_mount		*mp,
	struct list_head		*capture_list)
{ … }

/*
 * When this is called, all of the log intent items which did not have
 * corresponding log done items should be in the AIL.  What we do now is update
 * the data structures associated with each one.
 *
 * Since we process the log intent items in normal transactions, they will be
 * removed at some point after the commit.  This prevents us from just walking
 * down the list processing each one.  We'll use a flag in the intent item to
 * skip those that we've already processed and use the AIL iteration mechanism's
 * generation count to try to speed this up at least a bit.
 *
 * When we start, we know that the intents are the only things in the AIL. As we
 * process them, however, other items are added to the AIL. Hence we know we
 * have started recovery on all the pending intents when we find an non-intent
 * item in the AIL.
 */
STATIC int
xlog_recover_process_intents(
	struct xlog			*log)
{ … }

/*
 * A cancel occurs when the mount has failed and we're bailing out.  Release all
 * pending log intent items that we haven't started recovery on so they don't
 * pin the AIL.
 */
STATIC void
xlog_recover_cancel_intents(
	struct xlog			*log)
{ … }

/*
 * Transfer ownership of the recovered pending work to the recovery transaction
 * and try to finish the work.  If there is more work to be done, the dfp will
 * remain attached to the transaction.  If not, the dfp is freed.
 */
int
xlog_recover_finish_intent(
	struct xfs_trans		*tp,
	struct xfs_defer_pending	*dfp)
{ … }

/*
 * This routine performs a transaction to null out a bad inode pointer
 * in an agi unlinked inode hash bucket.
 */
STATIC void
xlog_recover_clear_agi_bucket(
	struct xfs_perag	*pag,
	int			bucket)
{ … }

static int
xlog_recover_iunlink_bucket(
	struct xfs_perag	*pag,
	struct xfs_agi		*agi,
	int			bucket)
{ … }

/*
 * Recover AGI unlinked lists
 *
 * This is called during recovery to process any inodes which we unlinked but
 * not freed when the system crashed.  These inodes will be on the lists in the
 * AGI blocks. What we do here is scan all the AGIs and fully truncate and free
 * any inodes found on the lists. Each inode is removed from the lists when it
 * has been fully truncated and is freed. The freeing of the inode and its
 * removal from the list must be atomic.
 *
 * If everything we touch in the agi processing loop is already in memory, this
 * loop can hold the cpu for a long time. It runs without lock contention,
 * memory allocation contention, the need wait for IO, etc, and so will run
 * until we either run out of inodes to process, run low on memory or we run out
 * of log space.
 *
 * This behaviour is bad for latency on single CPU and non-preemptible kernels,
 * and can prevent other filesystem work (such as CIL pushes) from running. This
 * can lead to deadlocks if the recovery process runs out of log reservation
 * space. Hence we need to yield the CPU when there is other kernel work
 * scheduled on this CPU to ensure other scheduled work can run without undue
 * latency.
 */
static void
xlog_recover_iunlink_ag(
	struct xfs_perag	*pag)
{ … }

static void
xlog_recover_process_iunlinks(
	struct xlog	*log)
{ … }

STATIC void
xlog_unpack_data(
	struct xlog_rec_header	*rhead,
	char			*dp,
	struct xlog		*log)
{ … }

/*
 * CRC check, unpack and process a log record.
 */
STATIC int
xlog_recover_process(
	struct xlog		*log,
	struct hlist_head	rhash[],
	struct xlog_rec_header	*rhead,
	char			*dp,
	int			pass,
	struct list_head	*buffer_list)
{ … }

STATIC int
xlog_valid_rec_header(
	struct xlog		*log,
	struct xlog_rec_header	*rhead,
	xfs_daddr_t		blkno,
	int			bufsize)
{ … }

/*
 * Read the log from tail to head and process the log records found.
 * Handle the two cases where the tail and head are in the same cycle
 * and where the active portion of the log wraps around the end of
 * the physical log separately.  The pass parameter is passed through
 * to the routines called to process the data and is not looked at
 * here.
 */
STATIC int
xlog_do_recovery_pass(
	struct xlog		*log,
	xfs_daddr_t		head_blk,
	xfs_daddr_t		tail_blk,
	int			pass,
	xfs_daddr_t		*first_bad)	/* out: first bad log rec */
{ … }

/*
 * Do the recovery of the log.  We actually do this in two phases.
 * The two passes are necessary in order to implement the function
 * of cancelling a record written into the log.  The first pass
 * determines those things which have been cancelled, and the
 * second pass replays log items normally except for those which
 * have been cancelled.  The handling of the replay and cancellations
 * takes place in the log item type specific routines.
 *
 * The table of items which have cancel records in the log is allocated
 * and freed at this level, since only here do we know when all of
 * the log recovery has been completed.
 */
STATIC int
xlog_do_log_recovery(
	struct xlog	*log,
	xfs_daddr_t	head_blk,
	xfs_daddr_t	tail_blk)
{ … }

/*
 * Do the actual recovery
 */
STATIC int
xlog_do_recover(
	struct xlog		*log,
	xfs_daddr_t		head_blk,
	xfs_daddr_t		tail_blk)
{ … }

/*
 * Perform recovery and re-initialize some log variables in xlog_find_tail.
 *
 * Return error or zero.
 */
int
xlog_recover(
	struct xlog	*log)
{ … }

/*
 * In the first part of recovery we replay inodes and buffers and build up the
 * list of intents which need to be processed. Here we process the intents and
 * clean up the on disk unlinked inode lists. This is separated from the first
 * part of recovery so that the root and real-time bitmap inodes can be read in
 * from disk in between the two stages.  This is necessary so that we can free
 * space in the real-time portion of the file system.
 *
 * We run this whole process under GFP_NOFS allocation context. We do a
 * combination of non-transactional and transactional work, yet we really don't
 * want to recurse into the filesystem from direct reclaim during any of this
 * processing. This allows all the recovery code run here not to care about the
 * memory allocation context it is running in.
 */
int
xlog_recover_finish(
	struct xlog	*log)
{ … }

void
xlog_recover_cancel(
	struct xlog	*log)
{ … }
linux/fs/xfs/xfs_log_recover.c