linux/fs/xfs/libxfs/xfs_refcount.c

// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2016 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <[email protected]>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_bmap.h"
#include "xfs_refcount_btree.h"
#include "xfs_alloc.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_bit.h"
#include "xfs_refcount.h"
#include "xfs_rmap.h"
#include "xfs_ag.h"
#include "xfs_health.h"
#include "xfs_refcount_item.h"

struct kmem_cache	*xfs_refcount_intent_cache;

/* Allowable refcount adjustment amounts. */
enum xfs_refc_adjust_op {};

STATIC int __xfs_refcount_cow_alloc(struct xfs_btree_cur *rcur,
		xfs_agblock_t agbno, xfs_extlen_t aglen);
STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur,
		xfs_agblock_t agbno, xfs_extlen_t aglen);

/*
 * Look up the first record less than or equal to [bno, len] in the btree
 * given by cur.
 */
int
xfs_refcount_lookup_le(
	struct xfs_btree_cur	*cur,
	enum xfs_refc_domain	domain,
	xfs_agblock_t		bno,
	int			*stat)
{}

/*
 * Look up the first record greater than or equal to [bno, len] in the btree
 * given by cur.
 */
int
xfs_refcount_lookup_ge(
	struct xfs_btree_cur	*cur,
	enum xfs_refc_domain	domain,
	xfs_agblock_t		bno,
	int			*stat)
{}

/*
 * Look up the first record equal to [bno, len] in the btree
 * given by cur.
 */
int
xfs_refcount_lookup_eq(
	struct xfs_btree_cur	*cur,
	enum xfs_refc_domain	domain,
	xfs_agblock_t		bno,
	int			*stat)
{}

/* Convert on-disk record to in-core format. */
void
xfs_refcount_btrec_to_irec(
	const union xfs_btree_rec	*rec,
	struct xfs_refcount_irec	*irec)
{}

/* Simple checks for refcount records. */
xfs_failaddr_t
xfs_refcount_check_irec(
	struct xfs_perag		*pag,
	const struct xfs_refcount_irec	*irec)
{}

static inline int
xfs_refcount_complain_bad_rec(
	struct xfs_btree_cur		*cur,
	xfs_failaddr_t			fa,
	const struct xfs_refcount_irec	*irec)
{}

/*
 * Get the data from the pointed-to record.
 */
int
xfs_refcount_get_rec(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec,
	int				*stat)
{}

/*
 * Update the record referred to by cur to the value given
 * by [bno, len, refcount].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int
xfs_refcount_update(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec)
{}

/*
 * Insert the record referred to by cur to the value given
 * by [bno, len, refcount].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
int
xfs_refcount_insert(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec,
	int				*i)
{}

/*
 * Remove the record referred to by cur, then set the pointer to the spot
 * where the record could be re-inserted, in case we want to increment or
 * decrement the cursor.
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int
xfs_refcount_delete(
	struct xfs_btree_cur	*cur,
	int			*i)
{}

/*
 * Adjusting the Reference Count
 *
 * As stated elsewhere, the reference count btree (refcbt) stores
 * >1 reference counts for extents of physical blocks.  In this
 * operation, we're either raising or lowering the reference count of
 * some subrange stored in the tree:
 *
 *      <------ adjustment range ------>
 * ----+   +---+-----+ +--+--------+---------
 *  2  |   | 3 |  4  | |17|   55   |   10
 * ----+   +---+-----+ +--+--------+---------
 * X axis is physical blocks number;
 * reference counts are the numbers inside the rectangles
 *
 * The first thing we need to do is to ensure that there are no
 * refcount extents crossing either boundary of the range to be
 * adjusted.  For any extent that does cross a boundary, split it into
 * two extents so that we can increment the refcount of one of the
 * pieces later:
 *
 *      <------ adjustment range ------>
 * ----+   +---+-----+ +--+--------+----+----
 *  2  |   | 3 |  2  | |17|   55   | 10 | 10
 * ----+   +---+-----+ +--+--------+----+----
 *
 * For this next step, let's assume that all the physical blocks in
 * the adjustment range are mapped to a file and are therefore in use
 * at least once.  Therefore, we can infer that any gap in the
 * refcount tree within the adjustment range represents a physical
 * extent with refcount == 1:
 *
 *      <------ adjustment range ------>
 * ----+---+---+-----+-+--+--------+----+----
 *  2  |"1"| 3 |  2  |1|17|   55   | 10 | 10
 * ----+---+---+-----+-+--+--------+----+----
 *      ^
 *
 * For each extent that falls within the interval range, figure out
 * which extent is to the left or the right of that extent.  Now we
 * have a left, current, and right extent.  If the new reference count
 * of the center extent enables us to merge left, center, and right
 * into one record covering all three, do so.  If the center extent is
 * at the left end of the range, abuts the left extent, and its new
 * reference count matches the left extent's record, then merge them.
 * If the center extent is at the right end of the range, abuts the
 * right extent, and the reference counts match, merge those.  In the
 * example, we can left merge (assuming an increment operation):
 *
 *      <------ adjustment range ------>
 * --------+---+-----+-+--+--------+----+----
 *    2    | 3 |  2  |1|17|   55   | 10 | 10
 * --------+---+-----+-+--+--------+----+----
 *          ^
 *
 * For all other extents within the range, adjust the reference count
 * or delete it if the refcount falls below 2.  If we were
 * incrementing, the end result looks like this:
 *
 *      <------ adjustment range ------>
 * --------+---+-----+-+--+--------+----+----
 *    2    | 4 |  3  |2|18|   56   | 11 | 10
 * --------+---+-----+-+--+--------+----+----
 *
 * The result of a decrement operation looks as such:
 *
 *      <------ adjustment range ------>
 * ----+   +---+       +--+--------+----+----
 *  2  |   | 2 |       |16|   54   |  9 | 10
 * ----+   +---+       +--+--------+----+----
 *      DDDD    111111DD
 *
 * The blocks marked "D" are freed; the blocks marked "1" are only
 * referenced once and therefore the record is removed from the
 * refcount btree.
 */

/* Next block after this extent. */
static inline xfs_agblock_t
xfs_refc_next(
	struct xfs_refcount_irec	*rc)
{}

/*
 * Split a refcount extent that crosses agbno.
 */
STATIC int
xfs_refcount_split_extent(
	struct xfs_btree_cur		*cur,
	enum xfs_refc_domain		domain,
	xfs_agblock_t			agbno,
	bool				*shape_changed)
{}

/*
 * Merge the left, center, and right extents.
 */
STATIC int
xfs_refcount_merge_center_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*center,
	struct xfs_refcount_irec	*right,
	unsigned long long		extlen,
	xfs_extlen_t			*aglen)
{}

/*
 * Merge with the left extent.
 */
STATIC int
xfs_refcount_merge_left_extent(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*cleft,
	xfs_agblock_t			*agbno,
	xfs_extlen_t			*aglen)
{}

/*
 * Merge with the right extent.
 */
STATIC int
xfs_refcount_merge_right_extent(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*right,
	struct xfs_refcount_irec	*cright,
	xfs_extlen_t			*aglen)
{}

/*
 * Find the left extent and the one after it (cleft).  This function assumes
 * that we've already split any extent crossing agbno.
 */
STATIC int
xfs_refcount_find_left_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*cleft,
	enum xfs_refc_domain		domain,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen)
{}

/*
 * Find the right extent and the one before it (cright).  This function
 * assumes that we've already split any extents crossing agbno + aglen.
 */
STATIC int
xfs_refcount_find_right_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*right,
	struct xfs_refcount_irec	*cright,
	enum xfs_refc_domain		domain,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen)
{}

/* Is this extent valid? */
static inline bool
xfs_refc_valid(
	const struct xfs_refcount_irec	*rc)
{}

static inline xfs_nlink_t
xfs_refc_merge_refcount(
	const struct xfs_refcount_irec	*irec,
	enum xfs_refc_adjust_op		adjust)
{}

static inline bool
xfs_refc_want_merge_center(
	const struct xfs_refcount_irec	*left,
	const struct xfs_refcount_irec	*cleft,
	const struct xfs_refcount_irec	*cright,
	const struct xfs_refcount_irec	*right,
	bool				cleft_is_cright,
	enum xfs_refc_adjust_op		adjust,
	unsigned long long		*ulenp)
{}

static inline bool
xfs_refc_want_merge_left(
	const struct xfs_refcount_irec	*left,
	const struct xfs_refcount_irec	*cleft,
	enum xfs_refc_adjust_op		adjust)
{}

static inline bool
xfs_refc_want_merge_right(
	const struct xfs_refcount_irec	*cright,
	const struct xfs_refcount_irec	*right,
	enum xfs_refc_adjust_op		adjust)
{}

/*
 * Try to merge with any extents on the boundaries of the adjustment range.
 */
STATIC int
xfs_refcount_merge_extents(
	struct xfs_btree_cur	*cur,
	enum xfs_refc_domain	domain,
	xfs_agblock_t		*agbno,
	xfs_extlen_t		*aglen,
	enum xfs_refc_adjust_op adjust,
	bool			*shape_changed)
{}

/*
 * XXX: This is a pretty hand-wavy estimate.  The penalty for guessing
 * true incorrectly is a shutdown FS; the penalty for guessing false
 * incorrectly is more transaction rolls than might be necessary.
 * Be conservative here.
 */
static bool
xfs_refcount_still_have_space(
	struct xfs_btree_cur		*cur)
{}

/*
 * Adjust the refcounts of middle extents.  At this point we should have
 * split extents that crossed the adjustment range; merged with adjacent
 * extents; and updated agbno/aglen to reflect the merges.  Therefore,
 * all we have to do is update the extents inside [agbno, agbno + aglen].
 */
STATIC int
xfs_refcount_adjust_extents(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		*agbno,
	xfs_extlen_t		*aglen,
	enum xfs_refc_adjust_op	adj)
{}

/* Adjust the reference count of a range of AG blocks. */
STATIC int
xfs_refcount_adjust(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		*agbno,
	xfs_extlen_t		*aglen,
	enum xfs_refc_adjust_op	adj)
{}

/*
 * Set up a continuation a deferred refcount operation by updating the intent.
 * Checks to make sure we're not going to run off the end of the AG.
 */
static inline int
xfs_refcount_continue_op(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_intent	*ri,
	xfs_agblock_t			new_agbno)
{}

/*
 * Process one of the deferred refcount operations.  We pass back the
 * btree cursor to maintain our lock on the btree between calls.
 * This saves time and eliminates a buffer deadlock between the
 * superblock and the AGF because we'll always grab them in the same
 * order.
 */
int
xfs_refcount_finish_one(
	struct xfs_trans		*tp,
	struct xfs_refcount_intent	*ri,
	struct xfs_btree_cur		**pcur)
{}

/*
 * Record a refcount intent for later processing.
 */
static void
__xfs_refcount_add(
	struct xfs_trans		*tp,
	enum xfs_refcount_intent_type	type,
	xfs_fsblock_t			startblock,
	xfs_extlen_t			blockcount)
{}

/*
 * Increase the reference count of the blocks backing a file's extent.
 */
void
xfs_refcount_increase_extent(
	struct xfs_trans		*tp,
	struct xfs_bmbt_irec		*PREV)
{}

/*
 * Decrease the reference count of the blocks backing a file's extent.
 */
void
xfs_refcount_decrease_extent(
	struct xfs_trans		*tp,
	struct xfs_bmbt_irec		*PREV)
{}

/*
 * Given an AG extent, find the lowest-numbered run of shared blocks
 * within that range and return the range in fbno/flen.  If
 * find_end_of_shared is set, return the longest contiguous extent of
 * shared blocks; if not, just return the first extent we find.  If no
 * shared blocks are found, fbno and flen will be set to NULLAGBLOCK
 * and 0, respectively.
 */
int
xfs_refcount_find_shared(
	struct xfs_btree_cur		*cur,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen,
	xfs_agblock_t			*fbno,
	xfs_extlen_t			*flen,
	bool				find_end_of_shared)
{}

/*
 * Recovering CoW Blocks After a Crash
 *
 * Due to the way that the copy on write mechanism works, there's a window of
 * opportunity in which we can lose track of allocated blocks during a crash.
 * Because CoW uses delayed allocation in the in-core CoW fork, writeback
 * causes blocks to be allocated and stored in the CoW fork.  The blocks are
 * no longer in the free space btree but are not otherwise recorded anywhere
 * until the write completes and the blocks are mapped into the file.  A crash
 * in between allocation and remapping results in the replacement blocks being
 * lost.  This situation is exacerbated by the CoW extent size hint because
 * allocations can hang around for long time.
 *
 * However, there is a place where we can record these allocations before they
 * become mappings -- the reference count btree.  The btree does not record
 * extents with refcount == 1, so we can record allocations with a refcount of
 * 1.  Blocks being used for CoW writeout cannot be shared, so there should be
 * no conflict with shared block records.  These mappings should be created
 * when we allocate blocks to the CoW fork and deleted when they're removed
 * from the CoW fork.
 *
 * Minor nit: records for in-progress CoW allocations and records for shared
 * extents must never be merged, to preserve the property that (except for CoW
 * allocations) there are no refcount btree entries with refcount == 1.  The
 * only time this could potentially happen is when unsharing a block that's
 * adjacent to CoW allocations, so we must be careful to avoid this.
 *
 * At mount time we recover lost CoW allocations by searching the refcount
 * btree for these refcount == 1 mappings.  These represent CoW allocations
 * that were in progress at the time the filesystem went down, so we can free
 * them to get the space back.
 *
 * This mechanism is superior to creating EFIs for unmapped CoW extents for
 * several reasons -- first, EFIs pin the tail of the log and would have to be
 * periodically relogged to avoid filling up the log.  Second, CoW completions
 * will have to file an EFD and create new EFIs for whatever remains in the
 * CoW fork; this partially takes care of (1) but extent-size reservations
 * will have to periodically relog even if there's no writeout in progress.
 * This can happen if the CoW extent size hint is set, which you really want.
 * Third, EFIs cannot currently be automatically relogged into newer
 * transactions to advance the log tail.  Fourth, stuffing the log full of
 * EFIs places an upper bound on the number of CoW allocations that can be
 * held filesystem-wide at any given time.  Recording them in the refcount
 * btree doesn't require us to maintain any state in memory and doesn't pin
 * the log.
 */
/*
 * Adjust the refcounts of CoW allocations.  These allocations are "magic"
 * in that they're not referenced anywhere else in the filesystem, so we
 * stash them in the refcount btree with a refcount of 1 until either file
 * remapping (or CoW cancellation) happens.
 */
STATIC int
xfs_refcount_adjust_cow_extents(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	enum xfs_refc_adjust_op	adj)
{}

/*
 * Add or remove refcount btree entries for CoW reservations.
 */
STATIC int
xfs_refcount_adjust_cow(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	enum xfs_refc_adjust_op	adj)
{}

/*
 * Record a CoW allocation in the refcount btree.
 */
STATIC int
__xfs_refcount_cow_alloc(
	struct xfs_btree_cur	*rcur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen)
{}

/*
 * Remove a CoW allocation from the refcount btree.
 */
STATIC int
__xfs_refcount_cow_free(
	struct xfs_btree_cur	*rcur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen)
{}

/* Record a CoW staging extent in the refcount btree. */
void
xfs_refcount_alloc_cow_extent(
	struct xfs_trans		*tp,
	xfs_fsblock_t			fsb,
	xfs_extlen_t			len)
{}

/* Forget a CoW staging event in the refcount btree. */
void
xfs_refcount_free_cow_extent(
	struct xfs_trans		*tp,
	xfs_fsblock_t			fsb,
	xfs_extlen_t			len)
{}

struct xfs_refcount_recovery {};

/* Stuff an extent on the recovery list. */
STATIC int
xfs_refcount_recover_extent(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_rec	*rec,
	void				*priv)
{}

/* Find and remove leftover CoW reservations. */
int
xfs_refcount_recover_cow_leftovers(
	struct xfs_mount		*mp,
	struct xfs_perag		*pag)
{}

/*
 * Scan part of the keyspace of the refcount records and tell us if the area
 * has no records, is fully mapped by records, or is partially filled.
 */
int
xfs_refcount_has_records(
	struct xfs_btree_cur	*cur,
	enum xfs_refc_domain	domain,
	xfs_agblock_t		bno,
	xfs_extlen_t		len,
	enum xbtree_recpacking	*outcome)
{}

struct xfs_refcount_query_range_info {};

/* Format btree record and pass to our callback. */
STATIC int
xfs_refcount_query_range_helper(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_rec	*rec,
	void				*priv)
{}

/* Find all refcount records between two keys. */
int
xfs_refcount_query_range(
	struct xfs_btree_cur		*cur,
	const struct xfs_refcount_irec	*low_rec,
	const struct xfs_refcount_irec	*high_rec,
	xfs_refcount_query_range_fn	fn,
	void				*priv)
{}

int __init
xfs_refcount_intent_init_cache(void)
{}

void
xfs_refcount_intent_destroy_cache(void)
{}