linux/fs/btrfs/relocation.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2009 Oracle.  All rights reserved.
 */

#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/error-injection.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "volumes.h"
#include "locking.h"
#include "btrfs_inode.h"
#include "async-thread.h"
#include "free-space-cache.h"
#include "qgroup.h"
#include "print-tree.h"
#include "delalloc-space.h"
#include "block-group.h"
#include "backref.h"
#include "misc.h"
#include "subpage.h"
#include "zoned.h"
#include "inode-item.h"
#include "space-info.h"
#include "fs.h"
#include "accessors.h"
#include "extent-tree.h"
#include "root-tree.h"
#include "file-item.h"
#include "relocation.h"
#include "super.h"
#include "tree-checker.h"
#include "raid-stripe-tree.h"

/*
 * Relocation overview
 *
 * [What does relocation do]
 *
 * The objective of relocation is to relocate all extents of the target block
 * group to other block groups.
 * This is utilized by resize (shrink only), profile converting, compacting
 * space, or balance routine to spread chunks over devices.
 *
 * 		Before		|		After
 * ------------------------------------------------------------------
 *  BG A: 10 data extents	| BG A: deleted
 *  BG B:  2 data extents	| BG B: 10 data extents (2 old + 8 relocated)
 *  BG C:  1 extents		| BG C:  3 data extents (1 old + 2 relocated)
 *
 * [How does relocation work]
 *
 * 1.   Mark the target block group read-only
 *      New extents won't be allocated from the target block group.
 *
 * 2.1  Record each extent in the target block group
 *      To build a proper map of extents to be relocated.
 *
 * 2.2  Build data reloc tree and reloc trees
 *      Data reloc tree will contain an inode, recording all newly relocated
 *      data extents.
 *      There will be only one data reloc tree for one data block group.
 *
 *      Reloc tree will be a special snapshot of its source tree, containing
 *      relocated tree blocks.
 *      Each tree referring to a tree block in target block group will get its
 *      reloc tree built.
 *
 * 2.3  Swap source tree with its corresponding reloc tree
 *      Each involved tree only refers to new extents after swap.
 *
 * 3.   Cleanup reloc trees and data reloc tree.
 *      As old extents in the target block group are still referenced by reloc
 *      trees, we need to clean them up before really freeing the target block
 *      group.
 *
 * The main complexity is in steps 2.2 and 2.3.
 *
 * The entry point of relocation is relocate_block_group() function.
 */

#define RELOCATION_RESERVED_NODES
/*
 * map address of tree root to tree
 */
struct mapping_node {};

struct mapping_tree {};

/*
 * present a tree block to process
 */
struct tree_block {};

#define MAX_EXTENTS

struct file_extent_cluster {};

/* Stages of data relocation. */
enum reloc_stage {};

struct reloc_control {};

static void mark_block_processed(struct reloc_control *rc,
				 struct btrfs_backref_node *node)
{}

/*
 * walk up backref nodes until reach node presents tree root
 */
static struct btrfs_backref_node *walk_up_backref(
		struct btrfs_backref_node *node,
		struct btrfs_backref_edge *edges[], int *index)
{}

/*
 * walk down backref nodes to find start of next reference path
 */
static struct btrfs_backref_node *walk_down_backref(
		struct btrfs_backref_edge *edges[], int *index)
{}

static bool reloc_root_is_dead(const struct btrfs_root *root)
{}

/*
 * Check if this subvolume tree has valid reloc tree.
 *
 * Reloc tree after swap is considered dead, thus not considered as valid.
 * This is enough for most callers, as they don't distinguish dead reloc root
 * from no reloc root.  But btrfs_should_ignore_reloc_root() below is a
 * special case.
 */
static bool have_reloc_root(const struct btrfs_root *root)
{}

bool btrfs_should_ignore_reloc_root(const struct btrfs_root *root)
{}

/*
 * find reloc tree by address of tree root
 */
struct btrfs_root *find_reloc_root(struct btrfs_fs_info *fs_info, u64 bytenr)
{}

/*
 * For useless nodes, do two major clean ups:
 *
 * - Cleanup the children edges and nodes
 *   If child node is also orphan (no parent) during cleanup, then the child
 *   node will also be cleaned up.
 *
 * - Freeing up leaves (level 0), keeps nodes detached
 *   For nodes, the node is still cached as "detached"
 *
 * Return false if @node is not in the @useless_nodes list.
 * Return true if @node is in the @useless_nodes list.
 */
static bool handle_useless_nodes(struct reloc_control *rc,
				 struct btrfs_backref_node *node)
{}

/*
 * Build backref tree for a given tree block. Root of the backref tree
 * corresponds the tree block, leaves of the backref tree correspond roots of
 * b-trees that reference the tree block.
 *
 * The basic idea of this function is check backrefs of a given block to find
 * upper level blocks that reference the block, and then check backrefs of
 * these upper level blocks recursively. The recursion stops when tree root is
 * reached or backrefs for the block is cached.
 *
 * NOTE: if we find that backrefs for a block are cached, we know backrefs for
 * all upper level blocks that directly/indirectly reference the block are also
 * cached.
 */
static noinline_for_stack struct btrfs_backref_node *build_backref_tree(
			struct btrfs_trans_handle *trans,
			struct reloc_control *rc, struct btrfs_key *node_key,
			int level, u64 bytenr)
{}

/*
 * helper to add backref node for the newly created snapshot.
 * the backref node is created by cloning backref node that
 * corresponds to root of source tree
 */
static int clone_backref_node(struct btrfs_trans_handle *trans,
			      struct reloc_control *rc,
			      const struct btrfs_root *src,
			      struct btrfs_root *dest)
{}

/*
 * helper to add 'address of tree root -> reloc tree' mapping
 */
static int __add_reloc_root(struct btrfs_root *root)
{}

/*
 * helper to delete the 'address of tree root -> reloc tree'
 * mapping
 */
static void __del_reloc_root(struct btrfs_root *root)
{}

/*
 * helper to update the 'address of tree root -> reloc tree'
 * mapping
 */
static int __update_reloc_root(struct btrfs_root *root)
{}

static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
					struct btrfs_root *root, u64 objectid)
{}

/*
 * create reloc tree for a given fs tree. reloc tree is just a
 * snapshot of the fs tree with special root objectid.
 *
 * The reloc_root comes out of here with two references, one for
 * root->reloc_root, and another for being on the rc->reloc_roots list.
 */
int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root)
{}

/*
 * update root item of reloc tree
 */
int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root)
{}

/*
 * get new location of data
 */
static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
			    u64 bytenr, u64 num_bytes)
{}

/*
 * update file extent items in the tree leaf to point to
 * the new locations.
 */
static noinline_for_stack
int replace_file_extents(struct btrfs_trans_handle *trans,
			 struct reloc_control *rc,
			 struct btrfs_root *root,
			 struct extent_buffer *leaf)
{}

static noinline_for_stack int memcmp_node_keys(const struct extent_buffer *eb,
					       int slot, const struct btrfs_path *path,
					       int level)
{}

/*
 * try to replace tree blocks in fs tree with the new blocks
 * in reloc tree. tree blocks haven't been modified since the
 * reloc tree was create can be replaced.
 *
 * if a block was replaced, level of the block + 1 is returned.
 * if no block got replaced, 0 is returned. if there are other
 * errors, a negative error number is returned.
 */
static noinline_for_stack
int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc,
		 struct btrfs_root *dest, struct btrfs_root *src,
		 struct btrfs_path *path, struct btrfs_key *next_key,
		 int lowest_level, int max_level)
{}

/*
 * helper to find next relocated block in reloc tree
 */
static noinline_for_stack
int walk_up_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
		       int *level)
{}

/*
 * walk down reloc tree to find relocated block of lowest level
 */
static noinline_for_stack
int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
			 int *level)
{}

/*
 * invalidate extent cache for file extents whose key in range of
 * [min_key, max_key)
 */
static int invalidate_extent_cache(struct btrfs_root *root,
				   const struct btrfs_key *min_key,
				   const struct btrfs_key *max_key)
{}

static int find_next_key(struct btrfs_path *path, int level,
			 struct btrfs_key *key)

{}

/*
 * Insert current subvolume into reloc_control::dirty_subvol_roots
 */
static int insert_dirty_subvol(struct btrfs_trans_handle *trans,
			       struct reloc_control *rc,
			       struct btrfs_root *root)
{}

static int clean_dirty_subvols(struct reloc_control *rc)
{}

/*
 * merge the relocated tree blocks in reloc tree with corresponding
 * fs tree.
 */
static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
					       struct btrfs_root *root)
{}

static noinline_for_stack
int prepare_to_merge(struct reloc_control *rc, int err)
{}

static noinline_for_stack
void free_reloc_roots(struct list_head *list)
{}

static noinline_for_stack
void merge_reloc_roots(struct reloc_control *rc)
{}

static void free_block_list(struct rb_root *blocks)
{}

static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans,
				      struct btrfs_root *reloc_root)
{}

static noinline_for_stack
struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
				     struct reloc_control *rc,
				     struct btrfs_backref_node *node,
				     struct btrfs_backref_edge *edges[])
{}

/*
 * Select a tree root for relocation.
 *
 * Return NULL if the block is not shareable. We should use do_relocation() in
 * this case.
 *
 * Return a tree root pointer if the block is shareable.
 * Return -ENOENT if the block is root of reloc tree.
 */
static noinline_for_stack
struct btrfs_root *select_one_root(struct btrfs_backref_node *node)
{}

static noinline_for_stack u64 calcu_metadata_size(struct reloc_control *rc,
						  struct btrfs_backref_node *node)
{}

static int reserve_metadata_space(struct btrfs_trans_handle *trans,
				  struct reloc_control *rc,
				  struct btrfs_backref_node *node)
{}

/*
 * relocate a block tree, and then update pointers in upper level
 * blocks that reference the block to point to the new location.
 *
 * if called by link_to_upper, the block has already been relocated.
 * in that case this function just updates pointers.
 */
static int do_relocation(struct btrfs_trans_handle *trans,
			 struct reloc_control *rc,
			 struct btrfs_backref_node *node,
			 struct btrfs_key *key,
			 struct btrfs_path *path, int lowest)
{}

static int link_to_upper(struct btrfs_trans_handle *trans,
			 struct reloc_control *rc,
			 struct btrfs_backref_node *node,
			 struct btrfs_path *path)
{}

static int finish_pending_nodes(struct btrfs_trans_handle *trans,
				struct reloc_control *rc,
				struct btrfs_path *path, int err)
{}

/*
 * mark a block and all blocks directly/indirectly reference the block
 * as processed.
 */
static void update_processed_blocks(struct reloc_control *rc,
				    struct btrfs_backref_node *node)
{}

static int tree_block_processed(u64 bytenr, struct reloc_control *rc)
{}

static int get_tree_block_key(struct btrfs_fs_info *fs_info,
			      struct tree_block *block)
{}

/*
 * helper function to relocate a tree block
 */
static int relocate_tree_block(struct btrfs_trans_handle *trans,
				struct reloc_control *rc,
				struct btrfs_backref_node *node,
				struct btrfs_key *key,
				struct btrfs_path *path)
{}

/*
 * relocate a list of blocks
 */
static noinline_for_stack
int relocate_tree_blocks(struct btrfs_trans_handle *trans,
			 struct reloc_control *rc, struct rb_root *blocks)
{}

static noinline_for_stack int prealloc_file_extent_cluster(struct reloc_control *rc)
{}

static noinline_for_stack int setup_relocation_extent_mapping(struct reloc_control *rc)
{}

/*
 * Allow error injection to test balance/relocation cancellation
 */
noinline int btrfs_should_cancel_balance(const struct btrfs_fs_info *fs_info)
{}
ALLOW_ERROR_INJECTION();

static u64 get_cluster_boundary_end(const struct file_extent_cluster *cluster,
				    int cluster_nr)
{}

static int relocate_one_folio(struct reloc_control *rc,
			      struct file_ra_state *ra,
			      int *cluster_nr, unsigned long index)
{}

static int relocate_file_extent_cluster(struct reloc_control *rc)
{}

static noinline_for_stack int relocate_data_extent(struct reloc_control *rc,
					   const struct btrfs_key *extent_key)
{}

/*
 * helper to add a tree block to the list.
 * the major work is getting the generation and level of the block
 */
static int add_tree_block(struct reloc_control *rc,
			  const struct btrfs_key *extent_key,
			  struct btrfs_path *path,
			  struct rb_root *blocks)
{}

/*
 * helper to add tree blocks for backref of type BTRFS_SHARED_DATA_REF_KEY
 */
static int __add_tree_block(struct reloc_control *rc,
			    u64 bytenr, u32 blocksize,
			    struct rb_root *blocks)
{}

static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
				    struct btrfs_block_group *block_group,
				    struct inode *inode,
				    u64 ino)
{}

/*
 * Locate the free space cache EXTENT_DATA in root tree leaf and delete the
 * cache inode, to avoid free space cache data extent blocking data relocation.
 */
static int delete_v1_space_cache(struct extent_buffer *leaf,
				 struct btrfs_block_group *block_group,
				 u64 data_bytenr)
{}

/*
 * helper to find all tree blocks that reference a given data extent
 */
static noinline_for_stack int add_data_references(struct reloc_control *rc,
						  const struct btrfs_key *extent_key,
						  struct btrfs_path *path,
						  struct rb_root *blocks)
{}

/*
 * helper to find next unprocessed extent
 */
static noinline_for_stack
int find_next_extent(struct reloc_control *rc, struct btrfs_path *path,
		     struct btrfs_key *extent_key)
{}

static void set_reloc_control(struct reloc_control *rc)
{}

static void unset_reloc_control(struct reloc_control *rc)
{}

static noinline_for_stack
int prepare_to_relocate(struct reloc_control *rc)
{}

static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
{}

static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root, u64 objectid)
{}

static void delete_orphan_inode(struct btrfs_trans_handle *trans,
				struct btrfs_root *root, u64 objectid)
{}

/*
 * helper to create inode for data relocation.
 * the inode is in data relocation tree and its link count is 0
 */
static noinline_for_stack struct inode *create_reloc_inode(
					struct btrfs_fs_info *fs_info,
					const struct btrfs_block_group *group)
{}

/*
 * Mark start of chunk relocation that is cancellable. Check if the cancellation
 * has been requested meanwhile and don't start in that case.
 *
 * Return:
 *   0             success
 *   -EINPROGRESS  operation is already in progress, that's probably a bug
 *   -ECANCELED    cancellation request was set before the operation started
 */
static int reloc_chunk_start(struct btrfs_fs_info *fs_info)
{}

/*
 * Mark end of chunk relocation that is cancellable and wake any waiters.
 */
static void reloc_chunk_end(struct btrfs_fs_info *fs_info)
{}

static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info)
{}

static void free_reloc_control(struct reloc_control *rc)
{}

/*
 * Print the block group being relocated
 */
static void describe_relocation(struct btrfs_block_group *block_group)
{}

static const char *stage_to_string(enum reloc_stage stage)
{}

/*
 * function to relocate all extents in a block group.
 */
int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
{}

static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
{}

/*
 * recover relocation interrupted by system crash.
 *
 * this function resumes merging reloc trees with corresponding fs trees.
 * this is important for keeping the sharing of tree blocks
 */
int btrfs_recover_relocation(struct btrfs_fs_info *fs_info)
{}

/*
 * helper to add ordered checksum for data relocation.
 *
 * cloning checksum properly handles the nodatasum extents.
 * it also saves CPU time to re-calculate the checksum.
 */
int btrfs_reloc_clone_csums(struct btrfs_ordered_extent *ordered)
{}

int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root,
			  const struct extent_buffer *buf,
			  struct extent_buffer *cow)
{}

/*
 * called before creating snapshot. it calculates metadata reservation
 * required for relocating tree blocks in the snapshot
 */
void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending,
			      u64 *bytes_to_reserve)
{}

/*
 * called after snapshot is created. migrate block reservation
 * and create reloc root for the newly created snapshot
 *
 * This is similar to btrfs_init_reloc_root(), we come out of here with two
 * references held on the reloc_root, one for root->reloc_root and one for
 * rc->reloc_roots.
 */
int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
			       struct btrfs_pending_snapshot *pending)
{}

/*
 * Get the current bytenr for the block group which is being relocated.
 *
 * Return U64_MAX if no running relocation.
 */
u64 btrfs_get_reloc_bg_bytenr(const struct btrfs_fs_info *fs_info)
{}