// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2011 STRATO. All rights reserved. */ #include <linux/mm.h> #include <linux/rbtree.h> #include <trace/events/btrfs.h> #include "ctree.h" #include "disk-io.h" #include "backref.h" #include "ulist.h" #include "transaction.h" #include "delayed-ref.h" #include "locking.h" #include "misc.h" #include "tree-mod-log.h" #include "fs.h" #include "accessors.h" #include "extent-tree.h" #include "relocation.h" #include "tree-checker.h" /* Just arbitrary numbers so we can be sure one of these happened. */ #define BACKREF_FOUND_SHARED … #define BACKREF_FOUND_NOT_SHARED … struct extent_inode_elem { … }; static int check_extent_in_eb(struct btrfs_backref_walk_ctx *ctx, const struct btrfs_key *key, const struct extent_buffer *eb, const struct btrfs_file_extent_item *fi, struct extent_inode_elem **eie) { … } static void free_inode_elem_list(struct extent_inode_elem *eie) { … } static int find_extent_in_eb(struct btrfs_backref_walk_ctx *ctx, const struct extent_buffer *eb, struct extent_inode_elem **eie) { … } struct preftree { … }; #define PREFTREE_INIT … struct preftrees { … }; /* * Checks for a shared extent during backref search. * * The share_count tracks prelim_refs (direct and indirect) having a * ref->count >0: * - incremented when a ref->count transitions to >0 * - decremented when a ref->count transitions to <1 */ struct share_check { … }; static inline int extent_is_shared(struct share_check *sc) { … } static struct kmem_cache *btrfs_prelim_ref_cache; int __init btrfs_prelim_ref_init(void) { … } void __cold btrfs_prelim_ref_exit(void) { … } static void free_pref(struct prelim_ref *ref) { … } /* * Return 0 when both refs are for the same block (and can be merged). * A -1 return indicates ref1 is a 'lower' block than ref2, while 1 * indicates a 'higher' block. */ static int prelim_ref_compare(const struct prelim_ref *ref1, const struct prelim_ref *ref2) { … } static void update_share_count(struct share_check *sc, int oldcount, int newcount, const struct prelim_ref *newref) { … } /* * Add @newref to the @root rbtree, merging identical refs. * * Callers should assume that newref has been freed after calling. */ static void prelim_ref_insert(const struct btrfs_fs_info *fs_info, struct preftree *preftree, struct prelim_ref *newref, struct share_check *sc) { … } /* * Release the entire tree. We don't care about internal consistency so * just free everything and then reset the tree root. */ static void prelim_release(struct preftree *preftree) { … } /* * the rules for all callers of this function are: * - obtaining the parent is the goal * - if you add a key, you must know that it is a correct key * - if you cannot add the parent or a correct key, then we will look into the * block later to set a correct key * * delayed refs * ============ * backref type | shared | indirect | shared | indirect * information | tree | tree | data | data * --------------------+--------+----------+--------+---------- * parent logical | y | - | - | - * key to resolve | - | y | y | y * tree block logical | - | - | - | - * root for resolving | y | y | y | y * * - column 1: we've the parent -> done * - column 2, 3, 4: we use the key to find the parent * * on disk refs (inline or keyed) * ============================== * backref type | shared | indirect | shared | indirect * information | tree | tree | data | data * --------------------+--------+----------+--------+---------- * parent logical | y | - | y | - * key to resolve | - | - | - | y * tree block logical | y | y | y | y * root for resolving | - | y | y | y * * - column 1, 3: we've the parent -> done * - column 2: we take the first key from the block to find the parent * (see add_missing_keys) * - column 4: we use the key to find the parent * * additional information that's available but not required to find the parent * block might help in merging entries to gain some speed. */ static int add_prelim_ref(const struct btrfs_fs_info *fs_info, struct preftree *preftree, u64 root_id, const struct btrfs_key *key, int level, u64 parent, u64 wanted_disk_byte, int count, struct share_check *sc, gfp_t gfp_mask) { … } /* direct refs use root == 0, key == NULL */ static int add_direct_ref(const struct btrfs_fs_info *fs_info, struct preftrees *preftrees, int level, u64 parent, u64 wanted_disk_byte, int count, struct share_check *sc, gfp_t gfp_mask) { … } /* indirect refs use parent == 0 */ static int add_indirect_ref(const struct btrfs_fs_info *fs_info, struct preftrees *preftrees, u64 root_id, const struct btrfs_key *key, int level, u64 wanted_disk_byte, int count, struct share_check *sc, gfp_t gfp_mask) { … } static int is_shared_data_backref(struct preftrees *preftrees, u64 bytenr) { … } static int add_all_parents(struct btrfs_backref_walk_ctx *ctx, struct btrfs_root *root, struct btrfs_path *path, struct ulist *parents, struct preftrees *preftrees, struct prelim_ref *ref, int level) { … } /* * resolve an indirect backref in the form (root_id, key, level) * to a logical address */ static int resolve_indirect_ref(struct btrfs_backref_walk_ctx *ctx, struct btrfs_path *path, struct preftrees *preftrees, struct prelim_ref *ref, struct ulist *parents) { … } static struct extent_inode_elem * unode_aux_to_inode_list(struct ulist_node *node) { … } static void free_leaf_list(struct ulist *ulist) { … } /* * We maintain three separate rbtrees: one for direct refs, one for * indirect refs which have a key, and one for indirect refs which do not * have a key. Each tree does merge on insertion. * * Once all of the references are located, we iterate over the tree of * indirect refs with missing keys. An appropriate key is located and * the ref is moved onto the tree for indirect refs. After all missing * keys are thus located, we iterate over the indirect ref tree, resolve * each reference, and then insert the resolved reference onto the * direct tree (merging there too). * * New backrefs (i.e., for parent nodes) are added to the appropriate * rbtree as they are encountered. The new backrefs are subsequently * resolved as above. */ static int resolve_indirect_refs(struct btrfs_backref_walk_ctx *ctx, struct btrfs_path *path, struct preftrees *preftrees, struct share_check *sc) { … } /* * read tree blocks and add keys where required. */ static int add_missing_keys(struct btrfs_fs_info *fs_info, struct preftrees *preftrees, bool lock) { … } /* * add all currently queued delayed refs from this head whose seq nr is * smaller or equal that seq to the list */ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_head *head, u64 seq, struct preftrees *preftrees, struct share_check *sc) { … } /* * add all inline backrefs for bytenr to the list * * Returns 0 on success, <0 on error, or BACKREF_FOUND_SHARED. */ static int add_inline_refs(struct btrfs_backref_walk_ctx *ctx, struct btrfs_path *path, int *info_level, struct preftrees *preftrees, struct share_check *sc) { … } /* * add all non-inline backrefs for bytenr to the list * * Returns 0 on success, <0 on error, or BACKREF_FOUND_SHARED. */ static int add_keyed_refs(struct btrfs_backref_walk_ctx *ctx, struct btrfs_root *extent_root, struct btrfs_path *path, int info_level, struct preftrees *preftrees, struct share_check *sc) { … } /* * The caller has joined a transaction or is holding a read lock on the * fs_info->commit_root_sem semaphore, so no need to worry about the root's last * snapshot field changing while updating or checking the cache. */ static bool lookup_backref_shared_cache(struct btrfs_backref_share_check_ctx *ctx, struct btrfs_root *root, u64 bytenr, int level, bool *is_shared) { … } /* * The caller has joined a transaction or is holding a read lock on the * fs_info->commit_root_sem semaphore, so no need to worry about the root's last * snapshot field changing while updating or checking the cache. */ static void store_backref_shared_cache(struct btrfs_backref_share_check_ctx *ctx, struct btrfs_root *root, u64 bytenr, int level, bool is_shared) { … } /* * this adds all existing backrefs (inline backrefs, backrefs and delayed * refs) for the given bytenr to the refs list, merges duplicates and resolves * indirect refs to their parent bytenr. * When roots are found, they're added to the roots list * * @ctx: Backref walking context object, must be not NULL. * @sc: If !NULL, then immediately return BACKREF_FOUND_SHARED when a * shared extent is detected. * * Otherwise this returns 0 for success and <0 for an error. * * FIXME some caching might speed things up */ static int find_parent_nodes(struct btrfs_backref_walk_ctx *ctx, struct share_check *sc) { … } /* * Finds all leaves with a reference to the specified combination of * @ctx->bytenr and @ctx->extent_item_pos. The bytenr of the found leaves are * added to the ulist at @ctx->refs, and that ulist is allocated by this * function. The caller should free the ulist with free_leaf_list() if * @ctx->ignore_extent_item_pos is false, otherwise a fimple ulist_free() is * enough. * * Returns 0 on success and < 0 on error. On error @ctx->refs is not allocated. */ int btrfs_find_all_leafs(struct btrfs_backref_walk_ctx *ctx) { … } /* * Walk all backrefs for a given extent to find all roots that reference this * extent. Walking a backref means finding all extents that reference this * extent and in turn walk the backrefs of those, too. Naturally this is a * recursive process, but here it is implemented in an iterative fashion: We * find all referencing extents for the extent in question and put them on a * list. In turn, we find all referencing extents for those, further appending * to the list. The way we iterate the list allows adding more elements after * the current while iterating. The process stops when we reach the end of the * list. * * Found roots are added to @ctx->roots, which is allocated by this function if * it points to NULL, in which case the caller is responsible for freeing it * after it's not needed anymore. * This function requires @ctx->refs to be NULL, as it uses it for allocating a * ulist to do temporary work, and frees it before returning. * * Returns 0 on success, < 0 on error. */ static int btrfs_find_all_roots_safe(struct btrfs_backref_walk_ctx *ctx) { … } int btrfs_find_all_roots(struct btrfs_backref_walk_ctx *ctx, bool skip_commit_root_sem) { … } struct btrfs_backref_share_check_ctx *btrfs_alloc_backref_share_check_ctx(void) { … } void btrfs_free_backref_share_ctx(struct btrfs_backref_share_check_ctx *ctx) { … } /* * Check if a data extent is shared or not. * * @inode: The inode whose extent we are checking. * @bytenr: Logical bytenr of the extent we are checking. * @extent_gen: Generation of the extent (file extent item) or 0 if it is * not known. * @ctx: A backref sharedness check context. * * btrfs_is_data_extent_shared uses the backref walking code but will short * circuit as soon as it finds a root or inode that doesn't match the * one passed in. This provides a significant performance benefit for * callers (such as fiemap) which want to know whether the extent is * shared but do not need a ref count. * * This attempts to attach to the running transaction in order to account for * delayed refs, but continues on even when no running transaction exists. * * Return: 0 if extent is not shared, 1 if it is shared, < 0 on error. */ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, u64 extent_gen, struct btrfs_backref_share_check_ctx *ctx) { … } int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid, u64 start_off, struct btrfs_path *path, struct btrfs_inode_extref **ret_extref, u64 *found_off) { … } /* * this iterates to turn a name (from iref/extref) into a full filesystem path. * Elements of the path are separated by '/' and the path is guaranteed to be * 0-terminated. the path is only given within the current file system. * Therefore, it never starts with a '/'. the caller is responsible to provide * "size" bytes in "dest". the dest buffer will be filled backwards. finally, * the start point of the resulting string is returned. this pointer is within * dest, normally. * in case the path buffer would overflow, the pointer is decremented further * as if output was written to the buffer, though no more output is actually * generated. that way, the caller can determine how much space would be * required for the path to fit into the buffer. in that case, the returned * value will be smaller than dest. callers must check this! */ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, u32 name_len, unsigned long name_off, struct extent_buffer *eb_in, u64 parent, char *dest, u32 size) { … } /* * this makes the path point to (logical EXTENT_ITEM *) * returns BTRFS_EXTENT_FLAG_DATA for data, BTRFS_EXTENT_FLAG_TREE_BLOCK for * tree blocks and <0 on error. */ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, struct btrfs_path *path, struct btrfs_key *found_key, u64 *flags_ret) { … } /* * helper function to iterate extent inline refs. ptr must point to a 0 value * for the first call and may be modified. it is used to track state. * if more refs exist, 0 is returned and the next call to * get_extent_inline_ref must pass the modified ptr parameter to get the * next ref. after the last ref was processed, 1 is returned. * returns <0 on error */ static int get_extent_inline_ref(unsigned long *ptr, const struct extent_buffer *eb, const struct btrfs_key *key, const struct btrfs_extent_item *ei, u32 item_size, struct btrfs_extent_inline_ref **out_eiref, int *out_type) { … } /* * reads the tree block backref for an extent. tree level and root are returned * through out_level and out_root. ptr must point to a 0 value for the first * call and may be modified (see get_extent_inline_ref comment). * returns 0 if data was provided, 1 if there was no more data to provide or * <0 on error. */ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb, struct btrfs_key *key, struct btrfs_extent_item *ei, u32 item_size, u64 *out_root, u8 *out_level) { … } static int iterate_leaf_refs(struct btrfs_fs_info *fs_info, struct extent_inode_elem *inode_list, u64 root, u64 extent_item_objectid, iterate_extent_inodes_t *iterate, void *ctx) { … } /* * calls iterate() for every inode that references the extent identified by * the given parameters. * when the iterator function returns a non-zero value, iteration stops. */ int iterate_extent_inodes(struct btrfs_backref_walk_ctx *ctx, bool search_commit_root, iterate_extent_inodes_t *iterate, void *user_ctx) { … } static int build_ino_list(u64 inum, u64 offset, u64 num_bytes, u64 root, void *ctx) { … } int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, struct btrfs_path *path, void *ctx, bool ignore_offset) { … } static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off, struct extent_buffer *eb, struct inode_fs_paths *ipath); static int iterate_inode_refs(u64 inum, struct inode_fs_paths *ipath) { … } static int iterate_inode_extrefs(u64 inum, struct inode_fs_paths *ipath) { … } /* * returns 0 if the path could be dumped (probably truncated) * returns <0 in case of an error */ static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off, struct extent_buffer *eb, struct inode_fs_paths *ipath) { … } /* * this dumps all file system paths to the inode into the ipath struct, provided * is has been created large enough. each path is zero-terminated and accessed * from ipath->fspath->val[i]. * when it returns, there are ipath->fspath->elem_cnt number of paths available * in ipath->fspath->val[]. when the allocated space wasn't sufficient, the * number of missed paths is recorded in ipath->fspath->elem_missed, otherwise, * it's zero. ipath->fspath->bytes_missing holds the number of bytes that would * have been needed to return all paths. */ int paths_from_inode(u64 inum, struct inode_fs_paths *ipath) { … } struct btrfs_data_container *init_data_container(u32 total_bytes) { … } /* * allocates space to return multiple file system paths for an inode. * total_bytes to allocate are passed, note that space usable for actual path * information will be total_bytes - sizeof(struct inode_fs_paths). * the returned pointer must be freed with free_ipath() in the end. */ struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, struct btrfs_path *path) { … } void free_ipath(struct inode_fs_paths *ipath) { … } struct btrfs_backref_iter *btrfs_backref_iter_alloc(struct btrfs_fs_info *fs_info) { … } static void btrfs_backref_iter_release(struct btrfs_backref_iter *iter) { … } int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr) { … } static bool btrfs_backref_iter_is_inline_ref(struct btrfs_backref_iter *iter) { … } /* * Go to the next backref item of current bytenr, can be either inlined or * keyed. * * Caller needs to check whether it's inline ref or not by iter->cur_key. * * Return 0 if we get next backref without problem. * Return >0 if there is no extra backref for this bytenr. * Return <0 if there is something wrong happened. */ int btrfs_backref_iter_next(struct btrfs_backref_iter *iter) { … } void btrfs_backref_init_cache(struct btrfs_fs_info *fs_info, struct btrfs_backref_cache *cache, bool is_reloc) { … } struct btrfs_backref_node *btrfs_backref_alloc_node( struct btrfs_backref_cache *cache, u64 bytenr, int level) { … } void btrfs_backref_free_node(struct btrfs_backref_cache *cache, struct btrfs_backref_node *node) { … } struct btrfs_backref_edge *btrfs_backref_alloc_edge( struct btrfs_backref_cache *cache) { … } void btrfs_backref_free_edge(struct btrfs_backref_cache *cache, struct btrfs_backref_edge *edge) { … } void btrfs_backref_unlock_node_buffer(struct btrfs_backref_node *node) { … } void btrfs_backref_drop_node_buffer(struct btrfs_backref_node *node) { … } /* * Drop the backref node from cache without cleaning up its children * edges. * * This can only be called on node without parent edges. * The children edges are still kept as is. */ void btrfs_backref_drop_node(struct btrfs_backref_cache *tree, struct btrfs_backref_node *node) { … } /* * Drop the backref node from cache, also cleaning up all its * upper edges and any uncached nodes in the path. * * This cleanup happens bottom up, thus the node should either * be the lowest node in the cache or a detached node. */ void btrfs_backref_cleanup_node(struct btrfs_backref_cache *cache, struct btrfs_backref_node *node) { … } /* * Release all nodes/edges from current cache */ void btrfs_backref_release_cache(struct btrfs_backref_cache *cache) { … } void btrfs_backref_link_edge(struct btrfs_backref_edge *edge, struct btrfs_backref_node *lower, struct btrfs_backref_node *upper, int link_which) { … } /* * Handle direct tree backref * * Direct tree backref means, the backref item shows its parent bytenr * directly. This is for SHARED_BLOCK_REF backref (keyed or inlined). * * @ref_key: The converted backref key. * For keyed backref, it's the item key. * For inlined backref, objectid is the bytenr, * type is btrfs_inline_ref_type, offset is * btrfs_inline_ref_offset. */ static int handle_direct_tree_backref(struct btrfs_backref_cache *cache, struct btrfs_key *ref_key, struct btrfs_backref_node *cur) { … } /* * Handle indirect tree backref * * Indirect tree backref means, we only know which tree the node belongs to. * We still need to do a tree search to find out the parents. This is for * TREE_BLOCK_REF backref (keyed or inlined). * * @trans: Transaction handle. * @ref_key: The same as @ref_key in handle_direct_tree_backref() * @tree_key: The first key of this tree block. * @path: A clean (released) path, to avoid allocating path every time * the function get called. */ static int handle_indirect_tree_backref(struct btrfs_trans_handle *trans, struct btrfs_backref_cache *cache, struct btrfs_path *path, struct btrfs_key *ref_key, struct btrfs_key *tree_key, struct btrfs_backref_node *cur) { … } /* * Add backref node @cur into @cache. * * NOTE: Even if the function returned 0, @cur is not yet cached as its upper * links aren't yet bi-directional. Needs to finish such links. * Use btrfs_backref_finish_upper_links() to finish such linkage. * * @trans: Transaction handle. * @path: Released path for indirect tree backref lookup * @iter: Released backref iter for extent tree search * @node_key: The first key of the tree block */ int btrfs_backref_add_tree_node(struct btrfs_trans_handle *trans, struct btrfs_backref_cache *cache, struct btrfs_path *path, struct btrfs_backref_iter *iter, struct btrfs_key *node_key, struct btrfs_backref_node *cur) { … } /* * Finish the upwards linkage created by btrfs_backref_add_tree_node() */ int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache, struct btrfs_backref_node *start) { … } void btrfs_backref_error_cleanup(struct btrfs_backref_cache *cache, struct btrfs_backref_node *node) { … }