// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2020 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <[email protected]>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_btree.h"
#include "xfs_trace.h"
#include "xfs_btree_staging.h"

/*
 * Staging Cursors and Fake Roots for Btrees
 * =========================================
 *
 * A staging btree cursor is a special type of btree cursor that callers must
 * use to construct a new btree index using the btree bulk loader code. The
 * bulk loading code uses the staging btree cursor to abstract the details of
 * initializing new btree blocks and filling them with records or key/ptr
 * pairs. Regular btree operations (e.g. queries and modifications) are not
 * supported with staging cursors, and callers must not invoke them.
 *
 * Fake root structures contain all the information about a btree that is
 * under construction by the bulk loading code. Staging btree cursors point
 * to fake root structures instead of the usual AG header or inode structure.
 *
 * Callers are expected to initialize a fake root structure and pass it into
 * the _stage_cursor function for a specific btree type. When bulk loading is
 * complete, callers should call the _commit_staged_btree function for that
 * specific btree type to commit the new btree into the filesystem.
 */

/*
 * Bulk Loading for AG Btrees
 * ==========================
 *
 * For a btree rooted in an AG header, pass an xbtree_afakeroot structure to
 * the staging cursor. Callers should initialize this to zero.
 *
 * The _stage_cursor() function for a specific btree type should call
 * xfs_btree_stage_afakeroot to set up the in-memory cursor as a staging
 * cursor. The corresponding _commit_staged_btree() function should log the
 * new root and call xfs_btree_commit_afakeroot() to transform the staging
 * cursor into a regular btree cursor.
 */

/*
 * Initialize an AG-rooted btree cursor with the given AG btree fake root.
 */
void
xfs_btree_stage_afakeroot(
	struct xfs_btree_cur		*cur,
	struct xbtree_afakeroot		*afake)
{
	…
}

/*
 * Transform an AG-rooted staging btree cursor back into a regular cursor by
 * substituting a real btree root for the fake one and restoring normal btree
 * cursor ops. The caller must log the btree root change prior to calling
 * this.
 */
void
xfs_btree_commit_afakeroot(
	struct xfs_btree_cur		*cur,
	struct xfs_trans		*tp,
	struct xfs_buf			*agbp)
{
	…
}
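/*
 * Example of the AG btree staging workflow described above. This is an
 * illustrative sketch only and is not compiled: xfs_examplebt_stage_cursor()
 * and xfs_examplebt_log_root() are hypothetical stand-ins for the
 * _stage_cursor() and root-logging helpers that a real btree type would
 * supply, and the bulk loading step itself is elided.
 */
#ifdef EXAMPLE_ONLY
STATIC int
xfs_examplebt_rebuild(
	struct xfs_mount		*mp,
	struct xfs_trans		*tp,
	struct xfs_buf			*agbp)
{
	struct xbtree_afakeroot		afake = { };	/* must start zeroed */
	struct xfs_btree_cur		*cur;

	/* Set up a staging cursor pointing at the fake root. */
	cur = xfs_examplebt_stage_cursor(mp, &afake);

	/* ...bulk load the new btree contents here... */

	/*
	 * Log the new root location and height, then convert the staging
	 * cursor back into a regular btree cursor.
	 */
	xfs_examplebt_log_root(tp, agbp, &afake);
	xfs_btree_commit_afakeroot(cur, tp, agbp);
	xfs_btree_del_cursor(cur, 0);
	return 0;
}
#endif /* EXAMPLE_ONLY */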
/*
 * Bulk Loading for Inode-Rooted Btrees
 * ====================================
 *
 * For a btree rooted in an inode fork, pass an xbtree_ifakeroot structure to
 * the staging cursor. This structure should be initialized as follows:
 *
 * - if_fork_size field should be set to the number of bytes available to the
 *   fork in the inode.
 *
 * - if_fork should point to a freshly allocated struct xfs_ifork.
 *
 * - if_format should be set to the appropriate fork type (e.g.
 *   XFS_DINODE_FMT_BTREE).
 *
 * All other fields must be zero.
 *
 * The _stage_cursor() function for a specific btree type should call
 * xfs_btree_stage_ifakeroot to set up the in-memory cursor as a staging
 * cursor. The corresponding _commit_staged_btree() function should log the
 * new root and call xfs_btree_commit_ifakeroot() to transform the staging
 * cursor into a regular btree cursor.
 */

/*
 * Initialize an inode-rooted btree cursor with the given inode btree fake
 * root. The btree cursor's bc_ops will be overridden as needed to make the
 * staging functionality work.
 */
void
xfs_btree_stage_ifakeroot(
	struct xfs_btree_cur		*cur,
	struct xbtree_ifakeroot		*ifake)
{
	…
}

/*
 * Transform an inode-rooted staging btree cursor back into a regular cursor
 * by substituting a real btree root for the fake one and restoring normal
 * btree cursor ops. The caller must log the btree root change prior to
 * calling this.
 */
void
xfs_btree_commit_ifakeroot(
	struct xfs_btree_cur		*cur,
	struct xfs_trans		*tp,
	int				whichfork)
{
	…
}

/*
 * Bulk Loading of Staged Btrees
 * =============================
 *
 * This interface is used with a staged btree cursor to create a totally new
 * btree with a large number of records (i.e. more than what would fit in a
 * single root block). When the creation is complete, the new root can be
 * linked atomically into the filesystem by committing the staged cursor.
 *
 * Creation of a new btree proceeds roughly as follows:
 *
 * The first step is to initialize an appropriate fake btree root structure
 * and then construct a staged btree cursor. Refer to the block comments
 * about "Bulk Loading for AG Btrees" and "Bulk Loading for Inode-Rooted
 * Btrees" for more information about how to do this.
 *
 * The second step is to initialize a struct xfs_btree_bload context as
 * documented in the structure definition.
 *
 * The third step is to call xfs_btree_bload_compute_geometry to compute the
 * height of the new btree and the number of blocks needed to construct it.
 * See the section "Computing the Geometry of the New Btree" for details
 * about this computation.
 *
 * In step four, the caller must allocate xfs_btree_bload.nr_blocks blocks
 * and save them for later use by ->claim_block(). Bulk loading requires all
 * blocks to be allocated beforehand to avoid ENOSPC failures midway through
 * a rebuild, and to minimize seek distances of the new btree.
 *
 * Step five is to call xfs_btree_bload() to start constructing the btree.
 *
 * The final step is to commit the staging btree cursor, which logs the new
 * btree root and turns the staging cursor into a regular cursor. The caller
 * is responsible for cleaning up the previous btree blocks, if any. A sketch
 * of this whole sequence appears at the end of this file.
 *
 * Computing the Geometry of the New Btree
 * =======================================
 *
 * The number of items placed in each btree block is computed via the
 * following algorithm: For leaf levels, the number of items for the level
 * is nr_records in the bload structure. For node levels, the number of
 * items for the level is the number of blocks in the next lower level of
 * the tree. For each level, the desired number of items per block is
 * defined as:
 *
 *	desired = max(minrecs, maxrecs - slack factor)
 *
 * The number of blocks for the level is defined to be:
 *
 *	blocks = floor(nr_items / desired)
 *
 * Note this is rounded down so that the npb calculation below will never
 * fall below minrecs. The number of items that will actually be loaded
 * into each btree block is defined as:
 *
 *	npb = nr_items / blocks
 *
 * Some of the leftmost blocks in the level will contain one extra record as
 * needed to handle uneven division. If the number of records in any block
 * would exceed maxrecs for that level, blocks is incremented and npb is
 * recalculated.
 *
 * In other words, we compute the number of blocks needed to satisfy a given
 * loading level, then spread the items as evenly as possible. A simplified
 * model of this computation follows this comment.
 *
 * The height and number of fs blocks required to create the btree are
 * computed and returned via btree_height and nr_blocks.
 */
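/*
 * Simplified model of the geometry computation described above, shown here
 * as an illustrative sketch; the kernel implementation lives in
 * xfs_btree_bload_level_geometry() below. Worked example: with
 * nr_items = 100, maxrecs = 10, minrecs = 4, and slack = 2, we get
 * desired = max(4, 10 - 2) = 8 and blocks = floor(100 / 8) = 12; then
 * npb = 100 / 12 = 8 with remainder 4, so the leftmost four blocks each
 * hold nine items and the remaining eight blocks hold eight items apiece.
 */
static void
xfs_btree_bload_example_geometry(
	uint64_t			nr_items,
	unsigned int			maxrecs,
	unsigned int			minrecs,
	unsigned int			slack,
	uint64_t			*blocks,
	uint64_t			*blocks_with_extra,
	unsigned int			*avg_per_block)
{
	unsigned int			desired = max(minrecs, maxrecs - slack);
	unsigned int			npb;

	/* Even an empty leaf level needs one (root) block. */
	*blocks = max_t(uint64_t, 1, nr_items / desired);

	for (;;) {
		npb = nr_items / *blocks;
		*blocks_with_extra = nr_items % *blocks;

		/*
		 * If a block holding an extra item would exceed maxrecs,
		 * spread the items over one more block and recompute.
		 */
		if (npb + (*blocks_with_extra ? 1 : 0) <= maxrecs)
			break;
		(*blocks)++;
	}

	*avg_per_block = npb;
}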
/*
 * Put a btree block that we're loading onto the ordered list and release it.
 * The btree blocks will be written to disk when bulk loading is finished.
 * If we reach the dirty buffer threshold, flush them to disk before
 * continuing.
 */
static int
xfs_btree_bload_drop_buf(
	struct xfs_btree_bload		*bbl,
	struct list_head		*buffers_list,
	struct xfs_buf			**bpp)
{
	…
}

/*
 * Allocate and initialize one btree block for bulk loading.
 *
 * The new btree block will have its level and numrecs fields set to the
 * values of the level and nr_this_block parameters, respectively.
 *
 * The caller should ensure that ptrp, bpp, and blockp refer to the left
 * sibling of the new block, if there is any. On exit, ptrp, bpp, and blockp
 * will all point to the new block.
 */
STATIC int
xfs_btree_bload_prep_block(
	struct xfs_btree_cur		*cur,
	struct xfs_btree_bload		*bbl,
	struct list_head		*buffers_list,
	unsigned int			level,
	unsigned int			nr_this_block,
	union xfs_btree_ptr		*ptrp, /* in/out */
	struct xfs_buf			**bpp, /* in/out */
	struct xfs_btree_block		**blockp, /* in/out */
	void				*priv)
{
	…
}

/* Load one leaf block. */
STATIC int
xfs_btree_bload_leaf(
	struct xfs_btree_cur		*cur,
	unsigned int			recs_this_block,
	xfs_btree_bload_get_records_fn	get_records,
	struct xfs_btree_block		*block,
	void				*priv)
{
	…
}

/*
 * Load one node block with key/ptr pairs.
 *
 * child_ptr must point to a block within the next level down in the tree.
 * A key/ptr entry will be created in the new node block to the block pointed
 * to by child_ptr. On exit, child_ptr points to the next block on the child
 * level that needs processing.
 */
STATIC int
xfs_btree_bload_node(
	struct xfs_btree_cur		*cur,
	unsigned int			recs_this_block,
	union xfs_btree_ptr		*child_ptr,
	struct xfs_btree_block		*block)
{
	…
}

/*
 * Compute the maximum number of records (or keyptrs) per block that we want
 * to install at this level in the btree. Caller is responsible for having
 * set @cur->bc_ino.forksize to the desired fork size, if appropriate.
 */
STATIC unsigned int
xfs_btree_bload_max_npb(
	struct xfs_btree_cur		*cur,
	struct xfs_btree_bload		*bbl,
	unsigned int			level)
{
	…
}

/*
 * Compute the desired number of records (or keyptrs) per block that we want
 * to install at this level in the btree, which must be somewhere between
 * minrecs and max_npb. The caller is free to install fewer records per
 * block.
 */
STATIC unsigned int
xfs_btree_bload_desired_npb(
	struct xfs_btree_cur		*cur,
	struct xfs_btree_bload		*bbl,
	unsigned int			level)
{
	…
}

/*
 * Compute the number of records to be stored in each block at this level
 * and the number of blocks for this level. For leaf levels, we must
 * populate an empty root block even if there are no records, so we have to
 * have at least one block.
 */
STATIC void
xfs_btree_bload_level_geometry(
	struct xfs_btree_cur		*cur,
	struct xfs_btree_bload		*bbl,
	unsigned int			level,
	uint64_t			nr_this_level,
	unsigned int			*avg_per_block,
	uint64_t			*blocks,
	uint64_t			*blocks_with_extra)
{
	…
}

/*
 * Ensure a slack value is appropriate for the btree.
 *
 * If the slack value is negative, set slack so that we fill the block to
 * halfway between minrecs and maxrecs. Make sure the slack is never so
 * large that we can underflow minrecs.
 */
static void
xfs_btree_bload_ensure_slack(
	struct xfs_btree_cur		*cur,
	int				*slack,
	int				level)
{
	…
}
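/*
 * Illustrative sketch of the default slack policy described above, under
 * the assumption that "fill the block to halfway between minrecs and
 * maxrecs" means a default target of (minrecs + maxrecs) / 2 items per
 * block. This is a simplified model only; the kernel implementation is
 * xfs_btree_bload_ensure_slack() above.
 */
static int
xfs_btree_bload_example_slack(
	unsigned int			minrecs,
	unsigned int			maxrecs,
	int				slack)
{
	/* A negative slack value asks us to pick a default. */
	if (slack < 0)
		slack = maxrecs - (minrecs + maxrecs) / 2;

	/* Never let maxrecs - slack underflow minrecs. */
	if (slack > (int)(maxrecs - minrecs))
		slack = maxrecs - minrecs;

	return slack;
}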
/*
 * Prepare a btree cursor for a bulk load operation by computing the
 * geometry fields in bbl. Caller must ensure that the btree cursor is a
 * staging cursor. This function can be called multiple times.
 */
int
xfs_btree_bload_compute_geometry(
	struct xfs_btree_cur		*cur,
	struct xfs_btree_bload		*bbl,
	uint64_t			nr_records)
{
	…
}

/* Bulk load a btree given the parameters and geometry established in bbl. */
int
xfs_btree_bload(
	struct xfs_btree_cur		*cur,
	struct xfs_btree_bload		*bbl,
	void				*priv)
{
	…
}
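/*
 * End-to-end sketch of the bulk loading sequence laid out in the "Bulk
 * Loading of Staged Btrees" comment above. Illustrative only and not
 * compiled: xfs_examplebt_stage_cursor(), xfs_examplebt_get_records(),
 * xfs_examplebt_claim_block(), xfs_examplebt_alloc_blocks(), and
 * xfs_examplebt_commit_staged_btree() are hypothetical per-btree-type
 * helpers, and the transaction management is abbreviated.
 */
#ifdef EXAMPLE_ONLY
STATIC int
xfs_examplebt_bulk_load(
	struct xfs_mount		*mp,
	struct xfs_trans		*tp,
	struct xfs_buf			*agbp,
	uint64_t			nr_records,
	void				*priv)
{
	struct xbtree_afakeroot		afake = { };
	struct xfs_btree_bload		bbl = { };
	struct xfs_btree_cur		*cur;
	int				error;

	/* Steps one and two: staging cursor and bload context. */
	cur = xfs_examplebt_stage_cursor(mp, &afake);
	bbl.get_records = xfs_examplebt_get_records;
	bbl.claim_block = xfs_examplebt_claim_block;

	/* Step three: compute the geometry of the new btree. */
	error = xfs_btree_bload_compute_geometry(cur, &bbl, nr_records);
	if (error)
		goto out_cur;

	/* Step four: allocate bbl.nr_blocks blocks for ->claim_block(). */
	error = xfs_examplebt_alloc_blocks(tp, &bbl, priv);
	if (error)
		goto out_cur;

	/* Step five: load the records into the new blocks. */
	error = xfs_btree_bload(cur, &bbl, priv);
	if (error)
		goto out_cur;

	/* Step six: log the new root and commit the staged btree. */
	xfs_examplebt_commit_staged_btree(cur, tp, agbp);
	xfs_btree_del_cursor(cur, 0);
	return 0;
out_cur:
	xfs_btree_del_cursor(cur, error);
	return error;
}
#endif /* EXAMPLE_ONLY */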