// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. */ #define pr_fmt(fmt) … #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/completion.h> #include <linux/buffer_head.h> #include <linux/fs.h> #include <linux/gfs2_ondisk.h> #include <linux/prefetch.h> #include <linux/blkdev.h> #include <linux/rbtree.h> #include <linux/random.h> #include "gfs2.h" #include "incore.h" #include "glock.h" #include "glops.h" #include "lops.h" #include "meta_io.h" #include "quota.h" #include "rgrp.h" #include "super.h" #include "trans.h" #include "util.h" #include "log.h" #include "inode.h" #include "trace_gfs2.h" #include "dir.h" #define BFITNOENT … #define NO_BLOCK … struct gfs2_rbm { … }; static inline struct gfs2_bitmap *rbm_bi(const struct gfs2_rbm *rbm) { … } static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm) { … } /* * These routines are used by the resource group routines (rgrp.c) * to keep track of block allocation. Each block is represented by two * bits. So, each byte represents GFS2_NBBY (i.e. 4) blocks. * * 0 = Free * 1 = Used (not metadata) * 2 = Unlinked (still in use) inode * 3 = Used (metadata) */ struct gfs2_extent { … }; static const char valid_change[16] = …; static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, struct gfs2_blkreserv *rs, bool nowrap); /** * gfs2_setbit - Set a bit in the bitmaps * @rbm: The position of the bit to set * @do_clone: Also set the clone bitmap, if it exists * @new_state: the new state of the block * */ static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone, unsigned char new_state) { … } /** * gfs2_testbit - test a bit in the bitmaps * @rbm: The bit to test * @use_clone: If true, test the clone bitmap, not the official bitmap. 
* * Some callers like gfs2_unaligned_extlen need to test the clone bitmaps, * not the "real" bitmaps, to avoid allocating recently freed blocks. * * Returns: The two bit block state of the requested bit */ static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm, bool use_clone) { … } /** * gfs2_bit_search - search bitmap for a state * @ptr: Pointer to bitmap data * @mask: Mask to use (normally 0x55555.... but adjusted for search start) * @state: The state we are searching for * * We xor the bitmap data with a pattern which is the bitwise opposite * of what we are looking for. This gives rise to a pattern of ones * wherever there is a match. Since we have two bits per entry, we * take this pattern, shift it down by one place and then AND it with * the original. All the even bit positions (0,2,4, etc) then represent * successful matches, so we mask with 0x55555..... to remove the unwanted * odd bit positions. * * This allows searching of a whole u64 at once (32 blocks) with a * single test (on 64 bit arches). */ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state) { … } /** * rs_cmp - multi-block reservation range compare * @start: start of the new reservation * @len: number of blocks in the new reservation * @rs: existing reservation to compare against * * returns: 1 if the block range is beyond the reach of the reservation * -1 if the block range is before the start of the reservation * 0 if the block range overlaps with the reservation */ static inline int rs_cmp(u64 start, u32 len, struct gfs2_blkreserv *rs) { … } /** * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing * a block in a given allocation state. * @buf: the buffer that holds the bitmaps * @len: the length (in bytes) of the buffer * @goal: start search at this block's bit-pair (within @buf) * @state: GFS2_BLKST_XXX the state of the block we're looking for.
* * Scope of @goal and returned block number is only within this bitmap buffer, * not entire rgrp or filesystem. @buf will be offset from the actual * beginning of a bitmap block buffer, skipping any header structures, but * headers are always a multiple of 64 bits long so that the buffer is * always aligned to a 64 bit boundary. * * The size of the buffer is in bytes, but it is assumed that it is * always ok to read a complete multiple of 64 bits at the end * of the block in case the end is not aligned to a natural boundary. * * Return: the block number (bitmap buffer scope) that was found */ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len, u32 goal, u8 state) { … } /** * gfs2_rbm_from_block - Set the rbm based upon rgd and block number * @rbm: The rbm with rgd already set correctly * @block: The block number (filesystem relative) * * This sets the bi and offset members of an rbm based on a * resource group and a filesystem relative block number. The * resource group must be set in the rbm on entry, the bi and * offset members will be set by this function. * * Returns: 0 on success, or an error code */ static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) { … } /** * gfs2_rbm_add - add a number of blocks to an rbm * @rbm: The rbm with rgd already set correctly * @blocks: The number of blocks to add to the rbm * * This function takes an existing rbm structure and adds a number of blocks to * it. * * Returns: True if the new rbm would point past the end of the rgrp. */ static bool gfs2_rbm_add(struct gfs2_rbm *rbm, u32 blocks) { … } /** * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned * @rbm: Position to search (value/result) * @n_unaligned: Number of unaligned blocks to check * @len: Decremented for each block found (terminate on zero) * * Returns: true if a non-free block is encountered or the end of the resource * group is reached.
*/ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len) { … } /** * gfs2_free_extlen - Return extent length of free blocks * @rrbm: Starting position * @len: Max length to check * * Starting at the block specified by the rbm, see how many free blocks * there are, not reading more than len blocks ahead. This can be done * using memchr_inv when the blocks are byte aligned, but has to be done * on a block by block basis in case of unaligned blocks. Also this * function can cope with bitmap boundaries (although it must stop on * a resource group boundary) * * Returns: Number of free blocks in the extent */ static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len) { … } /** * gfs2_bitcount - count the number of bits in a certain state * @rgd: the resource group descriptor * @buffer: the buffer that holds the bitmaps * @buflen: the length (in bytes) of the buffer * @state: the state of the block we're looking for * * Returns: The number of bits */ static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer, unsigned int buflen, u8 state) { … } /** * gfs2_rgrp_verify - Verify that a resource group is consistent * @rgd: the rgrp * */ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd) { … } /** * gfs2_blk2rgrpd - Find resource group for a given data/meta block number * @sdp: The GFS2 superblock * @blk: The data block number * @exact: True if this needs to be an exact match * * The @exact argument should be set to true by most callers. The exception * is when we need to match blocks which are not represented by the rgrp * bitmap, but which are part of the rgrp (i.e. padding blocks) which are * there for alignment purposes. Another way of looking at it is that @exact * matches only valid data/metadata blocks, but with @exact false, it will * match any block within the extent of the rgrp. 
* * Returns: The resource group, or NULL if not found */ struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact) { … } /** * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem * @sdp: The GFS2 superblock * * Returns: The first rgrp in the filesystem */ struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp) { … } /** * gfs2_rgrpd_get_next - get the next RG * @rgd: the resource group descriptor * * Returns: The next rgrp */ struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd) { … } void check_and_update_goal(struct gfs2_inode *ip) { … } void gfs2_free_clones(struct gfs2_rgrpd *rgd) { … } static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs, const char *fs_id_buf) { … } /** * __rs_deltree - remove a multi-block reservation from the rgd tree * @rs: The reservation to remove * */ static void __rs_deltree(struct gfs2_blkreserv *rs) { … } /** * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree * @rs: The reservation to remove * */ void gfs2_rs_deltree(struct gfs2_blkreserv *rs) { … } /** * gfs2_rs_delete - delete a multi-block reservation * @ip: The inode for this reservation * */ void gfs2_rs_delete(struct gfs2_inode *ip) { … } /** * return_all_reservations - return all reserved blocks back to the rgrp. * @rgd: the rgrp that needs its space back * * We previously reserved a bunch of blocks for allocation. Now we need to * give them back. This leaves the reservation structures intact, but removes * all of their corresponding "no-fly zones".
*/ static void return_all_reservations(struct gfs2_rgrpd *rgd) { … } void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) { … } /** * compute_bitstructs - Compute the bitmap sizes * @rgd: The resource group descriptor * * Calculates bitmap descriptors, one for each block that contains bitmap data * * Returns: errno */ static int compute_bitstructs(struct gfs2_rgrpd *rgd) { … } /** * gfs2_ri_total - Total up the file system space, according to the rindex. * @sdp: the filesystem * */ u64 gfs2_ri_total(struct gfs2_sbd *sdp) { … } static int rgd_insert(struct gfs2_rgrpd *rgd) { … } /** * read_rindex_entry - Pull in a new resource index entry from the disk * @ip: Pointer to the rindex inode * * Returns: 0 on success, > 0 on EOF, error code otherwise */ static int read_rindex_entry(struct gfs2_inode *ip) { … } /** * set_rgrp_preferences - Run all the rgrps, selecting some we prefer to use * @sdp: the GFS2 superblock * * The purpose of this function is to select a subset of the resource groups * and mark them as PREFERRED. We do it in such a way that each node prefers * to use a unique set of rgrps to minimize glock contention. */ static void set_rgrp_preferences(struct gfs2_sbd *sdp) { … } /** * gfs2_ri_update - Pull in a new resource index from the disk * @ip: pointer to the rindex inode * * Returns: 0 on successful update, error code otherwise */ static int gfs2_ri_update(struct gfs2_inode *ip) { … } /** * gfs2_rindex_update - Update the rindex if required * @sdp: The GFS2 superblock * * We grab a lock on the rindex inode to make sure that it doesn't * change whilst we are performing an operation. We keep this lock * for quite long periods of time compared to other locks. This * doesn't matter, since it is shared and it is very, very rarely * accessed in the exclusive mode (i.e. only when expanding the filesystem). 
* * This makes sure that we're using the latest copy of the resource index * special file, which might have been updated if someone expanded the * filesystem (via gfs2_grow utility), which adds new resource groups. * * Returns: 0 on success, error code otherwise */ int gfs2_rindex_update(struct gfs2_sbd *sdp) { … } static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf) { … } static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf) { … } static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) { … } static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd) { … } static u32 count_unlinked(struct gfs2_rgrpd *rgd) { … } static void rgrp_set_bitmap_flags(struct gfs2_rgrpd *rgd) { … } /** * gfs2_rgrp_go_instantiate - Read in a RG's header and bitmaps * @gl: the glock representing the rgrpd to read in * * Read in all of a Resource Group's header and bitmap blocks. * Caller must eventually call gfs2_rgrp_brelse() to free the bitmaps. * * Returns: errno */ int gfs2_rgrp_go_instantiate(struct gfs2_glock *gl) { … } static int update_rgrp_lvb(struct gfs2_rgrpd *rgd, struct gfs2_holder *gh) { … } /** * gfs2_rgrp_brelse - Release RG bitmaps read in with gfs2_rgrp_go_instantiate() * @rgd: The resource group * */ void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd) { … } int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, struct buffer_head *bh, const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed) { … } /** * gfs2_fitrim - Generate discard requests for unused bits of the filesystem * @filp: Any file on the filesystem * @argp: Pointer to the arguments (also used to pass result) * * Returns: 0 on success, otherwise error code */ int gfs2_fitrim(struct file *filp, void __user *argp) { … } /** * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree * @ip: the inode structure * */ static void rs_insert(struct gfs2_inode *ip) { … } /** * rgd_free - return the number of free blocks we can allocate * @rgd: the resource group * @rs:
The reservation to free * * This function returns the number of free blocks for an rgrp. * That's the clone-free blocks (blocks that are free, not including those * still being used for unlinked files that haven't been deleted.) * * It also subtracts any blocks reserved by someone else, but does not * include free blocks that are still part of our current reservation, * because obviously we can (and will) allocate them. */ static inline u32 rgd_free(struct gfs2_rgrpd *rgd, struct gfs2_blkreserv *rs) { … } /** * rg_mblk_search - find a group of multiple free blocks to form a reservation * @rgd: the resource group descriptor * @ip: pointer to the inode for which we're reserving blocks * @ap: the allocation parameters * */ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap) { … } /** * gfs2_next_unreserved_block - Return next block that is not reserved * @rgd: The resource group * @block: The starting block * @length: The required length * @ignore_rs: Reservation to ignore * * If the block does not appear in any reservation, then return the * block number unchanged. If it does appear in the reservation, then * keep looking through the tree of reservations in order to find the * first block number which is not reserved. */ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, u32 length, struct gfs2_blkreserv *ignore_rs) { … } /** * gfs2_reservation_check_and_update - Check for reservations during block alloc * @rbm: The current position in the resource group * @rs: Our own reservation * @minext: The minimum extent length * @maxext: A pointer to the maximum extent structure * * This checks the current position in the rgrp to see whether there is * a reservation covering this block. If not then this function is a * no-op. If there is, then the position is moved to the end of the * contiguous reservation(s) so that we are pointing at the first * non-reserved block. 
* * Returns: 0 if no reservation, 1 if @rbm has changed, otherwise an error */ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, struct gfs2_blkreserv *rs, u32 minext, struct gfs2_extent *maxext) { … } /** * gfs2_rbm_find - Look for blocks of a particular state * @rbm: Value/result starting position and final position * @state: The state which we want to find * @minext: Pointer to the requested extent length * This is updated to be the actual reservation size. * @rs: Our own reservation (NULL to skip checking for reservations) * @nowrap: Stop looking at the end of the rgrp, rather than wrapping * around until we've reached the starting point. * * Side effects: * - If looking for free blocks, we set GBF_FULL on each bitmap which * has no free blocks in it. * - If looking for free blocks, we set rd_extfail_pt on each rgrp which * has come up short on a free block search. * * Returns: 0 on success, -ENOSPC if there is no block of the requested state */ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, struct gfs2_blkreserv *rs, bool nowrap) { … } /** * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes * @rgd: The rgrp * @last_unlinked: block address of the last dinode we unlinked * @skip: block address we should explicitly not unlink * * This function is declared void and does not return a value. */ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip) { … } /** * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested * @rgd: The rgrp in question * @loops: An indication of how picky we can be (0=very, 1=less so) * * This function uses the recently added glock statistics in order to * figure out whether a particular resource group is suffering from * contention from multiple nodes.
This is done purely on the basis * of timings, since this is the only data we have to work with and * our aim here is to reject a resource group which is highly contended * but (very important) not to do this too often in order to ensure that * we do not land up introducing fragmentation by changing resource * groups when not actually required. * * The calculation is fairly simple, we want to know whether the SRTTB * (i.e. smoothed round trip time for blocking operations) to acquire * the lock for this rgrp's glock is significantly greater than the * time taken for resource groups on average. We introduce a margin in * the form of the variable @var which is computed as the sum of the two * respective variances, and multiplied by a factor depending on @loops * and whether we have a lot of data to base the decision on. This is * then tested against the square difference of the means in order to * decide whether the result is statistically significant or not. * * Returns: A boolean verdict on the congestion status */ static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops) { … } /** * gfs2_rgrp_used_recently - test if an rgrp has been used recently * @rs: The block reservation with the rgrp to test * @msecs: The time limit in milliseconds * * Returns: True if the rgrp glock has been used within the time limit */ static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs, u64 msecs) { … } static u32 gfs2_orlov_skip(const struct gfs2_inode *ip) { … } static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin) { … } /** * fast_to_acquire - determine if a resource group will be fast to acquire * @rgd: The rgrp * * If this is one of our preferred rgrps, it should be quicker to acquire, * because we tried to set ourselves up as dlm lock master.
*/ static inline int fast_to_acquire(struct gfs2_rgrpd *rgd) { … } /** * gfs2_inplace_reserve - Reserve space in the filesystem * @ip: the inode to reserve space for * @ap: the allocation parameters * * We try our best to find an rgrp that has at least ap->target blocks * available. After a couple of passes (loops == 2), the prospects of finding * such an rgrp diminish. At this stage, we return the first rgrp that has * at least ap->min_target blocks available. * * Returns: 0 on success, * -ENOMEM if a suitable rgrp can't be found * errno otherwise */ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) { … } /** * gfs2_inplace_release - release an inplace reservation * @ip: the inode the reservation was taken out on * * Release a reservation made by gfs2_inplace_reserve(). */ void gfs2_inplace_release(struct gfs2_inode *ip) { … } /** * gfs2_alloc_extent - allocate an extent from a given bitmap * @rbm: the resource group information * @dinode: TRUE if the first block we allocate is for a dinode * @n: The extent length (value/result) * * Add the bitmap buffer to the transaction. * Set the found bits to @new_state to change block's allocation state. */ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, unsigned int *n) { … } /** * rgblk_free - Change alloc state of given block(s) * @sdp: the filesystem * @rgd: the resource group the blocks are in * @bstart: the start of a run of blocks to free * @blen: the length of the block run (all must lie within ONE RG!) 
* @new_state: GFS2_BLKST_XXX the after-allocation block state */ static void rgblk_free(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd, u64 bstart, u32 blen, unsigned char new_state) { … } /** * gfs2_rgrp_dump - print out an rgrp * @seq: The iterator * @rgd: The rgrp in question * @fs_id_buf: pointer to file system id (if requested) * */ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd, const char *fs_id_buf) { … } static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) { … } /** * gfs2_adjust_reservation - Adjust (or remove) a reservation after allocation * @ip: The inode we have just allocated blocks for * @rbm: The start of the allocated blocks * @len: The extent length * * Adjusts a reservation after an allocation has taken place. If the * reservation does not match the allocation, or if it is now empty * then it is removed. */ static void gfs2_adjust_reservation(struct gfs2_inode *ip, const struct gfs2_rbm *rbm, unsigned len) { … } /** * gfs2_set_alloc_start - Set starting point for block allocation * @rbm: The rbm which will be set to the required location * @ip: The gfs2 inode * @dinode: Flag to say if allocation includes a new inode * * This sets the starting point from the reservation if one is active * otherwise it falls back to guessing a start point based on the * inode's goal block or the last allocation point in the rgrp. 
*/ static void gfs2_set_alloc_start(struct gfs2_rbm *rbm, const struct gfs2_inode *ip, bool dinode) { … } /** * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode * @ip: the inode to allocate the block for * @bn: Used to return the starting block number * @nblocks: requested number of blocks/extent length (value/result) * @dinode: 1 if we're allocating a dinode block, else 0 * * Returns: 0 or error */ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, bool dinode) { … } /** * __gfs2_free_blocks - free a contiguous run of block(s) * @ip: the inode these blocks are being freed from * @rgd: the resource group the blocks are in * @bstart: first block of a run of contiguous blocks * @blen: the length of the block run * @meta: 1 if the blocks represent metadata * */ void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, u64 bstart, u32 blen, int meta) { … } /** * gfs2_free_meta - free a contiguous run of data block(s) * @ip: the inode these blocks are being freed from * @rgd: the resource group the blocks are in * @bstart: first block of a run of contiguous blocks * @blen: the length of the block run * */ void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, u64 bstart, u32 blen) { … } void gfs2_unlink_di(struct inode *inode) { … } void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) { … } /** * gfs2_check_blk_type - Check the type of a block * @sdp: The superblock * @no_addr: The block number to check * @type: The block type we are looking for * * The inode glock of @no_addr must be held. The @type to check for is either * GFS2_BLKST_DINODE or GFS2_BLKST_UNLINKED; checking for type GFS2_BLKST_FREE * or GFS2_BLKST_USED would make no sense. 
* * Returns: 0 if the block type matches the expected type * -ESTALE if it doesn't match * or -ve errno if something went wrong while checking */ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) { … } /** * gfs2_rlist_add - add a RG to a list of RGs * @ip: the inode * @rlist: the list of resource groups * @block: the block * * Figure out what RG a block belongs to and add that RG to the list * * FIXME: Don't use NOFAIL * */ void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, u64 block) { … } /** * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate * and initialize an array of glock holders for them * @rlist: the list of resource groups * @state: the state we're requesting * @flags: the modifier flags * * FIXME: Don't use NOFAIL * */ void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state, u16 flags) { … } /** * gfs2_rlist_free - free a resource group list * @rlist: the list of resource groups * */ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist) { … } void rgrp_lock_local(struct gfs2_rgrpd *rgd) { … } void rgrp_unlock_local(struct gfs2_rgrpd *rgd) { … }