// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. * Copyright (C) 2010 Red Hat, Inc. * All Rights Reserved. */ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_da_format.h" #include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_bmap_btree.h" #include "xfs_quota.h" #include "xfs_trans.h" #include "xfs_qm.h" #include "xfs_trans_space.h" #include "xfs_rtbitmap.h" #include "xfs_attr_item.h" #include "xfs_log.h" #define _ALLOC … #define _FREE … /* * A buffer has a format structure overhead in the log in addition * to the data, so we need to take this into account when reserving * space in a transaction for a buffer. Round the space required up * to a multiple of 128 bytes so that we don't change the historical * reservation that has been used for this overhead. */ STATIC uint xfs_buf_log_overhead(void) { … } /* * Calculate out transaction log reservation per item in bytes. * * The nbufs argument is used to indicate the number of items that * will be changed in a transaction. size is used to tell how many * bytes should be reserved per item. */ STATIC uint xfs_calc_buf_res( uint nbufs, uint size) { … } /* * Per-extent log reservation for the btree changes involved in freeing or * allocating an extent. In classic XFS there were two trees that will be * modified (bnobt + cntbt). With rmap enabled, there are three trees * (rmapbt). The number of blocks reserved is based on the formula: * * num trees * ((2 blocks/level * max depth) - 1) * * Keep in mind that max depth is calculated separately for each type of tree. */ uint xfs_allocfree_block_count( struct xfs_mount *mp, uint num_ops) { … } /* * Per-extent log reservation for refcount btree changes. These are never done * in the same transaction as an allocation or a free, so we compute them * separately. */ static unsigned int xfs_refcountbt_block_count( struct xfs_mount *mp, unsigned int num_ops) { … } /* * Logging inodes is really tricksy. They are logged in memory format, * which means that what we write into the log doesn't directly translate into * the amount of space they use on disk. * * Case in point - btree format forks in memory format use more space than the * on-disk format. In memory, the buffer contains a normal btree block header so * the btree code can treat it as though it is just another generic buffer. * However, when we write it to the inode fork, we don't write all of this * header as it isn't needed. e.g. the root is only ever in the inode, so * there's no need for sibling pointers which would waste 16 bytes of space. * * Hence when we have an inode with a maximally sized btree format fork, then * amount of information we actually log is greater than the size of the inode * on disk. Hence we need an inode reservation function that calculates all this * correctly. So, we log: * * - 4 log op headers for object * - for the ilf, the inode core and 2 forks * - inode log format object * - the inode core * - two inode forks containing bmap btree root blocks. * - the btree data contained by both forks will fit into the inode size, * hence when combined with the inode core above, we have a total of the * actual inode size. * - the BMBT headers need to be accounted separately, as they are * additional to the records and pointers that fit inside the inode * forks. */ STATIC uint xfs_calc_inode_res( struct xfs_mount *mp, uint ninodes) { … } /* * Inode btree record insertion/removal modifies the inode btree and free space * btrees (since the inobt does not use the agfl). This requires the following * reservation: * * the inode btree: max depth * blocksize * the allocation btrees: 2 trees * (max depth - 1) * block size * * The caller must account for SB and AG header modifications, etc. */ STATIC uint xfs_calc_inobt_res( struct xfs_mount *mp) { … } /* * The free inode btree is a conditional feature. The behavior differs slightly * from that of the traditional inode btree in that the finobt tracks records * for inode chunks with at least one free inode. A record can be removed from * the tree during individual inode allocation. Therefore the finobt * reservation is unconditional for both the inode chunk allocation and * individual inode allocation (modify) cases. * * Behavior aside, the reservation for finobt modification is equivalent to the * traditional inobt: cover a full finobt shape change plus block allocation. */ STATIC uint xfs_calc_finobt_res( struct xfs_mount *mp) { … } /* * Calculate the reservation required to allocate or free an inode chunk. This * includes: * * the allocation btrees: 2 trees * (max depth - 1) * block size * the inode chunk: m_ino_geo.ialloc_blks * N * * The size N of the inode chunk reservation depends on whether it is for * allocation or free and which type of create transaction is in use. An inode * chunk free always invalidates the buffers and only requires reservation for * headers (N == 0). An inode chunk allocation requires a chunk sized * reservation on v4 and older superblocks to initialize the chunk. No chunk * reservation is required for allocation on v5 supers, which use ordered * buffers to initialize. */ STATIC uint xfs_calc_inode_chunk_res( struct xfs_mount *mp, bool alloc) { … } /* * Per-extent log reservation for the btree changes involved in freeing or * allocating a realtime extent. We have to be able to log as many rtbitmap * blocks as needed to mark inuse XFS_BMBT_MAX_EXTLEN blocks' worth of realtime * extents, as well as the realtime summary block. */ static unsigned int xfs_rtalloc_block_count( struct xfs_mount *mp, unsigned int num_ops) { … } /* * Various log reservation values. * * These are based on the size of the file system block because that is what * most transactions manipulate. Each adds in an additional 128 bytes per * item logged to try to account for the overhead of the transaction mechanism. * * Note: Most of the reservations underestimate the number of allocation * groups into which they could free extents in the xfs_defer_finish() call. * This is because the number in the worst case is quite high and quite * unusual. In order to fix this we need to change xfs_defer_finish() to free * extents in only a single AG at a time. This will require changes to the * EFI code as well, however, so that the EFI for the extents not freed is * logged again in each transaction. See SGI PV #261917. * * Reservation functions here avoid a huge stack in xfs_trans_init due to * register overflow from temporaries in the calculations. */ /* * Compute the log reservation required to handle the refcount update * transaction. Refcount updates are always done via deferred log items. * * This is calculated as: * Data device refcount updates (t1): * the agfs of the ags containing the blocks: nr_ops * sector size * the refcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size */ static unsigned int xfs_calc_refcountbt_reservation( struct xfs_mount *mp, unsigned int nr_ops) { … } /* * In a write transaction we can allocate a maximum of 2 * extents. This gives (t1): * the inode getting the new extents: inode size * the inode's bmap btree: max depth * block size * the agfs of the ags from which the extents are allocated: 2 * sector * the superblock free block counter: sector size * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size * Or, if we're writing to a realtime file (t2): * the inode getting the new extents: inode size * the inode's bmap btree: max depth * block size * the agfs of the ags from which the extents are allocated: 2 * sector * the superblock free block counter: sector size * the realtime bitmap: ((XFS_BMBT_MAX_EXTLEN / rtextsize) / NBBY) bytes * the realtime summary: 1 block * the allocation btrees: 2 trees * (2 * max depth - 1) * block size * And the bmap_finish transaction can free bmap blocks in a join (t3): * the agfs of the ags containing the blocks: 2 * sector size * the agfls of the ags containing the blocks: 2 * sector size * the super block free block counter: sector size * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size * And any refcount updates that happen in a separate transaction (t4). */ STATIC uint xfs_calc_write_reservation( struct xfs_mount *mp, bool for_minlogsize) { … } unsigned int xfs_calc_write_reservation_minlogsize( struct xfs_mount *mp) { … } /* * In truncating a file we free up to two extents at once. We can modify (t1): * the inode being truncated: inode size * the inode's bmap btree: (max depth + 1) * block size * And the bmap_finish transaction can free the blocks and bmap blocks (t2): * the agf for each of the ags: 4 * sector size * the agfl for each of the ags: 4 * sector size * the super block to reflect the freed blocks: sector size * worst case split in allocation btrees per extent assuming 4 extents: * 4 exts * 2 trees * (2 * max depth - 1) * block size * Or, if it's a realtime file (t3): * the agf for each of the ags: 2 * sector size * the agfl for each of the ags: 2 * sector size * the super block to reflect the freed blocks: sector size * the realtime bitmap: * 2 exts * ((XFS_BMBT_MAX_EXTLEN / rtextsize) / NBBY) bytes * the realtime summary: 2 exts * 1 block * worst case split in allocation btrees per extent assuming 2 extents: * 2 exts * 2 trees * (2 * max depth - 1) * block size * And any refcount updates that happen in a separate transaction (t4). */ STATIC uint xfs_calc_itruncate_reservation( struct xfs_mount *mp, bool for_minlogsize) { … } unsigned int xfs_calc_itruncate_reservation_minlogsize( struct xfs_mount *mp) { … } static inline unsigned int xfs_calc_pptr_link_overhead(void) { … } static inline unsigned int xfs_calc_pptr_unlink_overhead(void) { … } static inline unsigned int xfs_calc_pptr_replace_overhead(void) { … } /* * In renaming a files we can modify: * the five inodes involved: 5 * inode size * the two directory btrees: 2 * (max depth + v2) * dir block size * the two directory bmap btrees: 2 * max depth * block size * And the bmap_finish transaction can free dir and bmap blocks (two sets * of bmap blocks) giving (t2): * the agf for the ags in which the blocks live: 3 * sector size * the agfl for the ags in which the blocks live: 3 * sector size * the superblock for the free block count: sector size * the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size * If parent pointers are enabled (t3), then each transaction in the chain * must be capable of setting or removing the extended attribute * containing the parent information. It must also be able to handle * the three xattr intent items that track the progress of the parent * pointer update. */ STATIC uint xfs_calc_rename_reservation( struct xfs_mount *mp) { … } static inline unsigned int xfs_rename_log_count( struct xfs_mount *mp, struct xfs_trans_resv *resp) { … } /* * For removing an inode from unlinked list at first, we can modify: * the agi hash list and counters: sector size * the on disk inode before ours in the agi hash list: inode cluster size * the on disk inode in the agi hash list: inode cluster size */ STATIC uint xfs_calc_iunlink_remove_reservation( struct xfs_mount *mp) { … } static inline unsigned int xfs_link_log_count( struct xfs_mount *mp, struct xfs_trans_resv *resp) { … } /* * For creating a link to an inode: * the parent directory inode: inode size * the linked inode: inode size * the directory btree could split: (max depth + v2) * dir block size * the directory bmap btree could join or split: (max depth + v2) * blocksize * And the bmap_finish transaction can free some bmap blocks giving: * the agf for the ag in which the blocks live: sector size * the agfl for the ag in which the blocks live: sector size * the superblock for the free block count: sector size * the allocation btrees: 2 trees * (2 * max depth - 1) * block size */ STATIC uint xfs_calc_link_reservation( struct xfs_mount *mp) { … } /* * For adding an inode to unlinked list we can modify: * the agi hash list: sector size * the on disk inode: inode cluster size */ STATIC uint xfs_calc_iunlink_add_reservation(xfs_mount_t *mp) { … } static inline unsigned int xfs_remove_log_count( struct xfs_mount *mp, struct xfs_trans_resv *resp) { … } /* * For removing a directory entry we can modify: * the parent directory inode: inode size * the removed inode: inode size * the directory btree could join: (max depth + v2) * dir block size * the directory bmap btree could join or split: (max depth + v2) * blocksize * And the bmap_finish transaction can free the dir and bmap blocks giving: * the agf for the ag in which the blocks live: 2 * sector size * the agfl for the ag in which the blocks live: 2 * sector size * the superblock for the free block count: sector size * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size */ STATIC uint xfs_calc_remove_reservation( struct xfs_mount *mp) { … } /* * For create, break it in to the two cases that the transaction * covers. We start with the modify case - allocation done by modification * of the state of existing inodes - and the allocation case. */ /* * For create we can modify: * the parent directory inode: inode size * the new inode: inode size * the inode btree entry: block size * the superblock for the nlink flag: sector size * the directory btree: (max depth + v2) * dir block size * the directory inode's bmap btree: (max depth + v2) * block size * the finobt (record modification and allocation btrees) */ STATIC uint xfs_calc_create_resv_modify( struct xfs_mount *mp) { … } /* * For icreate we can allocate some inodes giving: * the agi and agf of the ag getting the new inodes: 2 * sectorsize * the superblock for the nlink flag: sector size * the inode chunk (allocation, optional init) * the inobt (record insertion) * the finobt (optional, record insertion) */ STATIC uint xfs_calc_icreate_resv_alloc( struct xfs_mount *mp) { … } static inline unsigned int xfs_icreate_log_count( struct xfs_mount *mp, struct xfs_trans_resv *resp) { … } STATIC uint xfs_calc_icreate_reservation( struct xfs_mount *mp) { … } STATIC uint xfs_calc_create_tmpfile_reservation( struct xfs_mount *mp) { … } static inline unsigned int xfs_mkdir_log_count( struct xfs_mount *mp, struct xfs_trans_resv *resp) { … } /* * Making a new directory is the same as creating a new file. */ STATIC uint xfs_calc_mkdir_reservation( struct xfs_mount *mp) { … } static inline unsigned int xfs_symlink_log_count( struct xfs_mount *mp, struct xfs_trans_resv *resp) { … } /* * Making a new symplink is the same as creating a new file, but * with the added blocks for remote symlink data which can be up to 1kB in * length (XFS_SYMLINK_MAXLEN). */ STATIC uint xfs_calc_symlink_reservation( struct xfs_mount *mp) { … } /* * In freeing an inode we can modify: * the inode being freed: inode size * the super block free inode counter, AGF and AGFL: sector size * the on disk inode (agi unlinked list removal) * the inode chunk (invalidated, headers only) * the inode btree * the finobt (record insertion, removal or modification) * * Note that the inode chunk res. includes an allocfree res. for freeing of the * inode chunk. This is technically extraneous because the inode chunk free is * deferred (it occurs after a transaction roll). Include the extra reservation * anyways since we've had reports of ifree transaction overruns due to too many * agfl fixups during inode chunk frees. */ STATIC uint xfs_calc_ifree_reservation( struct xfs_mount *mp) { … } /* * When only changing the inode we log the inode and possibly the superblock * We also add a bit of slop for the transaction stuff. */ STATIC uint xfs_calc_ichange_reservation( struct xfs_mount *mp) { … } /* * Growing the data section of the filesystem. * superblock * agi and agf * allocation btrees */ STATIC uint xfs_calc_growdata_reservation( struct xfs_mount *mp) { … } /* * Growing the rt section of the filesystem. * In the first set of transactions (ALLOC) we allocate space to the * bitmap or summary files. * superblock: sector size * agf of the ag from which the extent is allocated: sector size * bmap btree for bitmap/summary inode: max depth * blocksize * bitmap/summary inode: inode size * allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize */ STATIC uint xfs_calc_growrtalloc_reservation( struct xfs_mount *mp) { … } /* * Growing the rt section of the filesystem. * In the second set of transactions (ZERO) we zero the new metadata blocks. * one bitmap/summary block: blocksize */ STATIC uint xfs_calc_growrtzero_reservation( struct xfs_mount *mp) { … } /* * Growing the rt section of the filesystem. * In the third set of transactions (FREE) we update metadata without * allocating any new blocks. * superblock: sector size * bitmap inode: inode size * summary inode: inode size * one bitmap block: blocksize * summary blocks: new summary size */ STATIC uint xfs_calc_growrtfree_reservation( struct xfs_mount *mp) { … } /* * Logging the inode modification timestamp on a synchronous write. * inode */ STATIC uint xfs_calc_swrite_reservation( struct xfs_mount *mp) { … } /* * Logging the inode mode bits when writing a setuid/setgid file * inode */ STATIC uint xfs_calc_writeid_reservation( struct xfs_mount *mp) { … } /* * Converting the inode from non-attributed to attributed. * the inode being converted: inode size * agf block and superblock (for block allocation) * the new block (directory sized) * bmap blocks for the new directory block * allocation btrees */ STATIC uint xfs_calc_addafork_reservation( struct xfs_mount *mp) { … } /* * Removing the attribute fork of a file * the inode being truncated: inode size * the inode's bmap btree: max depth * block size * And the bmap_finish transaction can free the blocks and bmap blocks: * the agf for each of the ags: 4 * sector size * the agfl for each of the ags: 4 * sector size * the super block to reflect the freed blocks: sector size * worst case split in allocation btrees per extent assuming 4 extents: * 4 exts * 2 trees * (2 * max depth - 1) * block size */ STATIC uint xfs_calc_attrinval_reservation( struct xfs_mount *mp) { … } /* * Setting an attribute at mount time. * the inode getting the attribute * the superblock for allocations * the agfs extents are allocated from * the attribute btree * max depth * the inode allocation btree * Since attribute transaction space is dependent on the size of the attribute, * the calculation is done partially at mount time and partially at runtime(see * below). */ STATIC uint xfs_calc_attrsetm_reservation( struct xfs_mount *mp) { … } /* * Setting an attribute at runtime, transaction space unit per block. * the superblock for allocations: sector size * the inode bmap btree could join or split: max depth * block size * Since the runtime attribute transaction space is dependent on the total * blocks needed for the 1st bmap, here we calculate out the space unit for * one block so that the caller could figure out the total space according * to the attibute extent length in blocks by: * ext * M_RES(mp)->tr_attrsetrt.tr_logres */ STATIC uint xfs_calc_attrsetrt_reservation( struct xfs_mount *mp) { … } /* * Removing an attribute. * the inode: inode size * the attribute btree could join: max depth * block size * the inode bmap btree could join or split: max depth * block size * And the bmap_finish transaction can free the attr blocks freed giving: * the agf for the ag in which the blocks live: 2 * sector size * the agfl for the ag in which the blocks live: 2 * sector size * the superblock for the free block count: sector size * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size */ STATIC uint xfs_calc_attrrm_reservation( struct xfs_mount *mp) { … } /* * Clearing a bad agino number in an agi hash bucket. */ STATIC uint xfs_calc_clear_agi_bucket_reservation( struct xfs_mount *mp) { … } /* * Adjusting quota limits. * the disk quota buffer: sizeof(struct xfs_disk_dquot) */ STATIC uint xfs_calc_qm_setqlim_reservation(void) { … } /* * Allocating quota on disk if needed. * the write transaction log space for quota file extent allocation * the unit of quota allocation: one system block size */ STATIC uint xfs_calc_qm_dqalloc_reservation( struct xfs_mount *mp, bool for_minlogsize) { … } unsigned int xfs_calc_qm_dqalloc_reservation_minlogsize( struct xfs_mount *mp) { … } /* * Syncing the incore super block changes to disk. * the super block to reflect the changes: sector size */ STATIC uint xfs_calc_sb_reservation( struct xfs_mount *mp) { … } /* * Namespace reservations. * * These get tricky when parent pointers are enabled as we have attribute * modifications occurring from within these transactions. Rather than confuse * each of these reservation calculations with the conditional attribute * reservations, add them here in a clear and concise manner. This requires that * the attribute reservations have already been calculated. * * Note that we only include the static attribute reservation here; the runtime * reservation will have to be modified by the size of the attributes being * added/removed/modified. See the comments on the attribute reservation * calculations for more details. */ STATIC void xfs_calc_namespace_reservations( struct xfs_mount *mp, struct xfs_trans_resv *resp) { … } void xfs_trans_resv_calc( struct xfs_mount *mp, struct xfs_trans_resv *resp) { … }