// SPDX-License-Identifier: GPL-2.0 #include "messages.h" #include "ctree.h" #include "delalloc-space.h" #include "block-rsv.h" #include "btrfs_inode.h" #include "space-info.h" #include "qgroup.h" #include "fs.h" /* * HOW DOES THIS WORK * * There are two stages to data reservations, one for data and one for metadata * to handle the new extents and checksums generated by writing data. * * * DATA RESERVATION * The general flow of the data reservation is as follows * * -> Reserve * We call into btrfs_reserve_data_bytes() for the user request bytes that * they wish to write. We make this reservation and add it to * space_info->bytes_may_use. We set EXTENT_DELALLOC on the inode io_tree * for the range and carry on if this is buffered, or follow up trying to * make a real allocation if we are pre-allocating or doing O_DIRECT. * * -> Use * At writepages()/prealloc/O_DIRECT time we will call into * btrfs_reserve_extent() for some part or all of this range of bytes. We * will make the allocation and subtract space_info->bytes_may_use by the * original requested length and increase the space_info->bytes_reserved by * the allocated length. This distinction is important because compression * may allocate a smaller on disk extent than we previously reserved. * * -> Allocation * finish_ordered_io() will insert the new file extent item for this range, * and then add a delayed ref update for the extent tree. Once that delayed * ref is written the extent size is subtracted from * space_info->bytes_reserved and added to space_info->bytes_used. * * Error handling * * -> By the reservation maker * This is the simplest case, we haven't completed our operation and we know * how much we reserved, we can simply call * btrfs_free_reserved_data_space*() and it will be removed from * space_info->bytes_may_use. * * -> After the reservation has been made, but before cow_file_range() * This is specifically for the delalloc case. 
You must clear * EXTENT_DELALLOC with the EXTENT_CLEAR_DATA_RESV bit, and the range will * be subtracted from space_info->bytes_may_use. * * METADATA RESERVATION * The general metadata reservation lifetimes are discussed elsewhere, this * will just focus on how it is used for delalloc space. * * We keep track of two things on a per inode basis * * ->outstanding_extents * This is the number of file extent items we'll need to handle all of the * outstanding DELALLOC space we have in this inode. We limit the maximum * size of an extent, so a large contiguous dirty area may require more than * one outstanding_extent, which is why count_max_extents() is used to * determine how many outstanding_extents get added. * * ->csum_bytes * This is essentially how many dirty bytes we have for this inode, so we * can calculate the number of checksum items we would have to add in order * to checksum our outstanding data. * * We keep a per-inode block_rsv in order to make it easier to keep track of * our reservation. We use btrfs_calculate_inode_block_rsv_size() to * calculate the current theoretical maximum reservation we would need for the * metadata for this inode. We call this and then adjust our reservation as * necessary, either by attempting to reserve more space, or freeing up excess * space. * * OUTSTANDING_EXTENTS HANDLING * * ->outstanding_extents is used for keeping track of how many extents we will * need to use for this inode, and it will fluctuate depending on where you are * in the life cycle of the dirty data. 
Consider the following normal case for * a completely clean inode, with a num_bytes < our maximum allowed extent size * * -> reserve * ->outstanding_extents += 1 (current value is 1) * * -> set_delalloc * ->outstanding_extents += 1 (current value is 2) * * -> btrfs_delalloc_release_extents() * ->outstanding_extents -= 1 (current value is 1) * * We must call this once we are done, as we hold our reservation for the * duration of our operation, and then assume set_delalloc will update the * counter appropriately. * * -> add ordered extent * ->outstanding_extents += 1 (current value is 2) * * -> btrfs_clear_delalloc_extent * ->outstanding_extents -= 1 (current value is 1) * * -> finish_ordered_io/btrfs_remove_ordered_extent * ->outstanding_extents -= 1 (current value is 0) * * Each stage is responsible for their own accounting of the extent, thus * making error handling and cleanup easier. */ /* * NOTE(review): the body is not visible in this view. Judging by the name * only, this presumably makes sure @bytes of data space can be reserved for * @inode, allocating a data chunk when needed - confirm against the * implementation before relying on this description. */ int btrfs_alloc_data_chunk_ondemand(const struct btrfs_inode *inode, u64 bytes) { … } /* * The comment on btrfs_delalloc_reserve_space() names this function as the * place where space in the data space info and the corresponding qgroup space * get reserved. * * NOTE(review): the body is not visible in this view. Presumably @start and * @len describe the byte range being written, @reserved records the qgroup * ranges actually reserved, and @noflush asks for a reservation that does not * flush - confirm all of these against the implementation. */ int btrfs_check_data_free_space(struct btrfs_inode *inode, struct extent_changeset **reserved, u64 start, u64 len, bool noflush) { … } /* * Called if we need to clear a data reservation for this inode, * normally in an error case. * * This one will *NOT* use the accurate qgroup reserved space API; it is only * for cases where we can't sleep and are sure it won't affect qgroup reserved * space, like clear_bit_hook(). */ void btrfs_free_reserved_data_space_noquota(struct btrfs_fs_info *fs_info, u64 len) { … } /* * Called if we need to clear a data reservation for this inode, * normally in an error case. * * This one will handle the per-inode data rsv map for the accurate reserved * space framework. */ void btrfs_free_reserved_data_space(struct btrfs_inode *inode, struct extent_changeset *reserved, u64 start, u64 len) { … } /* * Release any excessive reservations for an inode. * * @inode: the inode we need to release from * @qgroup_free: free or convert qgroup meta. 
Unlike normal operation, qgroup * meta reservation needs to know if we are freeing qgroup * reservation or just converting it into per-trans. Normally * @qgroup_free is true for error handling, and false for normal * release. * * This is the same as btrfs_block_rsv_release, except that it handles the * tracepoint for the reservation. */ static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free) { … } /* * Per the overview at the top of this file, this calculates the current * theoretical maximum metadata reservation needed for @inode; the reservation * is then adjusted by reserving more space or freeing up the excess. * * NOTE(review): the body is not visible in this view - confirm the details * against the implementation. */ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode) { … } /* * NOTE(review): the body is not visible in this view. From the signature, this * presumably derives the metadata and qgroup amounts to reserve for @num_bytes * and @disk_num_bytes and returns them through @meta_reserve and * @qgroup_reserve - confirm against the implementation. */ static void calc_inode_reservations(struct btrfs_inode *inode, u64 num_bytes, u64 disk_num_bytes, u64 *meta_reserve, u64 *qgroup_reserve) { … } /* * Reserve metadata space for delalloc on @inode. * * According to the comment on btrfs_delalloc_reserve_space(), this reserves * metadata space based on the number of outstanding extents and the csums * needed, adds to the inode's delalloc_bytes and adds the inode to the * fs_info's delalloc inodes list. btrfs_delalloc_release_extents() is * documented as having to be used in conjunction with this function. * * NOTE(review): the body is not visible in this view; the exact meaning of * @disk_num_bytes and @noflush should be confirmed against the implementation. */ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes, u64 disk_num_bytes, bool noflush) { … } /* * Release a metadata reservation for an inode. * * @inode: the inode to release the reservation for. * @num_bytes: the number of bytes we are releasing. * @qgroup_free: free qgroup reservation or convert it to per-trans reservation * * This will release the metadata reservation for an inode. This can be called * once we complete IO for a given set of bytes to release their metadata * reservations, or on error for the same reason. */ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes, bool qgroup_free) { … } /* * Release our outstanding_extents for an inode. * * @inode: the inode to balance the reservation for. * @num_bytes: the number of bytes we originally reserved with * * When we reserve space we increase outstanding_extents for the extents we may * add. Once we've set the range as delalloc or created our ordered extents we * have outstanding_extents to track the real usage, so we use this to free our * temporarily tracked outstanding_extents. This _must_ be used in conjunction * with btrfs_delalloc_reserve_metadata. 
*/ void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes) { … } /* * Reserve data and metadata space for delalloc * * @inode: inode we're writing to * @reserved: mandatory parameter, records the qgroup ranges actually reserved * by the current reservation. * @start: start of the range we are writing to * @len: length of the range we are writing to * * This will do the following things: * * - reserve space in data space info for num bytes and reserve the precise * corresponding qgroup space * (Done in check_data_free_space) * * - reserve space for metadata space, based on the number of outstanding * extents and how many csums will be needed; also reserve metadata space in a * per root over-reserve method. * - add to the inode's ->delalloc_bytes * - add it to the fs_info's delalloc inodes list. * (Above 3 all done in delalloc_reserve_metadata) * * Return 0 for success * Return <0 for error (-ENOSPC or -EDQUOT) */ int btrfs_delalloc_reserve_space(struct btrfs_inode *inode, struct extent_changeset **reserved, u64 start, u64 len) { … } /* * Release data and metadata space for delalloc * * @inode: inode we're releasing space for * @reserved: list of changed/reserved ranges * @start: start position of the space already reserved * @len: length of the space already reserved * @qgroup_free: should qgroup reserved-space also be freed * * Release the metadata space that was not used, decrement * ->delalloc_bytes and remove the inode from the fs_info->delalloc_inodes list * if there are no delalloc bytes left. This also handles the qgroup reserved * space. */ void btrfs_delalloc_release_space(struct btrfs_inode *inode, struct extent_changeset *reserved, u64 start, u64 len, bool qgroup_free) { … }