// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007 Oracle. All rights reserved. */ #include <linux/sched.h> #include "ctree.h" #include "disk-io.h" #include "transaction.h" #include "locking.h" #include "accessors.h" #include "messages.h" #include "delalloc-space.h" #include "subpage.h" #include "defrag.h" #include "file-item.h" #include "super.h" static struct kmem_cache *btrfs_inode_defrag_cachep; /* * When auto defrag is enabled we queue up these defrag structs to remember * which inodes need defragging passes. */ struct inode_defrag { … }; static int __compare_inode_defrag(struct inode_defrag *defrag1, struct inode_defrag *defrag2) { … } /* * Pop a record for an inode into the defrag tree. The lock must be held * already. * * If you're inserting a record for an older transid than an existing record, * the transid already in the tree is lowered. * * If an existing record is found the defrag item you pass in is freed. */ static int __btrfs_add_inode_defrag(struct btrfs_inode *inode, struct inode_defrag *defrag) { … } static inline int __need_auto_defrag(struct btrfs_fs_info *fs_info) { … } /* * Insert a defrag record for this inode if auto defrag is enabled. */ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, u32 extent_thresh) { … } /* * Pick the defragable inode that we want, if it doesn't exist, we will get the * next one. */ static struct inode_defrag *btrfs_pick_defrag_inode( struct btrfs_fs_info *fs_info, u64 root, u64 ino) { … } void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info) { … } #define BTRFS_DEFRAG_BATCH … static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, struct inode_defrag *defrag) { … } /* * Run through the list of inodes in the FS that need defragging. */ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) { … } /* * Check if two blocks addresses are close, used by defrag. */ static bool close_blocks(u64 blocknr, u64 other, u32 blocksize) { … } /* * Go through all the leaves pointed to by a node and reallocate them so that * disk order is close to key order. */ static int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *parent, int start_slot, u64 *last_ret, struct btrfs_key *progress) { … } /* * Defrag all the leaves in a given btree. * Read all the leaves and try to get key order to * better reflect disk order */ static int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, struct btrfs_root *root) { … } /* * Defrag a given btree. Every leaf in the btree is read and defragmented. */ int btrfs_defrag_root(struct btrfs_root *root) { … } /* * Defrag specific helper to get an extent map. * * Differences between this and btrfs_get_extent() are: * * - No extent_map will be added to inode->extent_tree * To reduce memory usage in the long run. * * - Extra optimization to skip file extents older than @newer_than * By using btrfs_search_forward() we can skip entire file ranges that * have extents created in past transactions, because btrfs_search_forward() * will not visit leaves and nodes with a generation smaller than given * minimal generation threshold (@newer_than). * * Return valid em if we find a file extent matching the requirement. * Return NULL if we can not find a file extent matching the requirement. * * Return ERR_PTR() for error. */ static struct extent_map *defrag_get_extent(struct btrfs_inode *inode, u64 start, u64 newer_than) { … } static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start, u64 newer_than, bool locked) { … } static u32 get_extent_max_capacity(const struct btrfs_fs_info *fs_info, const struct extent_map *em) { … } static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em, u32 extent_thresh, u64 newer_than, bool locked) { … } /* * Prepare one page to be defragged. * * This will ensure: * * - Returned page is locked and has been set up properly. * - No ordered extent exists in the page. * - The page is uptodate. * * NOTE: Caller should also wait for page writeback after the cluster is * prepared, here we don't do writeback wait for each page. */ static struct folio *defrag_prepare_one_folio(struct btrfs_inode *inode, pgoff_t index) { … } struct defrag_target_range { … }; /* * Collect all valid target extents. * * @start: file offset to lookup * @len: length to lookup * @extent_thresh: file extent size threshold, any extent size >= this value * will be ignored * @newer_than: only defrag extents newer than this value * @do_compress: whether the defrag is doing compression * if true, @extent_thresh will be ignored and all regular * file extents meeting @newer_than will be targets. * @locked: if the range has already held extent lock * @target_list: list of targets file extents */ static int defrag_collect_targets(struct btrfs_inode *inode, u64 start, u64 len, u32 extent_thresh, u64 newer_than, bool do_compress, bool locked, struct list_head *target_list, u64 *last_scanned_ret) { … } #define CLUSTER_SIZE … static_assert(…); /* * Defrag one contiguous target range. * * @inode: target inode * @target: target range to defrag * @pages: locked pages covering the defrag range * @nr_pages: number of locked pages * * Caller should ensure: * * - Pages are prepared * Pages should be locked, no ordered extent in the pages range, * no writeback. * * - Extent bits are locked */ static int defrag_one_locked_target(struct btrfs_inode *inode, struct defrag_target_range *target, struct folio **folios, int nr_pages, struct extent_state **cached_state) { … } static int defrag_one_range(struct btrfs_inode *inode, u64 start, u32 len, u32 extent_thresh, u64 newer_than, bool do_compress, u64 *last_scanned_ret) { … } static int defrag_one_cluster(struct btrfs_inode *inode, struct file_ra_state *ra, u64 start, u32 len, u32 extent_thresh, u64 newer_than, bool do_compress, unsigned long *sectors_defragged, unsigned long max_sectors, u64 *last_scanned_ret) { … } /* * Entry point to file defragmentation. * * @inode: inode to be defragged * @ra: readahead state (can be NUL) * @range: defrag options including range and flags * @newer_than: minimum transid to defrag * @max_to_defrag: max number of sectors to be defragged, if 0, the whole inode * will be defragged. * * Return <0 for error. * Return >=0 for the number of sectors defragged, and range->start will be updated * to indicate the file offset where next defrag should be started at. * (Mostly for autodefrag, which sets @max_to_defrag thus we may exit early without * defragging all the range). */ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra, struct btrfs_ioctl_defrag_range_args *range, u64 newer_than, unsigned long max_to_defrag) { … } void __cold btrfs_auto_defrag_exit(void) { … } int __init btrfs_auto_defrag_init(void) { … }