defrag.c | Explore in Territory

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 */

#include <linux/sched.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "locking.h"
#include "accessors.h"
#include "messages.h"
#include "delalloc-space.h"
#include "subpage.h"
#include "defrag.h"
#include "file-item.h"
#include "super.h"

static struct kmem_cache *btrfs_inode_defrag_cachep;

/*
 * When auto defrag is enabled we queue up these defrag structs to remember
 * which inodes need defragging passes.
 */
struct inode_defrag { … };

static int __compare_inode_defrag(struct inode_defrag *defrag1,
				  struct inode_defrag *defrag2)
{ … }

/*
 * Pop a record for an inode into the defrag tree.  The lock must be held
 * already.
 *
 * If you're inserting a record for an older transid than an existing record,
 * the transid already in the tree is lowered.
 *
 * If an existing record is found the defrag item you pass in is freed.
 */
static int __btrfs_add_inode_defrag(struct btrfs_inode *inode,
				    struct inode_defrag *defrag)
{ … }

static inline int __need_auto_defrag(struct btrfs_fs_info *fs_info)
{ … }

/*
 * Insert a defrag record for this inode if auto defrag is enabled.
 */
int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
			   struct btrfs_inode *inode, u32 extent_thresh)
{ … }

/*
 * Pick the defragable inode that we want, if it doesn't exist, we will get the
 * next one.
 */
static struct inode_defrag *btrfs_pick_defrag_inode(
			struct btrfs_fs_info *fs_info, u64 root, u64 ino)
{ … }

void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info)
{ … }

#define BTRFS_DEFRAG_BATCH …

static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
				    struct inode_defrag *defrag)
{ … }

/*
 * Run through the list of inodes in the FS that need defragging.
 */
int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
{ … }

/*
 * Check if two blocks addresses are close, used by defrag.
 */
static bool close_blocks(u64 blocknr, u64 other, u32 blocksize)
{ … }

/*
 * Go through all the leaves pointed to by a node and reallocate them so that
 * disk order is close to key order.
 */
static int btrfs_realloc_node(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      struct extent_buffer *parent,
			      int start_slot, u64 *last_ret,
			      struct btrfs_key *progress)
{ … }

/*
 * Defrag all the leaves in a given btree.
 * Read all the leaves and try to get key order to
 * better reflect disk order
 */

static int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root)
{ … }

/*
 * Defrag a given btree.  Every leaf in the btree is read and defragmented.
 */
int btrfs_defrag_root(struct btrfs_root *root)
{ … }

/*
 * Defrag specific helper to get an extent map.
 *
 * Differences between this and btrfs_get_extent() are:
 *
 * - No extent_map will be added to inode->extent_tree
 *   To reduce memory usage in the long run.
 *
 * - Extra optimization to skip file extents older than @newer_than
 *   By using btrfs_search_forward() we can skip entire file ranges that
 *   have extents created in past transactions, because btrfs_search_forward()
 *   will not visit leaves and nodes with a generation smaller than given
 *   minimal generation threshold (@newer_than).
 *
 * Return valid em if we find a file extent matching the requirement.
 * Return NULL if we can not find a file extent matching the requirement.
 *
 * Return ERR_PTR() for error.
 */
static struct extent_map *defrag_get_extent(struct btrfs_inode *inode,
					    u64 start, u64 newer_than)
{ … }

static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start,
					       u64 newer_than, bool locked)
{ … }

static u32 get_extent_max_capacity(const struct btrfs_fs_info *fs_info,
				   const struct extent_map *em)
{ … }

static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em,
				     u32 extent_thresh, u64 newer_than, bool locked)
{ … }

/*
 * Prepare one page to be defragged.
 *
 * This will ensure:
 *
 * - Returned page is locked and has been set up properly.
 * - No ordered extent exists in the page.
 * - The page is uptodate.
 *
 * NOTE: Caller should also wait for page writeback after the cluster is
 * prepared, here we don't do writeback wait for each page.
 */
static struct folio *defrag_prepare_one_folio(struct btrfs_inode *inode, pgoff_t index)
{ … }

struct defrag_target_range { … };

/*
 * Collect all valid target extents.
 *
 * @start:	   file offset to lookup
 * @len:	   length to lookup
 * @extent_thresh: file extent size threshold, any extent size >= this value
 *		   will be ignored
 * @newer_than:    only defrag extents newer than this value
 * @do_compress:   whether the defrag is doing compression
 *		   if true, @extent_thresh will be ignored and all regular
 *		   file extents meeting @newer_than will be targets.
 * @locked:	   if the range has already held extent lock
 * @target_list:   list of targets file extents
 */
static int defrag_collect_targets(struct btrfs_inode *inode,
				  u64 start, u64 len, u32 extent_thresh,
				  u64 newer_than, bool do_compress,
				  bool locked, struct list_head *target_list,
				  u64 *last_scanned_ret)
{ … }

#define CLUSTER_SIZE …
static_assert(…);

/*
 * Defrag one contiguous target range.
 *
 * @inode:	target inode
 * @target:	target range to defrag
 * @pages:	locked pages covering the defrag range
 * @nr_pages:	number of locked pages
 *
 * Caller should ensure:
 *
 * - Pages are prepared
 *   Pages should be locked, no ordered extent in the pages range,
 *   no writeback.
 *
 * - Extent bits are locked
 */
static int defrag_one_locked_target(struct btrfs_inode *inode,
				    struct defrag_target_range *target,
				    struct folio **folios, int nr_pages,
				    struct extent_state **cached_state)
{ … }

static int defrag_one_range(struct btrfs_inode *inode, u64 start, u32 len,
			    u32 extent_thresh, u64 newer_than, bool do_compress,
			    u64 *last_scanned_ret)
{ … }

static int defrag_one_cluster(struct btrfs_inode *inode,
			      struct file_ra_state *ra,
			      u64 start, u32 len, u32 extent_thresh,
			      u64 newer_than, bool do_compress,
			      unsigned long *sectors_defragged,
			      unsigned long max_sectors,
			      u64 *last_scanned_ret)
{ … }

/*
 * Entry point to file defragmentation.
 *
 * @inode:	   inode to be defragged
 * @ra:		   readahead state (can be NUL)
 * @range:	   defrag options including range and flags
 * @newer_than:	   minimum transid to defrag
 * @max_to_defrag: max number of sectors to be defragged, if 0, the whole inode
 *		   will be defragged.
 *
 * Return <0 for error.
 * Return >=0 for the number of sectors defragged, and range->start will be updated
 * to indicate the file offset where next defrag should be started at.
 * (Mostly for autodefrag, which sets @max_to_defrag thus we may exit early without
 *  defragging all the range).
 */
int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
		      struct btrfs_ioctl_defrag_range_args *range,
		      u64 newer_than, unsigned long max_to_defrag)
{ … }

void __cold btrfs_auto_defrag_exit(void)
{ … }

int __init btrfs_auto_defrag_init(void)
{ … }
linux/fs/btrfs/defrag.c