linux/fs/btrfs/disk-io.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 */

#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/radix-tree.h>
#include <linux/writeback.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/migrate.h>
#include <linux/ratelimit.h>
#include <linux/uuid.h>
#include <linux/semaphore.h>
#include <linux/error-injection.h>
#include <linux/crc32c.h>
#include <linux/sched/mm.h>
#include <asm/unaligned.h>
#include <crypto/hash.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "bio.h"
#include "print-tree.h"
#include "locking.h"
#include "tree-log.h"
#include "free-space-cache.h"
#include "free-space-tree.h"
#include "dev-replace.h"
#include "raid56.h"
#include "sysfs.h"
#include "qgroup.h"
#include "compression.h"
#include "tree-checker.h"
#include "ref-verify.h"
#include "block-group.h"
#include "discard.h"
#include "space-info.h"
#include "zoned.h"
#include "subpage.h"
#include "fs.h"
#include "accessors.h"
#include "extent-tree.h"
#include "root-tree.h"
#include "defrag.h"
#include "uuid-tree.h"
#include "relocation.h"
#include "scrub.h"
#include "super.h"

#define BTRFS_SUPER_FLAG_SUPP

static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info);
static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info);

static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info)
{}

/*
 * Compute the csum of a btree block and store the result to provided buffer.
 */
static void csum_tree_block(struct extent_buffer *buf, u8 *result)
{}

/*
 * we can't consider a given block up to date unless the transid of the
 * block matches the transid in the parent node's pointer.  This is how we
 * detect blocks that either didn't get written at all or got written
 * in the wrong place.
 */
int btrfs_buffer_uptodate(struct extent_buffer *eb, u64 parent_transid, int atomic)
{}

static bool btrfs_supported_super_csum(u16 csum_type)
{}

/*
 * Return 0 if the superblock checksum type matches the checksum value of that
 * algorithm. Pass the raw disk superblock data.
 */
int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
			   const struct btrfs_super_block *disk_sb)
{}

static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb,
				      int mirror_num)
{}

/*
 * helper to read a given tree block, doing retries as required when
 * the checksums don't match and we have alternate mirrors to try.
 *
 * @check:		expected tree parentness check, see the comments of the
 *			structure for details.
 */
int btrfs_read_extent_buffer(struct extent_buffer *eb,
			     const struct btrfs_tree_parent_check *check)
{}

/*
 * Checksum a dirty tree block before IO.
 */
blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio)
{}

static bool check_tree_block_fsid(struct extent_buffer *eb)
{}

/* Do basic extent buffer checks at read time */
int btrfs_validate_extent_buffer(struct extent_buffer *eb,
				 const struct btrfs_tree_parent_check *check)
{}

#ifdef CONFIG_MIGRATION
static int btree_migrate_folio(struct address_space *mapping,
		struct folio *dst, struct folio *src, enum migrate_mode mode)
{}
#else
#define btree_migrate_folio
#endif

static int btree_writepages(struct address_space *mapping,
			    struct writeback_control *wbc)
{}

static bool btree_release_folio(struct folio *folio, gfp_t gfp_flags)
{}

static void btree_invalidate_folio(struct folio *folio, size_t offset,
				 size_t length)
{}

#ifdef DEBUG
static bool btree_dirty_folio(struct address_space *mapping,
		struct folio *folio)
{
	struct btrfs_fs_info *fs_info = inode_to_fs_info(mapping->host);
	struct btrfs_subpage_info *spi = fs_info->subpage_info;
	struct btrfs_subpage *subpage;
	struct extent_buffer *eb;
	int cur_bit = 0;
	u64 page_start = folio_pos(folio);

	if (fs_info->sectorsize == PAGE_SIZE) {
		eb = folio_get_private(folio);
		BUG_ON(!eb);
		BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
		BUG_ON(!atomic_read(&eb->refs));
		btrfs_assert_tree_write_locked(eb);
		return filemap_dirty_folio(mapping, folio);
	}

	ASSERT(spi);
	subpage = folio_get_private(folio);

	for (cur_bit = spi->dirty_offset;
	     cur_bit < spi->dirty_offset + spi->bitmap_nr_bits;
	     cur_bit++) {
		unsigned long flags;
		u64 cur;

		spin_lock_irqsave(&subpage->lock, flags);
		if (!test_bit(cur_bit, subpage->bitmaps)) {
			spin_unlock_irqrestore(&subpage->lock, flags);
			continue;
		}
		spin_unlock_irqrestore(&subpage->lock, flags);
		cur = page_start + cur_bit * fs_info->sectorsize;

		eb = find_extent_buffer(fs_info, cur);
		ASSERT(eb);
		ASSERT(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
		ASSERT(atomic_read(&eb->refs));
		btrfs_assert_tree_write_locked(eb);
		free_extent_buffer(eb);

		cur_bit += (fs_info->nodesize >> fs_info->sectorsize_bits) - 1;
	}
	return filemap_dirty_folio(mapping, folio);
}
#else
#define btree_dirty_folio
#endif

static const struct address_space_operations btree_aops =;

struct extent_buffer *btrfs_find_create_tree_block(
						struct btrfs_fs_info *fs_info,
						u64 bytenr, u64 owner_root,
						int level)
{}

/*
 * Read tree block at logical address @bytenr and do various basic but critical
 * verification.
 *
 * @check:		expected tree parentness check, see comments of the
 *			structure for details.
 */
struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
				      struct btrfs_tree_parent_check *check)
{}

static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
			 u64 objectid)
{}

static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info,
					   u64 objectid, gfp_t flags)
{}

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/* Should only be used by the testing infrastructure */
struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info)
{}
#endif

static int global_root_cmp(struct rb_node *a_node, const struct rb_node *b_node)
{}

static int global_root_key_cmp(const void *k, const struct rb_node *node)
{}

int btrfs_global_root_insert(struct btrfs_root *root)
{}

void btrfs_global_root_delete(struct btrfs_root *root)
{}

struct btrfs_root *btrfs_global_root(struct btrfs_fs_info *fs_info,
				     struct btrfs_key *key)
{}

static u64 btrfs_global_root_id(struct btrfs_fs_info *fs_info, u64 bytenr)
{}

struct btrfs_root *btrfs_csum_root(struct btrfs_fs_info *fs_info, u64 bytenr)
{}

struct btrfs_root *btrfs_extent_root(struct btrfs_fs_info *fs_info, u64 bytenr)
{}

struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
				     u64 objectid)
{}

static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
					 struct btrfs_fs_info *fs_info)
{}

int btrfs_alloc_log_tree_node(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root)
{}

int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
			     struct btrfs_fs_info *fs_info)
{}

int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root)
{}

static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root,
					      struct btrfs_path *path,
					      const struct btrfs_key *key)
{}

struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
					const struct btrfs_key *key)
{}

/*
 * Initialize subvolume root in-memory structure
 *
 * @anon_dev:	anonymous device to attach to the root, if zero, allocate new
 */
static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev)
{}

static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
					       u64 root_id)
{}

static struct btrfs_root *btrfs_get_global_root(struct btrfs_fs_info *fs_info,
						u64 objectid)
{}

int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
			 struct btrfs_root *root)
{}

void btrfs_check_leaked_roots(const struct btrfs_fs_info *fs_info)
{}

static void free_global_roots(struct btrfs_fs_info *fs_info)
{}

void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
{}


/*
 * Get an in-memory reference of a root structure.
 *
 * For essential trees like root/extent tree, we grab it from fs_info directly.
 * For subvolume trees, we check the cached filesystem roots first. If not
 * found, then read it from disk and add it to cached fs roots.
 *
 * Caller should release the root by calling btrfs_put_root() after the usage.
 *
 * NOTE: Reloc and log trees can't be read by this function as they share the
 *	 same root objectid.
 *
 * @objectid:	root id
 * @anon_dev:	preallocated anonymous block device number for new roots,
 *		pass NULL for a new allocation.
 * @check_ref:	whether to check root item references. If true, return -ENOENT
 *		for orphan roots
 */
static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
					     u64 objectid, dev_t *anon_dev,
					     bool check_ref)
{}

/*
 * Get in-memory reference of a root structure
 *
 * @objectid:	tree objectid
 * @check_ref:	if set, verify that the tree exists and the item has at least
 *		one reference
 */
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
				     u64 objectid, bool check_ref)
{}

/*
 * Get in-memory reference of a root structure, created as new, optionally pass
 * the anonymous block device id
 *
 * @objectid:	tree objectid
 * @anon_dev:	if NULL, allocate a new anonymous block device or use the
 *		parameter value if not NULL
 */
struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
					 u64 objectid, dev_t *anon_dev)
{}

/*
 * Return a root for the given objectid.
 *
 * @fs_info:	the fs_info
 * @objectid:	the objectid we need to lookup
 *
 * This is exclusively used for backref walking, and exists specifically because
 * of how qgroups does lookups.  Qgroups will do a backref lookup at delayed ref
 * creation time, which means we may have to read the tree_root in order to look
 * up a fs root that is not in memory.  If the root is not in memory we will
 * read the tree root commit root and look up the fs root from there.  This is a
 * temporary root, it will not be inserted into the radix tree as it doesn't
 * have the most uptodate information, it'll simply be discarded once the
 * backref code is finished using the root.
 */
struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
						 struct btrfs_path *path,
						 u64 objectid)
{}

static int cleaner_kthread(void *arg)
{}

static int transaction_kthread(void *arg)
{}

/*
 * This will find the highest generation in the array of root backups.  The
 * index of the backup with the highest generation is returned, or -EINVAL if
 * we can't find anything.
 *
 * We check to make sure the array is valid by comparing the
 * generation of the latest root in the array with the generation
 * in the super block.  If they don't match we pitch it.
 */
static int find_newest_super_backup(struct btrfs_fs_info *info)
{}

/*
 * copy all the root pointers into the super backup array.
 * this will bump the backup pointer by one when it is
 * done
 */
static void backup_super_roots(struct btrfs_fs_info *info)
{}

/*
 * Reads a backup root based on the passed priority. Prio 0 is the newest, prio
 * 1/2/3 are 2nd newest/3rd newest/4th (oldest) backup roots
 *
 * @fs_info:  filesystem whose backup roots need to be read
 * @priority: priority of backup root required
 *
 * Returns backup root index on success and -EINVAL otherwise.
 */
static int read_backup_root(struct btrfs_fs_info *fs_info, u8 priority)
{}

/* helper to cleanup workers */
static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
{}

static void free_root_extent_buffers(struct btrfs_root *root)
{}

static void free_global_root_pointers(struct btrfs_fs_info *fs_info)
{}

/* helper to cleanup tree roots */
static void free_root_pointers(struct btrfs_fs_info *info, bool free_chunk_root)
{}

void btrfs_put_root(struct btrfs_root *root)
{}

void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info)
{}

static void btrfs_init_scrub(struct btrfs_fs_info *fs_info)
{}

static void btrfs_init_balance(struct btrfs_fs_info *fs_info)
{}

static int btrfs_init_btree_inode(struct super_block *sb)
{}

static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info)
{}

static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
{}

static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
{}

static int btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type)
{}

static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
			    struct btrfs_fs_devices *fs_devices)
{}

static int load_global_roots_objectid(struct btrfs_root *tree_root,
				      struct btrfs_path *path, u64 objectid,
				      const char *name)
{}

static int load_global_roots(struct btrfs_root *tree_root)
{}

static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
{}

/*
 * Real super block validation
 * NOTE: super csum type and incompat features will not be checked here.
 *
 * @sb:		super block to check
 * @mirror_num:	the super block number to check its bytenr:
 * 		0	the primary (1st) sb
 * 		1, 2	2nd and 3rd backup copy
 * 	       -1	skip bytenr check
 */
int btrfs_validate_super(const struct btrfs_fs_info *fs_info,
			 const struct btrfs_super_block *sb, int mirror_num)
{}

/*
 * Validation of super block at mount time.
 * Some checks already done early at mount time, like csum type and incompat
 * flags will be skipped.
 */
static int btrfs_validate_mount_super(struct btrfs_fs_info *fs_info)
{}

/*
 * Validation of super block at write time.
 * Some checks like bytenr check will be skipped as their values will be
 * overwritten soon.
 * Extra checks like csum type and incompat flags will be done here.
 */
static int btrfs_validate_write_super(struct btrfs_fs_info *fs_info,
				      struct btrfs_super_block *sb)
{}

static int load_super_root(struct btrfs_root *root, u64 bytenr, u64 gen, int level)
{}

static int load_important_roots(struct btrfs_fs_info *fs_info)
{}

static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
{}

void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
{}

static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block *sb)
{}

static int btrfs_uuid_rescan_kthread(void *data)
{}

static int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info)
{}

static int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
{}

/*
 * Mounting logic specific to read-write file systems. Shared by open_ctree
 * and btrfs_remount when remounting from read-only to read-write.
 */
int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info)
{}

/*
 * Do various sanity and dependency checks of different features.
 *
 * @is_rw_mount:	If the mount is read-write.
 *
 * This is the place for less strict checks (like for subpage or artificial
 * feature dependencies).
 *
 * For strict checks or possible corruption detection, see
 * btrfs_validate_super().
 *
 * This should be called after btrfs_parse_options(), as some mount options
 * (space cache related) can modify on-disk format like free space tree and
 * screw up certain feature dependencies.
 */
int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount)
{}

int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices,
		      const char *options)
{}
ALLOW_ERROR_INJECTION();

static void btrfs_end_super_write(struct bio *bio)
{}

struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
						   int copy_num, bool drop_cache)
{}


struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev)
{}

/*
 * Write superblock @sb to the @device. Do not wait for completion, all the
 * folios we use for writing are locked.
 *
 * Write @max_mirrors copies of the superblock, where 0 means the default
 * number of copies that fit the expected device size at commit time. Note that
 * max_mirrors must be the same for write and wait phases.
 *
 * Return number of errors when folio is not found or submission fails.
 */
static int write_dev_supers(struct btrfs_device *device,
			    struct btrfs_super_block *sb, int max_mirrors)
{}

/*
 * Wait for write completion of superblocks done by write_dev_supers,
 * @max_mirrors same for write and wait phases.
 *
 * Return -1 if primary super block write failed or when there were no super block
 * copies written. Otherwise 0.
 */
static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
{}

/*
 * endio for the write_dev_flush, this will wake anyone waiting
 * for the barrier when it is done
 */
static void btrfs_end_empty_barrier(struct bio *bio)
{}

/*
 * Submit a flush request to the device if it supports it. Error handling is
 * done in the waiting counterpart.
 */
static void write_dev_flush(struct btrfs_device *device)
{}

/*
 * If the flush bio has been submitted by write_dev_flush, wait for it.
 * Return true for any error, and false otherwise.
 */
static bool wait_dev_flush(struct btrfs_device *device)
{}

/*
 * send an empty flush down to each device in parallel,
 * then wait for them
 */
static int barrier_all_devices(struct btrfs_fs_info *info)
{}

int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags)
{}

int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
{}

/* Drop a fs root from the radix tree and free it. */
void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
				  struct btrfs_root *root)
{}

int btrfs_commit_super(struct btrfs_fs_info *fs_info)
{}

static void warn_about_uncommitted_trans(struct btrfs_fs_info *fs_info)
{}

void __cold close_ctree(struct btrfs_fs_info *fs_info)
{}

void btrfs_mark_buffer_dirty(struct btrfs_trans_handle *trans,
			     struct extent_buffer *buf)
{}

static void __btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info,
					int flush_delayed)
{}

void btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info)
{}

void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info)
{}

static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info)
{}

static void btrfs_drop_all_logs(struct btrfs_fs_info *fs_info)
{}

static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
{}

static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
{}

static void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
				       struct btrfs_fs_info *fs_info)
{}

static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
{}

static void btrfs_destroy_all_delalloc_inodes(struct btrfs_fs_info *fs_info)
{}

static void btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info,
					 struct extent_io_tree *dirty_pages,
					 int mark)
{}

static void btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
					struct extent_io_tree *unpin)
{}

static void btrfs_cleanup_bg_io(struct btrfs_block_group *cache)
{}

void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans,
			     struct btrfs_fs_info *fs_info)
{}

static void btrfs_free_all_qgroup_pertrans(struct btrfs_fs_info *fs_info)
{}

void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
				   struct btrfs_fs_info *fs_info)
{}

static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info)
{}

int btrfs_init_root_free_objectid(struct btrfs_root *root)
{}

int btrfs_get_free_objectid(struct btrfs_root *root, u64 *objectid)
{}