/* linux/drivers/md/dm-thin-metadata.c */

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2011-2012 Red Hat, Inc.
 *
 * This file is released under the GPL.
 */

#include "dm-thin-metadata.h"
#include "persistent-data/dm-btree.h"
#include "persistent-data/dm-space-map.h"
#include "persistent-data/dm-space-map-disk.h"
#include "persistent-data/dm-transaction-manager.h"

#include <linux/list.h>
#include <linux/device-mapper.h>
#include <linux/workqueue.h>

/*
 *--------------------------------------------------------------------------
 * As far as the metadata goes, there is:
 *
 * - A superblock in block zero, taking up fewer than 512 bytes for
 *   atomic writes.
 *
 * - A space map managing the metadata blocks.
 *
 * - A space map managing the data blocks.
 *
 * - A btree mapping our internal thin dev ids onto struct disk_device_details.
 *
 * - A hierarchical btree, with 2 levels which effectively maps (thin
 *   dev id, virtual block) -> block_time.  Block time is a 64-bit
 *   field holding the time in the low 24 bits, and block in the top 40
 *   bits.
 *
 * BTrees consist solely of btree_nodes, each of which fills a block.
 * Some are internal nodes, whose values are __le64s pointing to other
 * nodes.  Leaf nodes can store data of any reasonable size (i.e. much
 * smaller than the block size).  The nodes consist of the header,
 * followed by an array of keys, followed by an array of values.  We have
 * to binary search on the keys so they're all held together to help the
 * cpu cache.
 *
 * Space maps have 2 btrees:
 *
 * - One maps a uint64_t onto a struct index_entry, which points to a
 *   bitmap block and records details such as how many free entries
 *   there are.
 *
 * - The bitmap blocks have a header (for the checksum).  Then the rest
 *   of the block is pairs of bits.  With the meaning being:
 *
 *   0 - ref count is 0
 *   1 - ref count is 1
 *   2 - ref count is 2
 *   3 - ref count is higher than 2
 *
 * - If the count is higher than 2 then the ref count is entered in a
 *   second btree that directly maps the block_address to a uint32_t ref
 *   count.
 *
 * The space map metadata variant doesn't have a bitmaps btree.  Instead
 * it has a single block's worth of index_entries.  This avoids
 * recursive issues with the bitmap btree needing to allocate space in
 * order to insert.  With a small data block size such as 64k the
 * metadata supports data devices that are hundreds of terabytes.
 *
 * The space maps allocate space linearly from front to back.  Space that
 * is freed in a transaction is never recycled within that transaction.
 * To try and avoid fragmenting _free_ space the allocator always goes
 * back and fills in gaps.
 *
 * All metadata io is in THIN_METADATA_BLOCK_SIZE sized/aligned chunks
 * from the block manager.
 *--------------------------------------------------------------------------
 */

/*
 * Prefix for this file's device-mapper log messages.
 * NOTE(review): the macro had no value; the string below is restored from
 * the upstream driver -- confirm before merging.
 */
#define DM_MSG_PREFIX   "thin metadata"

/*
 * On-disk format constants.
 * NOTE(review): these macros had no values.  THIN_SUPERBLOCK_LOCATION
 * follows from the file header ("a superblock in block zero"); the other
 * three are restored from the thin-pool on-disk format and must be
 * confirmed before merging, since they affect compatibility with
 * existing metadata devices.
 */
#define THIN_SUPERBLOCK_MAGIC 27022010
#define THIN_SUPERBLOCK_LOCATION 0
#define THIN_VERSION 2
#define SECTOR_TO_BLOCK_SHIFT 3

/*
 * For btree insert:
 *  3 for btree insert +
 *  2 for btree lookup used within space map
 * For btree remove:
 *  2 for shadow spine +
 *  4 for rebalance 3 child node
 *
 * The remove path is the larger of the two (2 + 4 = 6 versus 3 + 2 = 5),
 * so that is the value used.
 */
#define THIN_MAX_CONCURRENT_LOCKS 6

/*
 * This should be plenty.
 * NOTE(review): the macro had no value; 128 is restored from the upstream
 * on-disk format -- confirm before merging.
 */
#define SPACE_MAP_ROOT_SIZE 128

/*
 * Little endian on-disk superblock and device details.
 *
 * Both structures are declared __packed.
 * NOTE(review): the member lists are empty in this view; the on-disk
 * layout must be taken from the full definitions.
 */
struct thin_disk_superblock {} __packed;

struct disk_device_details {} __packed;

/*
 * In-core state for one pool's metadata; most functions in this file take
 * a pointer to it.
 * NOTE(review): members are not visible in this view.  The sparse
 * annotations on the pmd_write_lock helpers refer to a 'root_lock' member.
 */
struct dm_pool_metadata {};

/*
 * In-core handle for a single thin device.
 * NOTE(review): members are not visible in this view.  The comment on
 * __open_device() refers to an 'open_count' member.
 */
struct dm_thin_device {};

/*
 *--------------------------------------------------------------
 * superblock validator
 *--------------------------------------------------------------
 */
/*
 * Constant mixed into the superblock checksum (per the name).
 * NOTE(review): the macro had no value; 160774 is restored from the
 * thin-pool on-disk format -- confirm before merging.
 */
#define SUPERBLOCK_CSUM_XOR 160774

/*
 * Superblock validator hook run on a block before it is written.
 * @v: the validator this hook belongs to
 * @b: the block being prepared
 * @block_size: size of @b
 * NOTE(review): body is elided in this view; presumably it fills in the
 * block number and checksum -- confirm.
 */
static void sb_prepare_for_write(const struct dm_block_validator *v,
				 struct dm_block *b,
				 size_t block_size)
{}

/*
 * Superblock validator hook that checks block @b of @block_size and
 * returns an int status.
 * NOTE(review): body is elided in this view; which fields are verified
 * (block number, magic, checksum?) must be confirmed.
 */
static int sb_check(const struct dm_block_validator *v,
		    struct dm_block *b,
		    size_t block_size)
{}

static const struct dm_block_validator sb_validator =;

/*
 *--------------------------------------------------------------
 * Methods for the btree value types
 *--------------------------------------------------------------
 */
/*
 * Pack a data block number and a time into a single 64-bit btree value.
 *
 * Layout, as described in the file header: the time occupies the low 24
 * bits and the block the top 40 bits.  Neither argument is masked, so @t
 * must fit in 24 bits and @b in 40.
 */
static uint64_t pack_block_time(dm_block_t b, uint32_t t)
{
	return (b << 24) | t;
}

/*
 * Split a packed 64-bit btree value back into its parts.
 *
 * Inverse of pack_block_time(): the low 24 bits of @v are the time and the
 * top 40 bits are the block, as described in the file header.
 * @b: receives the block number
 * @t: receives the time
 */
static void unpack_block_time(uint64_t v, dm_block_t *b, uint32_t *t)
{
	*b = v >> 24;
	*t = v & ((1 << 24) - 1);
}

/*
 * It's more efficient to call dm_sm_{inc,dec}_blocks as few times as
 * possible.  'with_runs' reads contiguous runs of blocks, and calls the
 * given sm function.
 */
run_fn;

/*
 * Apply @fn to @sm for the blocks named by the @count little-endian
 * values at @value_le.
 * NOTE(review): body is elided in this view; how runs are detected and
 * how errors from @fn are handled must be confirmed.
 */
static void with_runs(struct dm_space_map *sm, const __le64 *value_le, unsigned int count, run_fn fn)
{}

/*
 * Btree value-type 'inc' method for data-block values.
 * @context: opaque pointer supplied by the caller
 * @value_le: @count values to process
 * NOTE(review): body is elided in this view; presumably increments data
 * space map reference counts via with_runs() -- confirm.
 */
static void data_block_inc(void *context, const void *value_le, unsigned int count)
{}

/*
 * Btree value-type 'dec' method for data-block values; counterpart of
 * data_block_inc().
 * NOTE(review): body is elided in this view; presumably decrements data
 * space map reference counts via with_runs() -- confirm.
 */
static void data_block_dec(void *context, const void *value_le, unsigned int count)
{}

/*
 * Btree value-type 'equal' method for data-block values: compares
 * @value1_le with @value2_le and returns an int.
 * NOTE(review): body is elided in this view; whether the time field takes
 * part in the comparison must be confirmed.
 */
static int data_block_equal(void *context, const void *value1_le, const void *value2_le)
{}

/*
 * Btree value-type 'inc' method for values that refer to subtrees.
 * @value: @count values to process
 * NOTE(review): body is elided in this view; presumably takes a reference
 * on each subtree root -- confirm.
 */
static void subtree_inc(void *context, const void *value, unsigned int count)
{}

/*
 * Btree value-type 'dec' method for values that refer to subtrees;
 * counterpart of subtree_inc().
 * NOTE(review): body is elided in this view; presumably drops (and may
 * delete) each subtree -- confirm.
 */
static void subtree_dec(void *context, const void *value, unsigned int count)
{}

/*
 * Btree value-type 'equal' method for subtree values: compares
 * @value1_le with @value2_le and returns an int.
 * NOTE(review): body is elided in this view.
 */
static int subtree_equal(void *context, const void *value1_le, const void *value2_le)
{}

/*----------------------------------------------------------------*/

/*
 * Variant that is used for in-core only changes or code that
 * shouldn't put the pool in service on its own (e.g. commit).
 *
 * Annotated for sparse as acquiring pmd->root_lock; pmd_write_unlock() is
 * annotated as the matching release.
 */
static inline void pmd_write_lock_in_core(struct dm_pool_metadata *pmd)
	__acquires(pmd->root_lock)
{}

/*
 * Write-lock helper for @pmd; the counterpart to
 * pmd_write_lock_in_core() for callers that are not restricted to in-core
 * changes.
 * NOTE(review): body is elided in this view and this variant carries no
 * sparse annotation; presumably it wraps pmd_write_lock_in_core() and
 * additionally marks the pool as in service -- confirm.
 */
static inline void pmd_write_lock(struct dm_pool_metadata *pmd)
{}

/*
 * Release the write lock taken by one of the pmd_write_lock helpers.
 * Annotated for sparse as releasing pmd->root_lock.
 */
static inline void pmd_write_unlock(struct dm_pool_metadata *pmd)
	__releases(pmd->root_lock)
{}

/*----------------------------------------------------------------*/

/*
 * Lock the superblock of @pmd and hand it back through @sblock; returns
 * an int status.
 * NOTE(review): body is elided in this view; presumably this is the
 * variant that obtains a zeroed block without reading it -- confirm.
 */
static int superblock_lock_zero(struct dm_pool_metadata *pmd,
				struct dm_block **sblock)
{}

/*
 * Lock the superblock of @pmd and hand it back through @sblock; returns
 * an int status.
 * NOTE(review): body is elided in this view; presumably a write lock that
 * reads and validates the existing superblock -- confirm.
 */
static int superblock_lock(struct dm_pool_metadata *pmd,
			   struct dm_block **sblock)
{}

/*
 * Report through @result whether the superblock managed by @bm is all
 * zeroes (per the name); the return value is an int status.
 * NOTE(review): body is elided in this view; the encoding of @result
 * (0/1?) must be confirmed.
 */
static int __superblock_all_zeroes(struct dm_block_manager *bm, int *result)
{}

/*
 * Set up the btree descriptions held in @pmd.
 * NOTE(review): body is elided in this view; presumably this is where the
 * data_block_* and subtree_* value-type methods are installed -- confirm.
 */
static void __setup_btree_details(struct dm_pool_metadata *pmd)
{}

/*
 * Save the space map roots for @pmd; returns an int status.
 * NOTE(review): body is elided in this view; presumably the roots are
 * staged inside @pmd for copy_sm_roots() to use later -- confirm.
 */
static int save_sm_roots(struct dm_pool_metadata *pmd)
{}

/*
 * Copy space map roots between @pmd and the on-disk superblock @disk.
 * NOTE(review): body is elided in this view; presumably copies the roots
 * saved by save_sm_roots() into @disk -- confirm direction and size.
 */
static void copy_sm_roots(struct dm_pool_metadata *pmd,
			  struct thin_disk_superblock *disk)
{}

/*
 * Write the first superblock for @pmd; returns an int status.
 * NOTE(review): body is elided in this view.
 */
static int __write_initial_superblock(struct dm_pool_metadata *pmd)
{}

/*
 * Format the metadata device backing @pmd; returns an int status.
 * NOTE(review): body is elided in this view; the objects created and the
 * cleanup performed on failure must be confirmed.
 */
static int __format_metadata(struct dm_pool_metadata *pmd)
{}

/*
 * Check the incompatible-feature information in @disk_super on behalf of
 * @pmd; returns an int status.
 * NOTE(review): body is elided in this view; which flags are rejected and
 * with which error code must be confirmed.
 */
static int __check_incompat_features(struct thin_disk_superblock *disk_super,
				     struct dm_pool_metadata *pmd)
{}

/*
 * Open already-formatted metadata for @pmd; returns an int status.
 * NOTE(review): body is elided in this view.
 */
static int __open_metadata(struct dm_pool_metadata *pmd)
{}

/*
 * Open the metadata for @pmd, or format it; returns an int status.
 * @format_device: whether formatting is permitted -- TODO(review) confirm
 * NOTE(review): body is elided in this view; presumably chooses between
 * __format_metadata() and __open_metadata() -- confirm.
 */
static int __open_or_format_metadata(struct dm_pool_metadata *pmd, bool format_device)
{}

/*
 * Create the persistent-data objects used by @pmd; returns an int status.
 * @format_device: passed through from the caller -- see
 *                 __open_or_format_metadata()
 * NOTE(review): body is elided in this view.
 */
static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, bool format_device)
{}

/*
 * Tear down the persistent-data objects used by @pmd.
 * @destroy_bm: presumably selects whether the block manager is destroyed
 *              too -- TODO(review) confirm
 * NOTE(review): body is elided in this view.
 */
static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd,
					      bool destroy_bm)
{}

/*
 * Begin a metadata transaction on @pmd; returns an int status.
 * NOTE(review): body is elided in this view; presumably re-reads state
 * from the superblock -- confirm.
 */
static int __begin_transaction(struct dm_pool_metadata *pmd)
{}

/*
 * Write out device details that have changed for @pmd; returns an int
 * status.
 * NOTE(review): body is elided in this view.
 */
static int __write_changed_details(struct dm_pool_metadata *pmd)
{}

/*
 * Commit the current metadata transaction on @pmd; returns an int status.
 * NOTE(review): body is elided in this view; the ordering of steps
 * (details, space map roots, superblock) must be confirmed.
 */
static int __commit_transaction(struct dm_pool_metadata *pmd)
{}

/*
 * Set the metadata reserve for @pmd.
 * NOTE(review): body is elided in this view; how the reserve is sized
 * must be confirmed.
 */
static void __set_metadata_reserve(struct dm_pool_metadata *pmd)
{}

/*
 * Open the pool metadata stored on @bdev and return its in-core handle.
 * @bdev: the metadata block device
 * @data_block_size: data block size, as a sector_t
 * @format_device: see __open_or_format_metadata()
 * NOTE(review): body is elided in this view; how failure is reported
 * (ERR_PTR vs NULL) must be confirmed.
 */
struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev,
					       sector_t data_block_size,
					       bool format_device)
{}

/*
 * Close @pmd, the handle returned by dm_pool_metadata_open(); returns an
 * int status.
 * NOTE(review): body is elided in this view; behaviour when thin devices
 * are still open must be confirmed.
 */
int dm_pool_metadata_close(struct dm_pool_metadata *pmd)
{}

/*
 * __open_device: Returns @td corresponding to device with id @dev,
 * creating it if @create is set and incrementing @td->open_count.
 * On failure, @td is undefined.
 *
 * @pmd: pool metadata the device belongs to
 * The int return value is a status code.
 */
static int __open_device(struct dm_pool_metadata *pmd,
			 dm_thin_id dev, int create,
			 struct dm_thin_device **td)
{}

/*
 * Counterpart of __open_device() for @td.
 * NOTE(review): body is elided in this view; presumably drops the
 * open_count taken by __open_device() -- confirm.
 */
static void __close_device(struct dm_thin_device *td)
{}

/*
 * Create a thin device with id @dev in @pmd; returns an int status.
 * NOTE(review): body is elided in this view; the error returned when @dev
 * already exists must be confirmed.
 */
static int __create_thin(struct dm_pool_metadata *pmd,
			 dm_thin_id dev)
{}

/*
 * Public entry point for creating thin device @dev in @pmd; returns an
 * int status.
 * NOTE(review): body is elided in this view; presumably takes the pmd
 * write lock around __create_thin() -- confirm.
 */
int dm_pool_create_thin(struct dm_pool_metadata *pmd, dm_thin_id dev)
{}

/*
 * Record snapshot details on @snap, taken from device @origin at @time;
 * returns an int status.
 * NOTE(review): body is elided in this view; which fields of @snap and of
 * the origin are updated must be confirmed.
 */
static int __set_snapshot_details(struct dm_pool_metadata *pmd,
				  struct dm_thin_device *snap,
				  dm_thin_id origin, uint32_t time)
{}

/*
 * Create device @dev in @pmd as a snapshot of @origin; returns an int
 * status.
 * NOTE(review): body is elided in this view.
 */
static int __create_snap(struct dm_pool_metadata *pmd,
			 dm_thin_id dev, dm_thin_id origin)
{}

/*
 * Public entry point for creating @dev as a snapshot of @origin in @pmd;
 * returns an int status.
 * NOTE(review): body is elided in this view; presumably takes the pmd
 * write lock around __create_snap() -- confirm.
 */
int dm_pool_create_snap(struct dm_pool_metadata *pmd,
				 dm_thin_id dev,
				 dm_thin_id origin)
{}

/*
 * Delete the device with id @dev from @pmd; returns an int status.
 * NOTE(review): body is elided in this view; behaviour when the device is
 * still open must be confirmed.
 */
static int __delete_device(struct dm_pool_metadata *pmd, dm_thin_id dev)
{}

/*
 * Public entry point for deleting thin device @dev from @pmd; returns an
 * int status.
 * NOTE(review): body is elided in this view; presumably takes the pmd
 * write lock around __delete_device() -- confirm.
 */
int dm_pool_delete_thin_device(struct dm_pool_metadata *pmd,
			       dm_thin_id dev)
{}

/*
 * Set the metadata transaction id of @pmd to @new_id; returns an int
 * status.
 * @current_id: the id the caller believes is current
 * NOTE(review): body is elided in this view; presumably fails when
 * @current_id does not match the stored id -- confirm.
 */
int dm_pool_set_metadata_transaction_id(struct dm_pool_metadata *pmd,
					uint64_t current_id,
					uint64_t new_id)
{}

/*
 * Return the metadata transaction id of @pmd through @result; the int
 * return value is a status.
 * NOTE(review): body is elided in this view.
 */
int dm_pool_get_metadata_transaction_id(struct dm_pool_metadata *pmd,
					uint64_t *result)
{}

/*
 * Reserve a metadata snapshot for @pmd; returns an int status.
 * NOTE(review): body is elided in this view; behaviour when a snapshot is
 * already reserved must be confirmed.
 */
static int __reserve_metadata_snap(struct dm_pool_metadata *pmd)
{}

/*
 * Public entry point for reserving a metadata snapshot on @pmd; returns
 * an int status.
 * NOTE(review): body is elided in this view; presumably a locked wrapper
 * around __reserve_metadata_snap() -- confirm.
 */
int dm_pool_reserve_metadata_snap(struct dm_pool_metadata *pmd)
{}

/*
 * Release the metadata snapshot of @pmd; returns an int status.
 * NOTE(review): body is elided in this view; behaviour when no snapshot
 * is held must be confirmed.
 */
static int __release_metadata_snap(struct dm_pool_metadata *pmd)
{}

/*
 * Public entry point for releasing the metadata snapshot of @pmd; returns
 * an int status.
 * NOTE(review): body is elided in this view; presumably a locked wrapper
 * around __release_metadata_snap() -- confirm.
 */
int dm_pool_release_metadata_snap(struct dm_pool_metadata *pmd)
{}

/*
 * Return the block of @pmd's metadata snapshot through @result; the int
 * return value is a status.
 * NOTE(review): body is elided in this view; the value reported when no
 * snapshot exists must be confirmed.
 */
static int __get_metadata_snap(struct dm_pool_metadata *pmd,
			       dm_block_t *result)
{}

/*
 * Public entry point for querying @pmd's metadata snapshot block, which
 * is returned through @result; the int return value is a status.
 * NOTE(review): body is elided in this view; presumably a locked wrapper
 * around __get_metadata_snap() -- confirm.
 */
int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd,
			      dm_block_t *result)
{}

/*
 * Open thin device @dev of @pmd and return its handle through @td; the
 * int return value is a status.
 * NOTE(review): body is elided in this view; presumably calls
 * __open_device() without the create flag -- confirm.
 */
int dm_pool_open_thin_device(struct dm_pool_metadata *pmd, dm_thin_id dev,
			     struct dm_thin_device **td)
{}

/*
 * Close a handle obtained from dm_pool_open_thin_device(); returns an int
 * status.
 * NOTE(review): body is elided in this view.
 */
int dm_pool_close_thin_device(struct dm_thin_device *td)
{}

/*
 * Return the dm_thin_id of @td.
 * NOTE(review): body is elided in this view.
 */
dm_thin_id dm_thin_dev_id(struct dm_thin_device *td)
{}

/*
 * Check whether @time (of block creation) is older than @td's last snapshot.
 * If so then the associated block is shared with the last snapshot device.
 * Any block on a device created *after* the device last got snapshotted is
 * necessarily not shared.
 *
 * @td: thin device whose last-snapshot time is consulted
 * @time: creation time of the block in question
 * NOTE(review): body is elided in this view; whether equal times count as
 * "older" must be confirmed.
 */
static bool __snapshotted_since(struct dm_thin_device *td, uint32_t time)
{}

/*
 * Decode the little-endian btree value @value for device @td into
 * @result.
 * NOTE(review): body is elided in this view; presumably unpacks the block
 * and time and derives a 'shared' indication via __snapshotted_since() --
 * confirm.
 */
static void unpack_lookup_result(struct dm_thin_device *td, __le64 value,
				 struct dm_thin_lookup_result *result)
{}

/*
 * Look up the mapping of virtual block @block on @td and store it in
 * @result; returns an int status.
 * @can_issue_io: whether the lookup may perform I/O -- TODO(review)
 *                confirm exact semantics
 * NOTE(review): body is elided in this view.
 */
static int __find_block(struct dm_thin_device *td, dm_block_t block,
			int can_issue_io, struct dm_thin_lookup_result *result)
{}

/*
 * Public entry point for looking up virtual block @block on @td; the
 * mapping is stored in @result and an int status is returned.
 * NOTE(review): body is elided in this view; the error codes for
 * "unmapped" and "would block" must be confirmed.
 */
int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block,
		       int can_issue_io, struct dm_thin_lookup_result *result)
{}

/*
 * Find the next mapped block on @td, searching from @block; the virtual
 * block found is returned through @vblock and its mapping through
 * @result.  Returns an int status.
 * NOTE(review): body is elided in this view; whether @block itself is
 * included in the search must be confirmed.
 */
static int __find_next_mapped_block(struct dm_thin_device *td, dm_block_t block,
					  dm_block_t *vblock,
					  struct dm_thin_lookup_result *result)
{}

/*
 * Find a mapped range of @td within [@begin, @end).
 * @thin_begin, @thin_end: receive the virtual range found
 * @pool_begin: receives the first data block of that range
 * @maybe_shared: receives whether the range may be shared
 * Returns an int status.
 * NOTE(review): body is elided in this view; whether @end is exclusive
 * and how contiguity is defined must be confirmed.
 */
static int __find_mapped_range(struct dm_thin_device *td,
			       dm_block_t begin, dm_block_t end,
			       dm_block_t *thin_begin, dm_block_t *thin_end,
			       dm_block_t *pool_begin, bool *maybe_shared)
{}

/*
 * Public entry point for __find_mapped_range(); takes the same arguments
 * and returns an int status.
 * NOTE(review): body is elided in this view; locking must be confirmed.
 */
int dm_thin_find_mapped_range(struct dm_thin_device *td,
			      dm_block_t begin, dm_block_t end,
			      dm_block_t *thin_begin, dm_block_t *thin_end,
			      dm_block_t *pool_begin, bool *maybe_shared)
{}

/*
 * Map virtual block @block of @td onto @data_block; returns an int
 * status.
 * NOTE(review): body is elided in this view; handling of an existing
 * mapping for @block must be confirmed.
 */
static int __insert(struct dm_thin_device *td, dm_block_t block,
		    dm_block_t data_block)
{}

/*
 * Public entry point for mapping virtual block @block of @td onto
 * @data_block; returns an int status.
 * NOTE(review): body is elided in this view; presumably a locked wrapper
 * around __insert() -- confirm.
 */
int dm_thin_insert_block(struct dm_thin_device *td, dm_block_t block,
			 dm_block_t data_block)
{}

/*
 * Remove the mappings of @td between @begin and @end; returns an int
 * status.
 * NOTE(review): body is elided in this view; whether @end is exclusive
 * must be confirmed.
 */
static int __remove_range(struct dm_thin_device *td, dm_block_t begin, dm_block_t end)
{}

/*
 * Public entry point for removing the mappings of @td between @begin and
 * @end; returns an int status.
 * NOTE(review): body is elided in this view; presumably a locked wrapper
 * around __remove_range() -- confirm.
 */
int dm_thin_remove_range(struct dm_thin_device *td,
			 dm_block_t begin, dm_block_t end)
{}

/*
 * Report through @result whether data block @b of @pmd is shared; the
 * int return value is a status.
 * NOTE(review): body is elided in this view; presumably "shared" means a
 * reference count above one -- confirm.
 */
int dm_pool_block_is_shared(struct dm_pool_metadata *pmd, dm_block_t b, bool *result)
{}

/*
 * Increment the reference counts of the data blocks from @b to @e in
 * @pmd; returns an int status.
 * NOTE(review): body is elided in this view; whether @e is exclusive must
 * be confirmed.
 */
int dm_pool_inc_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e)
{}

/*
 * Decrement the reference counts of the data blocks from @b to @e in
 * @pmd; returns an int status.  Counterpart of dm_pool_inc_data_range().
 * NOTE(review): body is elided in this view; whether @e is exclusive must
 * be confirmed.
 */
int dm_pool_dec_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e)
{}

/*
 * Report whether @td has changed during the current transaction.
 * NOTE(review): body is elided in this view.
 */
bool dm_thin_changed_this_transaction(struct dm_thin_device *td)
{}

/*
 * Report whether anything in @pmd has changed during the current
 * transaction.
 * NOTE(review): body is elided in this view; presumably true if any thin
 * device has changed -- confirm.
 */
bool dm_pool_changed_this_transaction(struct dm_pool_metadata *pmd)
{}

/*
 * Report whether @td had changes that were aborted.
 * NOTE(review): body is elided in this view; presumably reflects a flag
 * set by __set_abort_with_changes_flags() -- confirm.
 */
bool dm_thin_aborted_changes(struct dm_thin_device *td)
{}

/*
 * Allocate a data block from @pmd and return it through @result; the int
 * return value is a status.
 * NOTE(review): body is elided in this view; the error returned when no
 * space is left must be confirmed.
 */
int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result)
{}

/*
 * Commit the outstanding metadata changes of @pmd; returns an int status.
 * NOTE(review): body is elided in this view.  The comment on
 * pmd_write_lock_in_core() names commit as a user of that lock variant.
 */
int dm_pool_commit_metadata(struct dm_pool_metadata *pmd)
{}

/*
 * Flag state in @pmd as having been aborted with changes pending.
 * NOTE(review): body is elided in this view; presumably marks each thin
 * device that changed this transaction -- confirm.
 */
static void __set_abort_with_changes_flags(struct dm_pool_metadata *pmd)
{}

/*
 * Abort the current metadata transaction on @pmd; returns an int status.
 * NOTE(review): body is elided in this view; how the in-core state is
 * rebuilt after the abort must be confirmed.
 */
int dm_pool_abort_metadata(struct dm_pool_metadata *pmd)
{}

/*
 * Return the number of free data blocks in @pmd through @result; the int
 * return value is a status.
 * NOTE(review): body is elided in this view.
 */
int dm_pool_get_free_block_count(struct dm_pool_metadata *pmd, dm_block_t *result)
{}

/*
 * Return the number of free metadata blocks in @pmd through @result; the
 * int return value is a status.
 * NOTE(review): body is elided in this view; whether the metadata reserve
 * is subtracted from the count must be confirmed.
 */
int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd,
					  dm_block_t *result)
{}

/*
 * Return the size of the metadata device of @pmd through @result, as a
 * dm_block_t; the int return value is a status.
 * NOTE(review): body is elided in this view.
 */
int dm_pool_get_metadata_dev_size(struct dm_pool_metadata *pmd,
				  dm_block_t *result)
{}

/*
 * Return the size of the data device of @pmd through @result, as a
 * dm_block_t; the int return value is a status.
 * NOTE(review): body is elided in this view.
 */
int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result)
{}

/*
 * Return the number of mapped blocks of @td through @result; the int
 * return value is a status.
 * NOTE(review): body is elided in this view.
 */
int dm_thin_get_mapped_count(struct dm_thin_device *td, dm_block_t *result)
{}

/*
 * Return the highest mapped block of @td through @result; the int return
 * value is a status.
 * NOTE(review): body is elided in this view; the result for a device with
 * no mappings must be confirmed.
 */
static int __highest_block(struct dm_thin_device *td, dm_block_t *result)
{}

/*
 * Public entry point for querying the highest mapped block of @td, which
 * is returned through @result; the int return value is a status.
 * NOTE(review): body is elided in this view; presumably a locked wrapper
 * around __highest_block() -- confirm.
 */
int dm_thin_get_highest_mapped_block(struct dm_thin_device *td,
				     dm_block_t *result)
{}

/*
 * Resize space map @sm to hold @new_count blocks; returns an int status.
 * NOTE(review): body is elided in this view; presumably only growth is
 * supported -- confirm.
 */
static int __resize_space_map(struct dm_space_map *sm, dm_block_t new_count)
{}

/*
 * Resize the data device of @pmd to @new_count blocks; returns an int
 * status.
 * NOTE(review): body is elided in this view; presumably applies
 * __resize_space_map() to the data space map -- confirm.
 */
int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count)
{}

/*
 * Resize the metadata device of @pmd to @new_count blocks; returns an int
 * status.
 * NOTE(review): body is elided in this view; presumably applies
 * __resize_space_map() to the metadata space map -- confirm.
 */
int dm_pool_resize_metadata_dev(struct dm_pool_metadata *pmd, dm_block_t new_count)
{}

/*
 * Switch @pmd to read-only operation.
 * NOTE(review): body is elided in this view.
 */
void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd)
{}

/*
 * Switch @pmd back to read-write operation; counterpart of
 * dm_pool_metadata_read_only().
 * NOTE(review): body is elided in this view.
 */
void dm_pool_metadata_read_write(struct dm_pool_metadata *pmd)
{}

/*
 * Register callback @fn, with argument @context, for the metadata
 * threshold @threshold on @pmd; returns an int status.
 * NOTE(review): body is elided in this view; when @fn fires and in what
 * context must be confirmed.
 */
int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd,
					dm_block_t threshold,
					dm_sm_threshold_fn fn,
					void *context)
{}

/*
 * Register @fn, with argument @context, as the pre-commit callback of
 * @pmd.
 * NOTE(review): body is elided in this view; where in the commit path
 * @fn is called must be confirmed.
 */
void dm_pool_register_pre_commit_callback(struct dm_pool_metadata *pmd,
					  dm_pool_pre_commit_fn fn,
					  void *context)
{}

/*
 * Mark @pmd as needing a check; returns an int status.
 * NOTE(review): body is elided in this view; whether the flag is written
 * to disk immediately must be confirmed.
 */
int dm_pool_metadata_set_needs_check(struct dm_pool_metadata *pmd)
{}

/*
 * Report whether @pmd is marked as needing a check.
 * NOTE(review): body is elided in this view.
 */
bool dm_pool_metadata_needs_check(struct dm_pool_metadata *pmd)
{}

/*
 * Issue any prefetches queued for @pmd.
 * NOTE(review): body is elided in this view.
 */
void dm_pool_issue_prefetches(struct dm_pool_metadata *pmd)
{}