linux/drivers/md/dm-raid.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2010-2011 Neil Brown
 * Copyright (C) 2010-2018 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include <linux/slab.h>
#include <linux/module.h>

#include "md.h"
#include "raid1.h"
#include "raid5.h"
#include "raid10.h"
#include "md-bitmap.h"

#include <linux/device-mapper.h>

#define DM_MSG_PREFIX
#define MAX_RAID_DEVICES

/*
 * Minimum sectors of free reshape space per raid device
 */
#define MIN_FREE_RESHAPE_SPACE

/*
 * Minimum journal space 4 MiB in sectors.
 */
#define MIN_RAID456_JOURNAL_SPACE
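/*
 * Worked conversion (illustrative, assuming 512-byte sectors):
 * 4 MiB = 4 * 1024 * 1024 bytes / 512 bytes per sector = 8192 sectors.
 */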

static bool devices_handle_discard_safely;

/*
 * The following flags are used by dm-raid to set up the array state.
 * They must be cleared before md_run is called.
 */
#define FirstUse

struct raid_dev {};

/*
 * Bits for establishing rs->ctr_flags
 *
 * 1 = no flag value
 * 2 = flag with value
 */
#define __CTR_FLAG_SYNC
#define __CTR_FLAG_NOSYNC
#define __CTR_FLAG_REBUILD
#define __CTR_FLAG_DAEMON_SLEEP
#define __CTR_FLAG_MIN_RECOVERY_RATE
#define __CTR_FLAG_MAX_RECOVERY_RATE
#define __CTR_FLAG_MAX_WRITE_BEHIND
#define __CTR_FLAG_WRITE_MOSTLY
#define __CTR_FLAG_STRIPE_CACHE
#define __CTR_FLAG_REGION_SIZE
#define __CTR_FLAG_RAID10_COPIES
#define __CTR_FLAG_RAID10_FORMAT
/* New for v1.9.0 */
#define __CTR_FLAG_DELTA_DISKS
#define __CTR_FLAG_DATA_OFFSET
#define __CTR_FLAG_RAID10_USE_NEAR_SETS

/* New for v1.10.0 */
#define __CTR_FLAG_JOURNAL_DEV

/* New for v1.11.1 */
#define __CTR_FLAG_JOURNAL_MODE

/*
 * Flags for rs->ctr_flags field.
 */
#define CTR_FLAG_SYNC
#define CTR_FLAG_NOSYNC
#define CTR_FLAG_REBUILD
#define CTR_FLAG_DAEMON_SLEEP
#define CTR_FLAG_MIN_RECOVERY_RATE
#define CTR_FLAG_MAX_RECOVERY_RATE
#define CTR_FLAG_MAX_WRITE_BEHIND
#define CTR_FLAG_WRITE_MOSTLY
#define CTR_FLAG_STRIPE_CACHE
#define CTR_FLAG_REGION_SIZE
#define CTR_FLAG_RAID10_COPIES
#define CTR_FLAG_RAID10_FORMAT
#define CTR_FLAG_DELTA_DISKS
#define CTR_FLAG_DATA_OFFSET
#define CTR_FLAG_RAID10_USE_NEAR_SETS
#define CTR_FLAG_JOURNAL_DEV
#define CTR_FLAG_JOURNAL_MODE
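
/*
 * Illustrative sketch only (the values above are elided in this skeleton):
 * each CTR_FLAG_* mask is conventionally derived from its __CTR_FLAG_* bit
 * number, e.g.
 *
 *	#define CTR_FLAG_SYNC	(1 << __CTR_FLAG_SYNC)
 *
 * so that both bit helpers and mask arithmetic work on rs->ctr_flags.
 */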

/*
 * Definitions of various constructor flags to
 * be used in checks of valid / invalid flags
 * per raid level.
 */
/* Define all sync flags */
#define CTR_FLAGS_ANY_SYNC

/* Define flags for options without argument (e.g. 'nosync') */
#define CTR_FLAG_OPTIONS_NO_ARGS

/* Define flags for options with one argument (e.g. 'delta_disks +2') */
#define CTR_FLAG_OPTIONS_ONE_ARG

/* Valid options definitions per raid level... */

/* "raid0" does only accept data offset */
#define RAID0_VALID_FLAGS

/* "raid1" does not accept stripe cache, data offset, delta_disks or any raid10 options */
#define RAID1_VALID_FLAGS

/* "raid10" does not accept any raid1 or stripe cache options */
#define RAID10_VALID_FLAGS

/*
 * "raid4/5/6" do not accept any raid1 or raid10 specific options
 *
 * "raid6" does not accept "nosync", because it is not guaranteed
 * that both parity and q-syndrome are being written properly with
 * any writes
 */
#define RAID45_VALID_FLAGS

#define RAID6_VALID_FLAGS
/* ...valid options definitions per raid level */

/*
 * Flags for rs->runtime_flags field
 * (RT_FLAG prefix meaning "runtime flag")
 *
 * These are all internal and used to define runtime state,
 * e.g. to prevent another resume from preresume processing
 * the raid set all over again.
 */
#define RT_FLAG_RS_PRERESUMED
#define RT_FLAG_RS_RESUMED
#define RT_FLAG_RS_BITMAP_LOADED
#define RT_FLAG_UPDATE_SBS
#define RT_FLAG_RESHAPE_RS
#define RT_FLAG_RS_SUSPENDED
#define RT_FLAG_RS_IN_SYNC
#define RT_FLAG_RS_RESYNCING
#define RT_FLAG_RS_GROW
#define RT_FLAG_RS_FROZEN

/* Array elements of 64 bit needed for rebuild/failed disk bits */
#define DISKS_ARRAY_ELEMS
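
/*
 * Illustrative only (the value above is elided): such an element count is
 * typically derived from the maximum device count, e.g. something like
 * DIV_ROUND_UP(MAX_RAID_DEVICES, 8 * sizeof(uint64_t)).
 */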

/*
 * raid set level, layout and chunk sectors backup/restore
 */
struct rs_layout {};

struct raid_set {};

static void rs_config_backup(struct raid_set *rs, struct rs_layout *l)
{}

static void rs_config_restore(struct raid_set *rs, struct rs_layout *l)
{}

/* raid10 algorithms (i.e. formats) */
#define ALGORITHM_RAID10_DEFAULT
#define ALGORITHM_RAID10_NEAR
#define ALGORITHM_RAID10_OFFSET
#define ALGORITHM_RAID10_FAR

/* Supported raid types and properties. */
static struct raid_type {} raid_types[] =;

/* True, if @v is in inclusive range [@min, @max] */
static bool __within_range(long v, long min, long max)
{}

/* All table line arguments are defined here */
static struct arg_name_flag {} __arg_name_flags[] =;

/* Return argument name string for given @flag */
static const char *dm_raid_arg_name_by_flag(const uint32_t flag)
{}

/* Define correlation of raid456 journal cache modes and dm-raid target line parameters */
static struct {} _raid456_journal_mode[] =;

/* Return MD raid4/5/6 journal mode for dm-raid @journal_mode string */
static int dm_raid_journal_mode_to_md(const char *mode)
{}

/* Return dm-raid raid4/5/6 journal mode string for @mode */
static const char *md_journal_mode_to_dm_raid(const int mode)
{}

/*
 * Bool helpers to test for various raid levels of a raid set.
 * They use the level as reported by the superblock rather than
 * the requested raid_type passed to the constructor.
 */
/* Return true, if raid set in @rs is raid0 */
static bool rs_is_raid0(struct raid_set *rs)
{}

/* Return true, if raid set in @rs is raid1 */
static bool rs_is_raid1(struct raid_set *rs)
{}

/* Return true, if raid set in @rs is raid10 */
static bool rs_is_raid10(struct raid_set *rs)
{}

/* Return true, if raid set in @rs is level 6 */
static bool rs_is_raid6(struct raid_set *rs)
{}

/* Return true, if raid set in @rs is level 4, 5 or 6 */
static bool rs_is_raid456(struct raid_set *rs)
{}

/* Return true, if raid set in @rs is reshapable */
static bool __is_raid10_far(int layout);
static bool rs_is_reshapable(struct raid_set *rs)
{}

/* Return true, if raid set in @rs is recovering */
static bool rs_is_recovering(struct raid_set *rs)
{}

/* Return true, if raid set in @rs is reshaping */
static bool rs_is_reshaping(struct raid_set *rs)
{}

/*
 * bool helpers to test for various raid levels of a raid type @rt
 */

/* Return true, if raid type in @rt is raid0 */
static bool rt_is_raid0(struct raid_type *rt)
{}

/* Return true, if raid type in @rt is raid1 */
static bool rt_is_raid1(struct raid_type *rt)
{}

/* Return true, if raid type in @rt is raid10 */
static bool rt_is_raid10(struct raid_type *rt)
{}

/* Return true, if raid type in @rt is raid4/5 */
static bool rt_is_raid45(struct raid_type *rt)
{}

/* Return true, if raid type in @rt is raid6 */
static bool rt_is_raid6(struct raid_type *rt)
{}

/* Return true, if raid type in @rt is raid4/5/6 */
static bool rt_is_raid456(struct raid_type *rt)
{}
/* END: raid level bools */

/* Return valid ctr flags for the raid level of @rs */
static unsigned long __valid_flags(struct raid_set *rs)
{}

/*
 * Check for valid flags set on @rs
 *
 * Has to be called after parsing of the ctr flags!
 */
static int rs_check_for_valid_flags(struct raid_set *rs)
{}

/* MD raid10 bit definitions and helpers */
#define RAID10_OFFSET
#define RAID10_BROCKEN_USE_FAR_SETS
#define RAID10_USE_FAR_SETS
#define RAID10_FAR_COPIES_SHIFT

/* Return md raid10 near copies for @layout */
static unsigned int __raid10_near_copies(int layout)
{}

/* Return md raid10 far copies for @layout */
static unsigned int __raid10_far_copies(int layout)
{}

/* Return true if md raid10 offset for @layout */
static bool __is_raid10_offset(int layout)
{}

/* Return true if md raid10 near for @layout */
static bool __is_raid10_near(int layout)
{}

/* Return true if md raid10 far for @layout */
static bool __is_raid10_far(int layout)
{}
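
/*
 * Illustrative sketch (not part of the driver, names below are hypothetical):
 * the MD raid10 layout word conventionally packs near copies into the low
 * byte, far copies into the next byte and "offset" mode into bit 16, so
 * helpers like the ones above boil down to shifts and masks.
 */
static inline unsigned int example_raid10_near_copies(int layout)
{
	return layout & 0xFF;		/* near copies: bits 0-7 */
}

static inline unsigned int example_raid10_far_copies(int layout)
{
	return (layout >> 8) & 0xFF;	/* far copies: bits 8-15 */
}

static inline bool example_raid10_is_offset(int layout)
{
	return layout & (1 << 16);	/* "offset" mode: bit 16 */
}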

/* Return md raid10 layout string for @layout */
static const char *raid10_md_layout_to_format(int layout)
{}

/* Return md raid10 algorithm for @name */
static int raid10_name_to_format(const char *name)
{}

/* Return md raid10 copies for @layout */
static unsigned int raid10_md_layout_to_copies(int layout)
{}

/* Return md raid10 format id for @format string */
static int raid10_format_to_md_layout(struct raid_set *rs,
				      unsigned int algorithm,
				      unsigned int copies)
{}
/* END: MD raid10 bit definitions and helpers */

/* Check for any of the raid10 algorithms */
static bool __got_raid10(struct raid_type *rtp, const int layout)
{}

/* Return raid_type for @name */
static struct raid_type *get_raid_type(const char *name)
{}

/* Return raid_type derived from @level and @layout */
static struct raid_type *get_raid_type_by_ll(const int level, const int layout)
{}

/* Adjust rdev sectors */
static void rs_set_rdev_sectors(struct raid_set *rs)
{}

/*
 * Change bdev capacity of @rs in case of a disk add/remove reshape
 */
static void rs_set_capacity(struct raid_set *rs)
{}

/*
 * Set the mddev properties in @rs to the current
 * ones retrieved from the freshest superblock
 */
static void rs_set_cur(struct raid_set *rs)
{}

/*
 * Set the mddev properties in @rs to the new
 * ones requested by the ctr
 */
static void rs_set_new(struct raid_set *rs)
{}

static struct raid_set *raid_set_alloc(struct dm_target *ti, struct raid_type *raid_type,
				       unsigned int raid_devs)
{}

/* Free all @rs allocations */
static void raid_set_free(struct raid_set *rs)
{}

/*
 * For every device we have two words
 *  <meta_dev>: meta device name or '-' if missing
 *  <data_dev>: data device name or '-' if missing
 *
 * The following are permitted:
 *    - -
 *    - <data_dev>
 *    <meta_dev> <data_dev>
 *
 * The following is not allowed:
 *    <meta_dev> -
 *
 * This code parses those words.  If there is a failure,
 * the caller must use raid_set_free() to unwind the operations.
 */
static int parse_dev_params(struct raid_set *rs, struct dm_arg_set *as)
{}

/*
 * validate_region_size
 * @rs
 * @region_size:  region size in sectors.  If 0, pick a size (4MiB default).
 *
 * Set rs->md.bitmap_info.chunksize (which really refers to 'region size').
 * Ensure that (ti->len/region_size < 2^21) - required by MD bitmap.
 *
 * Returns: 0 on success, -EINVAL on failure.
 */
static int validate_region_size(struct raid_set *rs, unsigned long region_size)
{}
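
/*
 * Worked example of the 2^21 constraint above (illustrative, assuming
 * 512-byte sectors): for a 1 TiB target, ti->len = 2^31 sectors, so
 * region_size must exceed 2^31 / 2^21 = 1024 sectors (512 KiB) to keep
 * the number of bitmap regions below 2^21.
 */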

/*
 * validate_raid_redundancy
 * @rs
 *
 * Determine if there are enough devices in the array that haven't
 * failed (or are being rebuilt) to form a usable array.
 *
 * Returns: 0 on success, -EINVAL on failure.
 */
static int validate_raid_redundancy(struct raid_set *rs)
{}
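
/*
 * Illustrative examples of the redundancy rule above (not exhaustive):
 * a raid6 set can tolerate at most two failed/rebuilding devices, a
 * raid4/5 set at most one, and a raid10 set with two copies at most one
 * per mirrored pair.
 */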

/*
 * Possible arguments are...
 *	<chunk_size> [optional_args]
 *
 * Argument definitions
 *    <chunk_size>			The number of sectors per disk that
 *					will form the "stripe"
 *    [[no]sync]			Force or prevent recovery of the
 *					entire array
 *    [rebuild <idx>]			Rebuild the drive indicated by the index
 *    [daemon_sleep <ms>]		Time between bitmap daemon work to
 *					clear bits
 *    [min_recovery_rate <kB/sec/disk>]	Throttle RAID initialization
 *    [max_recovery_rate <kB/sec/disk>]	Throttle RAID initialization
 *    [write_mostly <idx>]		Indicate a write mostly drive via index
 *    [max_write_behind <sectors>]	See '--write-behind=' (man mdadm)
 *    [stripe_cache <sectors>]		Stripe cache size for higher RAIDs
 *    [region_size <sectors>]		Defines granularity of bitmap
 *    [journal_dev <dev>]		raid4/5/6 journaling device
 *					(i.e. write hole closing log)
 *
 * RAID10-only options:
 *    [raid10_copies <# copies>]	Number of copies.  (Default: 2)
 *    [raid10_format <near|far|offset>] Layout algorithm.  (Default: near)
 */
static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as,
			     unsigned int num_raid_params)
{}
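
/*
 * Illustrative <#raid_params> <raid_params> fragment (values are made up):
 * a 1 MiB chunk (2048 sectors), forced sync and a throttled recovery rate
 * would be passed as
 *
 *	4 2048 sync min_recovery_rate 20
 *
 * i.e. <#raid_params> counts every word of <raid_params>, including the
 * leading <chunk_size>.
 */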

/* Set raid4/5/6 cache size */
static int rs_set_raid456_stripe_cache(struct raid_set *rs)
{}

/* Return # of data stripes of @rs as kept in mddev (i.e. as of superblock) */
static unsigned int mddev_data_stripes(struct raid_set *rs)
{}

/* Return # of data stripes of @rs (i.e. as of ctr) */
static unsigned int rs_data_stripes(struct raid_set *rs)
{}

/*
 * Retrieve rdev->sectors from any valid raid device of @rs
 * to allow userspace to pass in arbitrary "- -" device tuples.
 */
static sector_t __rdev_sectors(struct raid_set *rs)
{}

/* Check that calculated dev_sectors fits all component devices. */
static int _check_data_dev_sectors(struct raid_set *rs)
{}

/* Get reshape sectors from data_offsets or raid set */
static sector_t _get_reshape_sectors(struct raid_set *rs)
{}

/* Calculate the sectors per device and per array used for @rs */
static int rs_set_dev_and_array_sectors(struct raid_set *rs, sector_t sectors, bool use_mddev)
{}

/* Setup recovery on @rs */
static void rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors)
{}

static void do_table_event(struct work_struct *ws)
{}

/*
 * Make sure a valid takeover (level switch) is being requested on @rs
 *
 * Conversions of raid sets from one MD personality to another
 * have to conform to restrictions which are enforced here.
 */
static int rs_check_takeover(struct raid_set *rs)
{}

/* True if @rs requested to be taken over */
static bool rs_takeover_requested(struct raid_set *rs)
{}

/* True if layout is set to reshape. */
static bool rs_is_layout_change(struct raid_set *rs, bool use_mddev)
{}

/* True if @rs is requested to reshape by ctr */
static bool rs_reshape_requested(struct raid_set *rs)
{}

/*  Features */
#define FEATURE_FLAG_SUPPORTS_V190

/* State flags for sb->flags */
#define SB_FLAG_RESHAPE_ACTIVE
#define SB_FLAG_RESHAPE_BACKWARDS

/*
 * This structure is never routinely used by userspace, unlike md superblocks.
 * Devices with this superblock should only ever be accessed via device-mapper.
 */
#define DM_RAID_MAGIC
struct dm_raid_superblock {} __packed;

/*
 * Check for reshape constraints on raid set @rs:
 *
 * - reshape function non-existent
 * - degraded set
 * - ongoing recovery
 * - ongoing reshape
 *
 * Returns 0 if no constraint applies, or -EPERM together
 * with an error message otherwise.
 */
static int rs_check_reshape(struct raid_set *rs)
{}

static int read_disk_sb(struct md_rdev *rdev, int size, bool force_reload)
{}

static void sb_retrieve_failed_devices(struct dm_raid_superblock *sb, uint64_t *failed_devices)
{}

static void sb_update_failed_devices(struct dm_raid_superblock *sb, uint64_t *failed_devices)
{}

/*
 * Synchronize the superblock members with the raid set properties
 *
 * All superblock data is little endian.
 */
static void super_sync(struct mddev *mddev, struct md_rdev *rdev)
{}
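
/*
 * As noted above, all on-disk superblock data is little endian, so a sync
 * typically wraps every field assignment in cpu_to_le*()/le*_to_cpu()
 * conversions, e.g. (illustrative, field name assumed):
 *
 *	sb->events = cpu_to_le64(mddev->events);
 */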

/*
 * super_load
 *
 * This function creates a superblock if one is not found on the device
 * and will decide which superblock to use if there's a choice.
 *
 * Return: 1 if use rdev, 0 if use refdev, -Exxx otherwise
 */
static int super_load(struct md_rdev *rdev, struct md_rdev *refdev)
{}

static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev)
{}

static int super_validate(struct raid_set *rs, struct md_rdev *rdev)
{}

/*
 * Analyse superblocks and select the freshest.
 */
static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
{}

/*
 * Adjust data_offset and new_data_offset on all disk members of @rs
 * for out of place reshaping if requested by constructor
 *
 * We need free space at the beginning of each raid disk for forward
 * and at the end for backward reshapes which userspace has to provide
 * via remapping/reordering of space.
 */
static int rs_adjust_data_offsets(struct raid_set *rs)
{}

/* Userspace reordered disks -> adjust raid_disk indexes in @rs */
static void __reorder_raid_disk_indexes(struct raid_set *rs)
{}

/*
 * Setup @rs for takeover by a different raid level
 */
static int rs_setup_takeover(struct raid_set *rs)
{}

/* Prepare @rs for reshape */
static int rs_prepare_reshape(struct raid_set *rs)
{}

/*
 * Reshape:
 * - change raid layout
 * - change chunk size
 * - add disks
 * - remove disks
 */
static int rs_setup_reshape(struct raid_set *rs)
{}

/*
 * If the md resync thread has updated the superblock with the max reshape
 * position at the end of a reshape, but has not (yet) reset the layout
 * configuration changes, reset the latter.
 */
static void rs_reset_inconclusive_reshape(struct raid_set *rs)
{}

/*
 * Enable/disable discard support on RAID set depending on
 * RAID level and discard properties of underlying RAID members.
 */
static void configure_discard_support(struct raid_set *rs)
{}

/*
 * Construct a RAID0/1/10/4/5/6 mapping:
 * Args:
 *	<raid_type> <#raid_params> <raid_params>{0,}	\
 *	<#raid_devs> [<meta_dev1> <dev1>]{1,}
 *
 * <raid_params> varies by <raid_type>.	 See 'parse_raid_params' for
 * details on possible <raid_params>.
 *
 * Userspace is free to initialize the metadata devices (and hence the superblocks)
 * to enforce recreation based on the passed-in table parameters.
 *
 */
static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{}
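
/*
 * Illustrative table line in the format documented above (sizes and
 * major:minor numbers are made up; see
 * Documentation/admin-guide/device-mapper/dm-raid.rst for reference
 * examples):
 *
 *	0 1960893648 raid \
 *		raid4 1 2048 \
 *		5 - 8:17 - 8:18 - 8:33 - 8:34 - 8:49
 *
 * i.e. a raid4 set with a single raid parameter (a 1 MiB chunk size) and
 * five "<meta_dev> <data_dev>" pairs, all without metadata devices.
 */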

static void raid_dtr(struct dm_target *ti)
{}

static int raid_map(struct dm_target *ti, struct bio *bio)
{}

/* Return sync state string for @state */
enum sync_state {};
static const char *sync_str(enum sync_state state)
{
	/* Has to be in above sync_state order! */
	static const char *sync_strs[] = {
		"frozen",
		"reshape",
		"resync",
		"check",
		"repair",
		"recover",
		"idle"
	};

	return __within_range(state, 0, ARRAY_SIZE(sync_strs) - 1) ? sync_strs[state] : "undef";
}
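
/*
 * Illustrative sketch of an enum consistent with the string order above
 * (the real enumerator names are elided in this skeleton and assumed here):
 *
 *	enum sync_state {
 *		st_frozen, st_reshape, st_resync, st_check,
 *		st_repair, st_recover, st_idle
 *	};
 */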

/* Return enum sync_state for @mddev derived from @recovery flags */
static enum sync_state decipher_sync_action(struct mddev *mddev, unsigned long recovery)
{}

/*
 * Return status string for @rdev
 *
 * Status characters:
 *
 *  'D' = Dead/Failed raid set component or raid4/5/6 journal device
 *  'a' = Alive but not in-sync raid set component _or_ alive raid4/5/6 'write_back' journal device
 *  'A' = Alive and in-sync raid set component _or_ alive raid4/5/6 'write_through' journal device
 *  '-' = Non-existing device (i.e. userspace passed '- -' into the ctr)
 */
static const char *__raid_dev_status(struct raid_set *rs, struct md_rdev *rdev)
{}
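
/*
 * Illustrative status fragment using the characters above (values are made
 * up): a healthy five-device raid4 set that is still resynchronizing might
 * report something like
 *
 *	raid4 5 AAAAA 2/490221568 resync 0
 *
 * i.e. <raid_type> <#devices> <health_chars> <sync_ratio> <sync_action>
 * <mismatch_cnt>.
 */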

/* Helper to return resync/reshape progress for @rs and runtime flags for raid set in sync / resyncing */
static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
				enum sync_state state, sector_t resync_max_sectors)
{}

/* Helper to return @dev name or "-" if !@dev */
static const char *__get_dev_name(struct dm_dev *dev)
{}

static void raid_status(struct dm_target *ti, status_type_t type,
			unsigned int status_flags, char *result, unsigned int maxlen)
{}

static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
			char *result, unsigned int maxlen)
{}

static int raid_iterate_devices(struct dm_target *ti,
				iterate_devices_callout_fn fn, void *data)
{}

static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
{}

static void raid_presuspend(struct dm_target *ti)
{}

static void raid_presuspend_undo(struct dm_target *ti)
{}

static void raid_postsuspend(struct dm_target *ti)
{}

static void attempt_restore_of_faulty_devices(struct raid_set *rs)
{}

static int __load_dirty_region_bitmap(struct raid_set *rs)
{}

/* Enforce updating all superblocks */
static void rs_update_sbs(struct raid_set *rs)
{}

/*
 * Reshape changes the raid algorithm of @rs to a new one within the same
 * personality (e.g. raid6_zr -> raid6_nc), changes the stripe size,
 * adds/removes disks (thus growing/shrinking the set) or resizes the set.
 *
 * Call mddev_lock_nointr() before!
 */
static int rs_start_reshape(struct raid_set *rs)
{}

static int raid_preresume(struct dm_target *ti)
{}

static void raid_resume(struct dm_target *ti)
{}

static struct target_type raid_target =;
module_dm(raid);

module_param(devices_handle_discard_safely, bool, 0644);
MODULE_PARM_DESC();

MODULE_DESCRIPTION();
MODULE_ALIAS();
MODULE_ALIAS();
MODULE_ALIAS();
MODULE_ALIAS();
MODULE_ALIAS();
MODULE_ALIAS();
MODULE_AUTHOR();
MODULE_AUTHOR();
MODULE_LICENSE();