// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2010-2011 Neil Brown
 * Copyright (C) 2010-2018 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include <linux/slab.h>
#include <linux/module.h>

#include "md.h"
#include "raid1.h"
#include "raid5.h"
#include "raid10.h"
#include "md-bitmap.h"

#include <linux/device-mapper.h>

#define DM_MSG_PREFIX …
#define MAX_RAID_DEVICES …

/*
 * Minimum sectors of free reshape space per raid device
 */
#define MIN_FREE_RESHAPE_SPACE …

/*
 * Minimum journal space, 4 MiB in sectors.
 */
#define MIN_RAID456_JOURNAL_SPACE …

static bool devices_handle_discard_safely;

/*
 * The following flags are used by dm-raid to set up the array state.
 * They must be cleared before md_run is called.
 */
#define FirstUse …

struct raid_dev {
	…
};

/*
 * Bits for establishing rs->ctr_flags
 *
 * 1 = no flag value
 * 2 = flag with value
 */
#define __CTR_FLAG_SYNC …
#define __CTR_FLAG_NOSYNC …
#define __CTR_FLAG_REBUILD …
#define __CTR_FLAG_DAEMON_SLEEP …
#define __CTR_FLAG_MIN_RECOVERY_RATE …
#define __CTR_FLAG_MAX_RECOVERY_RATE …
#define __CTR_FLAG_MAX_WRITE_BEHIND …
#define __CTR_FLAG_WRITE_MOSTLY …
#define __CTR_FLAG_STRIPE_CACHE …
#define __CTR_FLAG_REGION_SIZE …
#define __CTR_FLAG_RAID10_COPIES …
#define __CTR_FLAG_RAID10_FORMAT …
/* New for v1.9.0 */
#define __CTR_FLAG_DELTA_DISKS …
#define __CTR_FLAG_DATA_OFFSET …
#define __CTR_FLAG_RAID10_USE_NEAR_SETS …
/* New for v1.10.0 */
#define __CTR_FLAG_JOURNAL_DEV …
/* New for v1.11.1 */
#define __CTR_FLAG_JOURNAL_MODE …

/*
 * Flags for rs->ctr_flags field.
 */
#define CTR_FLAG_SYNC …
#define CTR_FLAG_NOSYNC …
#define CTR_FLAG_REBUILD …
#define CTR_FLAG_DAEMON_SLEEP …
#define CTR_FLAG_MIN_RECOVERY_RATE …
#define CTR_FLAG_MAX_RECOVERY_RATE …
#define CTR_FLAG_MAX_WRITE_BEHIND …
#define CTR_FLAG_WRITE_MOSTLY …
#define CTR_FLAG_STRIPE_CACHE …
#define CTR_FLAG_REGION_SIZE …
#define CTR_FLAG_RAID10_COPIES …
#define CTR_FLAG_RAID10_FORMAT …
#define CTR_FLAG_DELTA_DISKS …
#define CTR_FLAG_DATA_OFFSET …
#define CTR_FLAG_RAID10_USE_NEAR_SETS …
#define CTR_FLAG_JOURNAL_DEV …
#define CTR_FLAG_JOURNAL_MODE …

/*
 * Definitions of various constructor flags to
 * be used in checks of valid / invalid flags
 * per raid level.
 */
/* Define all sync flags */
#define CTR_FLAGS_ANY_SYNC …

/* Define flags for options without argument (e.g. 'nosync') */
#define CTR_FLAG_OPTIONS_NO_ARGS …

/* Define flags for options with one argument (e.g. 'delta_disks +2') */
#define CTR_FLAG_OPTIONS_ONE_ARG …

/* Valid options definitions per raid level... */

/* "raid0" only accepts the data offset option */
#define RAID0_VALID_FLAGS …

/* "raid1" does not accept stripe cache, data offset, delta_disks or any raid10 options */
#define RAID1_VALID_FLAGS …

/* "raid10" does not accept any raid1 or stripe cache options */
#define RAID10_VALID_FLAGS …

/*
 * "raid4/5/6" do not accept any raid1 or raid10 specific options
 *
 * "raid6" does not accept "nosync", because it is not guaranteed
 * that both parity and q-syndrome are being written properly with
 * any writes
 */
#define RAID45_VALID_FLAGS …

#define RAID6_VALID_FLAGS …
/* ...valid options definitions per raid level */
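/*
 * Illustration only (not part of the driver): a parsed set of ctr flags is
 * conceptually checked against one of the per-level *_VALID_FLAGS masks
 * above.  The helper name and signature below are assumptions made for this
 * sketch, not the in-tree interface.
 */
static inline int example_check_ctr_flags(unsigned long ctr_flags,
					  unsigned long valid_flags)
{
	/* Any flag bit outside the per-level mask invalidates the table line. */
	return (ctr_flags & ~valid_flags) ? -EINVAL : 0;
}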
/*
 * Flags for rs->runtime_flags field
 * (RT_FLAG prefix meaning "runtime flag")
 *
 * These are all internal and used to define runtime state,
 * e.g. to prevent another resume from preresume processing
 * the raid set all over again.
 */
#define RT_FLAG_RS_PRERESUMED …
#define RT_FLAG_RS_RESUMED …
#define RT_FLAG_RS_BITMAP_LOADED …
#define RT_FLAG_UPDATE_SBS …
#define RT_FLAG_RESHAPE_RS …
#define RT_FLAG_RS_SUSPENDED …
#define RT_FLAG_RS_IN_SYNC …
#define RT_FLAG_RS_RESYNCING …
#define RT_FLAG_RS_GROW …
#define RT_FLAG_RS_FROZEN …

/* Number of 64 bit array elements needed for rebuild/failed disk bits */
#define DISKS_ARRAY_ELEMS …

/*
 * raid set level, layout and chunk sectors backup/restore
 */
struct rs_layout {
	…
};

struct raid_set {
	…
};

static void rs_config_backup(struct raid_set *rs, struct rs_layout *l)
{
	…
}

static void rs_config_restore(struct raid_set *rs, struct rs_layout *l)
{
	…
}

/* raid10 algorithms (i.e. formats) */
#define ALGORITHM_RAID10_DEFAULT …
#define ALGORITHM_RAID10_NEAR …
#define ALGORITHM_RAID10_OFFSET …
#define ALGORITHM_RAID10_FAR …

/* Supported raid types and properties. */
static struct raid_type {
	…
} raid_types[] = …;

/* True, if @v is in inclusive range [@min, @max] */
static bool __within_range(long v, long min, long max)
{
	…
}

/* All table line arguments are defined here */
static struct arg_name_flag {
	…
} __arg_name_flags[] = …;

/* Return argument name string for given @flag */
static const char *dm_raid_arg_name_by_flag(const uint32_t flag)
{
	…
}

/* Define correlation of raid456 journal cache modes and dm-raid target line parameters */
static struct {
	…
} _raid456_journal_mode[] = …;

/* Return MD raid4/5/6 journal mode for dm-raid journal_mode string @mode */
static int dm_raid_journal_mode_to_md(const char *mode)
{
	…
}

/* Return dm-raid raid4/5/6 journal mode string for @mode */
static const char *md_journal_mode_to_dm_raid(const int mode)
{
	…
}
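/*
 * Illustration only: the _raid456_journal_mode[] correlation above boils down
 * to a string -> constant lookup.  The dm-raid 'journal_mode' parameter takes
 * "writethrough" or "writeback"; the struct and helper names here are made up
 * for this sketch.
 */
struct example_journal_mode_map {
	const char *param;	/* dm-raid table parameter string */
	int md_mode;		/* matching MD raid4/5/6 journal mode */
};

static inline int example_journal_mode_lookup(const struct example_journal_mode_map *map,
					      int entries, const char *param)
{
	while (entries--)
		if (!strcasecmp(map[entries].param, param))
			return map[entries].md_mode;

	return -EINVAL;	/* unknown journal mode string */
}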
/*
 * Bool helpers to test for various raid levels of a raid set.
 * It's the level as reported by the superblock rather than
 * the requested raid_type passed to the constructor.
 */

/* Return true, if raid set in @rs is raid0 */
static bool rs_is_raid0(struct raid_set *rs)
{
	…
}

/* Return true, if raid set in @rs is raid1 */
static bool rs_is_raid1(struct raid_set *rs)
{
	…
}

/* Return true, if raid set in @rs is raid10 */
static bool rs_is_raid10(struct raid_set *rs)
{
	…
}

/* Return true, if raid set in @rs is level 6 */
static bool rs_is_raid6(struct raid_set *rs)
{
	…
}

/* Return true, if raid set in @rs is level 4, 5 or 6 */
static bool rs_is_raid456(struct raid_set *rs)
{
	…
}

/* Return true, if raid set in @rs is reshapable */
static bool __is_raid10_far(int layout);
static bool rs_is_reshapable(struct raid_set *rs)
{
	…
}

/* Return true, if raid set in @rs is recovering */
static bool rs_is_recovering(struct raid_set *rs)
{
	…
}

/* Return true, if raid set in @rs is reshaping */
static bool rs_is_reshaping(struct raid_set *rs)
{
	…
}

/*
 * bool helpers to test for various raid levels of a raid type @rt
 */

/* Return true, if raid type in @rt is raid0 */
static bool rt_is_raid0(struct raid_type *rt)
{
	…
}

/* Return true, if raid type in @rt is raid1 */
static bool rt_is_raid1(struct raid_type *rt)
{
	…
}

/* Return true, if raid type in @rt is raid10 */
static bool rt_is_raid10(struct raid_type *rt)
{
	…
}

/* Return true, if raid type in @rt is raid4/5 */
static bool rt_is_raid45(struct raid_type *rt)
{
	…
}

/* Return true, if raid type in @rt is raid6 */
static bool rt_is_raid6(struct raid_type *rt)
{
	…
}

/* Return true, if raid type in @rt is raid4/5/6 */
static bool rt_is_raid456(struct raid_type *rt)
{
	…
}
/* END: raid level bools */

/* Return valid ctr flags for the raid level of @rs */
static unsigned long __valid_flags(struct raid_set *rs)
{
	…
}
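/*
 * Illustration only: __valid_flags() above conceptually maps the raid level
 * of the set to one of the per-level masks defined near the top of the file.
 * Passing a plain MD level number instead of struct raid_set is an assumption
 * made for this sketch.
 */
static inline unsigned long example_valid_flags_for_level(int level)
{
	switch (level) {
	case 0:
		return RAID0_VALID_FLAGS;
	case 1:
		return RAID1_VALID_FLAGS;
	case 10:
		return RAID10_VALID_FLAGS;
	case 4:
	case 5:
		return RAID45_VALID_FLAGS;
	case 6:
		return RAID6_VALID_FLAGS;
	default:
		return 0;
	}
}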
/*
 * Check for valid flags set on @rs
 *
 * Has to be called after parsing of the ctr flags!
 */
static int rs_check_for_valid_flags(struct raid_set *rs)
{
	…
}

/* MD raid10 bit definitions and helpers */
#define RAID10_OFFSET …
#define RAID10_BROCKEN_USE_FAR_SETS …
#define RAID10_USE_FAR_SETS …
#define RAID10_FAR_COPIES_SHIFT …

/* Return md raid10 near copies for @layout */
static unsigned int __raid10_near_copies(int layout)
{
	…
}

/* Return md raid10 far copies for @layout */
static unsigned int __raid10_far_copies(int layout)
{
	…
}

/* Return true if md raid10 offset for @layout */
static bool __is_raid10_offset(int layout)
{
	…
}

/* Return true if md raid10 near for @layout */
static bool __is_raid10_near(int layout)
{
	…
}

/* Return true if md raid10 far for @layout */
static bool __is_raid10_far(int layout)
{
	…
}

/* Return md raid10 layout string for @layout */
static const char *raid10_md_layout_to_format(int layout)
{
	…
}

/* Return md raid10 algorithm for @name */
static int raid10_name_to_format(const char *name)
{
	…
}

/* Return md raid10 copies for @layout */
static unsigned int raid10_md_layout_to_copies(int layout)
{
	…
}

/* Return md raid10 format id for @format string */
static int raid10_format_to_md_layout(struct raid_set *rs,
				      unsigned int algorithm,
				      unsigned int copies)
{
	…
}
/* END: MD raid10 bit definitions and helpers */

/* Check for any of the raid10 algorithms */
static bool __got_raid10(struct raid_type *rtp, const int layout)
{
	…
}

/* Return raid_type for @name */
static struct raid_type *get_raid_type(const char *name)
{
	…
}

/* Return raid_type derived from @level and @layout */
static struct raid_type *get_raid_type_by_ll(const int level, const int layout)
{
	…
}

/* Adjust rdev sectors */
static void rs_set_rdev_sectors(struct raid_set *rs)
{
	…
}

/*
 * Change bdev capacity of @rs in case of a disk add/remove reshape
 */
static void rs_set_capacity(struct raid_set *rs)
{
	…
}

/*
 * Set the mddev properties in @rs to the current
 * ones retrieved from the freshest superblock
 */
static void rs_set_cur(struct raid_set *rs)
{
	…
}

/*
 * Set the mddev properties in @rs to the new
 * ones requested by the ctr
 */
static void rs_set_new(struct raid_set *rs)
{
	…
}

static struct raid_set *raid_set_alloc(struct dm_target *ti, struct raid_type *raid_type,
				       unsigned int raid_devs)
{
	…
}

/* Free all @rs allocations */
static void raid_set_free(struct raid_set *rs)
{
	…
}

/*
 * For every device we have two words
 *  <meta_dev>: meta device name or '-' if missing
 *  <data_dev>: data device name or '-' if missing
 *
 * The following are permitted:
 *    - -
 *    - <data_dev>
 *    <meta_dev> <data_dev>
 *
 * The following is not allowed:
 *    <meta_dev> -
 *
 * This code parses those words.  If there is a failure,
 * the caller must use raid_set_free() to unwind the operations.
 */
static int parse_dev_params(struct raid_set *rs, struct dm_arg_set *as)
{
	…
}

/*
 * validate_region_size
 * @rs
 * @region_size:  region size in sectors.  If 0, pick a size (4MiB default).
 *
 * Set rs->md.bitmap_info.chunksize (which really refers to 'region size').
 * Ensure that (ti->len/region_size < 2^21) - required by MD bitmap.
 *
 * Returns: 0 on success, -EINVAL on failure.
 */
static int validate_region_size(struct raid_set *rs, unsigned long region_size)
{
	…
}

/*
 * validate_raid_redundancy
 * @rs
 *
 * Determine if there are enough devices in the array that haven't
 * failed (or are being rebuilt) to form a usable array.
 *
 * Returns: 0 on success, -EINVAL on failure.
 */
static int validate_raid_redundancy(struct raid_set *rs)
{
	…
}
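/*
 * Illustration only: the central constraint documented for
 * validate_region_size() above is that the number of bitmap regions
 * (ti->len / region_size) must stay below 2^21.  This sketch checks just
 * that; the real code additionally picks a default region size when 0 is
 * passed and would typically use sector_div() for 32-bit safe division.
 */
static inline bool example_region_count_ok(u64 array_sectors, u64 region_size)
{
	return region_size && (array_sectors / region_size) < (1ULL << 21);
}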
/*
 * Possible arguments are...
 *	<chunk_size> [optional_args]
 *
 * Argument definitions
 *    <chunk_size>			The number of sectors per disk that
 *					will form the "stripe"
 *    [[no]sync]			Force or prevent recovery of the
 *					entire array
 *    [rebuild <idx>]			Rebuild the drive indicated by the index
 *    [daemon_sleep <ms>]		Time between bitmap daemon work to
 *					clear bits
 *    [min_recovery_rate <kB/sec/disk>]	Throttle RAID initialization
 *    [max_recovery_rate <kB/sec/disk>]	Throttle RAID initialization
 *    [write_mostly <idx>]		Indicate a write mostly drive via index
 *    [max_write_behind <sectors>]	See '--write-behind=' (man mdadm)
 *    [stripe_cache <sectors>]		Stripe cache size for higher RAIDs
 *    [region_size <sectors>]		Defines granularity of bitmap
 *    [journal_dev <dev>]		raid4/5/6 journaling device
 *					(i.e. write hole closing log)
 *
 * RAID10-only options:
 *    [raid10_copies <# copies>]	Number of copies.  (Default: 2)
 *    [raid10_format <near|far|offset>]	Layout algorithm.  (Default: near)
 */
static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as,
			     unsigned int num_raid_params)
{
	…
}

/* Set raid4/5/6 cache size */
static int rs_set_raid456_stripe_cache(struct raid_set *rs)
{
	…
}

/* Return # of data stripes as kept in mddev as of @rs (i.e. as of superblock) */
static unsigned int mddev_data_stripes(struct raid_set *rs)
{
	…
}

/* Return # of data stripes of @rs (i.e. as of ctr) */
static unsigned int rs_data_stripes(struct raid_set *rs)
{
	…
}

/*
 * Retrieve rdev->sectors from any valid raid device of @rs
 * to allow userspace to pass in arbitrary "- -" device tuples.
 */
static sector_t __rdev_sectors(struct raid_set *rs)
{
	…
}

/* Check that calculated dev_sectors fits all component devices. */
static int _check_data_dev_sectors(struct raid_set *rs)
{
	…
}

/* Get reshape sectors from data_offsets or raid set */
static sector_t _get_reshape_sectors(struct raid_set *rs)
{
	…
}

/* Calculate the sectors per device and per array used for @rs */
static int rs_set_dev_and_array_sectors(struct raid_set *rs, sector_t sectors,
					bool use_mddev)
{
	…
}

/* Setup recovery on @rs */
static void rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors)
{
	…
}

static void do_table_event(struct work_struct *ws)
{
	…
}

/*
 * Make sure a valid takeover (level switch) is being requested on @rs
 *
 * Conversions of raid sets from one MD personality to another
 * have to conform to restrictions which are enforced here.
 */
static int rs_check_takeover(struct raid_set *rs)
{
	…
}

/* True if @rs requested to be taken over */
static bool rs_takeover_requested(struct raid_set *rs)
{
	…
}

/* True if layout is set to reshape. */
static bool rs_is_layout_change(struct raid_set *rs, bool use_mddev)
{
	…
}

/* True if @rs is requested to reshape by ctr */
static bool rs_reshape_requested(struct raid_set *rs)
{
	…
}
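/*
 * Illustration only: the distinction drawn by rs_takeover_requested() and
 * rs_reshape_requested() above.  A takeover is a change of personality
 * (level), e.g. raid5 -> raid6; a reshape stays within the personality but
 * changes layout, chunk size or the number of disks.  The struct and field
 * names below are assumptions made for this sketch, not the in-tree logic.
 */
struct example_geometry {
	int level;
	int layout;
	int chunk_sectors;
	int raid_disks;
};

static inline bool example_takeover_requested(const struct example_geometry *cur,
					      const struct example_geometry *want)
{
	return cur->level != want->level;
}

static inline bool example_reshape_requested(const struct example_geometry *cur,
					     const struct example_geometry *want)
{
	return cur->level == want->level &&
	       (cur->layout != want->layout ||
		cur->chunk_sectors != want->chunk_sectors ||
		cur->raid_disks != want->raid_disks);
}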
/* Features */
#define FEATURE_FLAG_SUPPORTS_V190 …

/* State flags for sb->flags */
#define SB_FLAG_RESHAPE_ACTIVE …
#define SB_FLAG_RESHAPE_BACKWARDS …

/*
 * This structure is never routinely used by userspace, unlike md superblocks.
 * Devices with this superblock should only ever be accessed via device-mapper.
 */
#define DM_RAID_MAGIC …
struct dm_raid_superblock {
	…
} __packed;

/*
 * Check for reshape constraints on raid set @rs:
 *
 * - reshape function non-existent
 * - degraded set
 * - ongoing recovery
 * - ongoing reshape
 *
 * Returns 0 if none apply, or -EPERM for the given constraint
 * with an error message reference in @errmsg
 */
static int rs_check_reshape(struct raid_set *rs)
{
	…
}

static int read_disk_sb(struct md_rdev *rdev, int size, bool force_reload)
{
	…
}

static void sb_retrieve_failed_devices(struct dm_raid_superblock *sb, uint64_t *failed_devices)
{
	…
}

static void sb_update_failed_devices(struct dm_raid_superblock *sb, uint64_t *failed_devices)
{
	…
}

/*
 * Synchronize the superblock members with the raid set properties
 *
 * All superblock data is little endian.
 */
static void super_sync(struct mddev *mddev, struct md_rdev *rdev)
{
	…
}

/*
 * super_load
 *
 * This function creates a superblock if one is not found on the device
 * and will decide which superblock to use if there's a choice.
 *
 * Return: 1 if use rdev, 0 if use refdev, -Exxx otherwise
 */
static int super_load(struct md_rdev *rdev, struct md_rdev *refdev)
{
	…
}

static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev)
{
	…
}

static int super_validate(struct raid_set *rs, struct md_rdev *rdev)
{
	…
}

/*
 * Analyse superblocks and select the freshest.
 */
static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
{
	…
}

/*
 * Adjust data_offset and new_data_offset on all disk members of @rs
 * for out of place reshaping if requested by constructor
 *
 * We need free space at the beginning of each raid disk for forward
 * and at the end for backward reshapes which userspace has to provide
 * via remapping/reordering of space.
 */
static int rs_adjust_data_offsets(struct raid_set *rs)
{
	…
}

/* Userspace reordered disks -> adjust raid_disk indexes in @rs */
static void __reorder_raid_disk_indexes(struct raid_set *rs)
{
	…
}

/*
 * Setup @rs for takeover by a different raid level
 */
static int rs_setup_takeover(struct raid_set *rs)
{
	…
}

/* Prepare @rs for reshape */
static int rs_prepare_reshape(struct raid_set *rs)
{
	…
}

/*
 * Reshape:
 * - change raid layout
 * - change chunk size
 * - add disks
 * - remove disks
 */
static int rs_setup_reshape(struct raid_set *rs)
{
	…
}

/*
 * If the md resync thread has updated superblock with max reshape position
 * at the end of a reshape but not (yet) reset the layout configuration
 * changes -> reset the latter.
 */
static void rs_reset_inconclusive_reshape(struct raid_set *rs)
{
	…
}

/*
 * Enable/disable discard support on RAID set depending on
 * RAID level and discard properties of underlying RAID members.
 */
static void configure_discard_support(struct raid_set *rs)
{
	…
}

/*
 * Construct a RAID0/1/10/4/5/6 mapping:
 * Args:
 *	<raid_type> <#raid_params> <raid_params>{0,}	\
 *	<#raid_devs> [<meta_dev1> <dev1>]{1,}
 *
 * <raid_params> varies by <raid_type>.  See 'parse_raid_params' for
 * details on possible <raid_params>.
 *
 * Userspace is free to initialize the metadata devices (hence the superblocks)
 * to enforce recreation based on the passed-in table parameters.
 */
static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	…
}

static void raid_dtr(struct dm_target *ti)
{
	…
}

static int raid_map(struct dm_target *ti, struct bio *bio)
{
	…
}
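/*
 * Illustrative constructor table line only (sizes and device names are
 * placeholders, not taken from the documentation), as it would be passed
 * to dmsetup:
 *
 *	0 125829120 raid raid5_ls 3 128 region_size 8192 \
 *		3 /dev/sda1 /dev/sda2 /dev/sdb1 /dev/sdb2 /dev/sdc1 /dev/sdc2
 *
 * i.e. a raid5 (left symmetric) set with 3 raid params ("128 region_size 8192":
 * 128 sector chunks, 8192 sector bitmap regions) and 3 <meta_dev> <data_dev>
 * pairs as parsed by parse_dev_params()/parse_raid_params().
 */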
/* Return sync state string for @state */
enum sync_state {
	…
};

static const char *sync_str(enum sync_state state)
{
	/* Has to be in above sync_state order! */
	static const char *sync_strs[] = {
		"frozen",
		"reshape",
		"resync",
		"check",
		"repair",
		"recover",
		"idle"
	};

	return __within_range(state, 0, ARRAY_SIZE(sync_strs) - 1) ? sync_strs[state] : "undef";
}

/* Return enum sync_state for @mddev derived from @recovery flags */
static enum sync_state decipher_sync_action(struct mddev *mddev, unsigned long recovery)
{
	…
}

/*
 * Return status string for @rdev
 *
 * Status characters:
 *
 *  'D' = Dead/Failed raid set component or raid4/5/6 journal device
 *  'a' = Alive but not in-sync raid set component _or_ alive raid4/5/6 'write_back' journal device
 *  'A' = Alive and in-sync raid set component _or_ alive raid4/5/6 'write_through' journal device
 *  '-' = Non-existing device (i.e. userspace passed '- -' into the ctr)
 */
static const char *__raid_dev_status(struct raid_set *rs, struct md_rdev *rdev)
{
	…
}

/* Helper to return resync/reshape progress for @rs and runtime flags for raid set in sync / resyncing */
static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
				enum sync_state state, sector_t resync_max_sectors)
{
	…
}

/* Helper to return @dev name or "-" if !@dev */
static const char *__get_dev_name(struct dm_dev *dev)
{
	…
}

static void raid_status(struct dm_target *ti, status_type_t type,
			unsigned int status_flags, char *result, unsigned int maxlen)
{
	…
}

static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
			char *result, unsigned int maxlen)
{
	…
}

static int raid_iterate_devices(struct dm_target *ti,
				iterate_devices_callout_fn fn, void *data)
{
	…
}

static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
	…
}

static void raid_presuspend(struct dm_target *ti)
{
	…
}

static void raid_presuspend_undo(struct dm_target *ti)
{
	…
}

static void raid_postsuspend(struct dm_target *ti)
{
	…
}

static void attempt_restore_of_faulty_devices(struct raid_set *rs)
{
	…
}

static int __load_dirty_region_bitmap(struct raid_set *rs)
{
	…
}

/* Enforce updating all superblocks */
static void rs_update_sbs(struct raid_set *rs)
{
	…
}

/*
 * Reshape changes the raid algorithm of @rs to a new one within the personality
 * (e.g. raid6_zr -> raid6_nc), changes stripe size, adds/removes
 * disks from a raid set thus growing/shrinking it or resizes the set
 *
 * Call mddev_lock_nointr() before!
 */
static int rs_start_reshape(struct raid_set *rs)
{
	…
}

static int raid_preresume(struct dm_target *ti)
{
	…
}

static void raid_resume(struct dm_target *ti)
{
	…
}

static struct target_type raid_target = …;
module_dm(raid);

module_param(devices_handle_discard_safely, bool, 0644);
MODULE_PARM_DESC(…) …;

MODULE_DESCRIPTION(…) …;
MODULE_ALIAS(…) …;
MODULE_ALIAS(…) …;
MODULE_ALIAS(…) …;
MODULE_ALIAS(…) …;
MODULE_ALIAS(…) …;
MODULE_ALIAS(…) …;
MODULE_AUTHOR(…) …;
MODULE_AUTHOR(…) …;
MODULE_LICENSE(…) …;