// SPDX-License-Identifier: GPL-2.0 /* * Zoned block device handling * * Copyright (c) 2015, Hannes Reinecke * Copyright (c) 2015, SUSE Linux GmbH * * Copyright (c) 2016, Damien Le Moal * Copyright (c) 2016, Western Digital * Copyright (c) 2024, Western Digital Corporation or its affiliates. */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/blkdev.h> #include <linux/blk-mq.h> #include <linux/mm.h> #include <linux/vmalloc.h> #include <linux/sched/mm.h> #include <linux/spinlock.h> #include <linux/atomic.h> #include <linux/mempool.h> #include "blk.h" #include "blk-mq-sched.h" #include "blk-mq-debugfs.h" #define ZONE_COND_NAME … static const char *const zone_cond_name[] = …; #undef ZONE_COND_NAME /* * Per-zone write plug. * @node: hlist_node structure for managing the plug using a hash table. * @link: To list the plug in the zone write plug error list of the disk. * @ref: Zone write plug reference counter. A zone write plug reference is * always at least 1 when the plug is hashed in the disk plug hash table. * The reference is incremented whenever a new BIO needing plugging is * submitted and when a function needs to manipulate a plug. The * reference count is decremented whenever a plugged BIO completes and * when a function that referenced the plug returns. The initial * reference is dropped whenever the zone of the zone write plug is reset, * finished and when the zone becomes full (last write BIO to the zone * completes). * @lock: Spinlock to atomically manipulate the plug. * @flags: Flags indicating the plug state. * @zone_no: The number of the zone the plug is managing. * @wp_offset: The zone write pointer location relative to the start of the zone * as a number of 512B sectors. * @bio_list: The list of BIOs that are currently plugged. * @bio_work: Work struct to handle issuing of plugged BIOs * @rcu_head: RCU head to free zone write plugs with an RCU grace period. * @disk: The gendisk the plug belongs to. */ struct blk_zone_wplug { … }; /* * Zone write plug flags bits: * - BLK_ZONE_WPLUG_PLUGGED: Indicates that the zone write plug is plugged, * that is, that write BIOs are being throttled due to a write BIO already * being executed or the zone write plug bio list is not empty. * - BLK_ZONE_WPLUG_ERROR: Indicates that a write error happened which will be * recovered with a report zone to update the zone write pointer offset. * - BLK_ZONE_WPLUG_UNHASHED: Indicates that the zone write plug was removed * from the disk hash table and that the initial reference to the zone * write plug set when the plug was first added to the hash table has been * dropped. This flag is set when a zone is reset, finished or become full, * to prevent new references to the zone write plug to be taken for * newly incoming BIOs. A zone write plug flagged with this flag will be * freed once all remaining references from BIOs or functions are dropped. */ #define BLK_ZONE_WPLUG_PLUGGED … #define BLK_ZONE_WPLUG_ERROR … #define BLK_ZONE_WPLUG_UNHASHED … #define BLK_ZONE_WPLUG_BUSY … /** * blk_zone_cond_str - Return string XXX in BLK_ZONE_COND_XXX. * @zone_cond: BLK_ZONE_COND_XXX. * * Description: Centralize block layer function to convert BLK_ZONE_COND_XXX * into string format. Useful in the debugging and tracing zone conditions. For * invalid BLK_ZONE_COND_XXX it returns string "UNKNOWN". */ const char *blk_zone_cond_str(enum blk_zone_cond zone_cond) { … } EXPORT_SYMBOL_GPL(…); /** * blkdev_report_zones - Get zones information * @bdev: Target block device * @sector: Sector from which to report zones * @nr_zones: Maximum number of zones to report * @cb: Callback function called for each reported zone * @data: Private data for the callback * * Description: * Get zone information starting from the zone containing @sector for at most * @nr_zones, and call @cb for each zone reported by the device. * To report all zones in a device starting from @sector, the BLK_ALL_ZONES * constant can be passed to @nr_zones. * Returns the number of zones reported by the device, or a negative errno * value in case of failure. * * Note: The caller must use memalloc_noXX_save/restore() calls to control * memory allocations done within this function. */ int blkdev_report_zones(struct block_device *bdev, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data) { … } EXPORT_SYMBOL_GPL(…); static int blkdev_zone_reset_all(struct block_device *bdev) { … } /** * blkdev_zone_mgmt - Execute a zone management operation on a range of zones * @bdev: Target block device * @op: Operation to be performed on the zones * @sector: Start sector of the first zone to operate on * @nr_sectors: Number of sectors, should be at least the length of one zone and * must be zone size aligned. * * Description: * Perform the specified operation on the range of zones specified by * @sector..@sector+@nr_sectors. Specifying the entire disk sector range * is valid, but the specified range should not contain conventional zones. * The operation to execute on each zone can be a zone reset, open, close * or finish request. */ int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op, sector_t sector, sector_t nr_sectors) { … } EXPORT_SYMBOL_GPL(…); struct zone_report_args { … }; static int blkdev_copy_zone_to_user(struct blk_zone *zone, unsigned int idx, void *data) { … } /* * BLKREPORTZONE ioctl processing. * Called from blkdev_ioctl. */ int blkdev_report_zones_ioctl(struct block_device *bdev, unsigned int cmd, unsigned long arg) { … } static int blkdev_truncate_zone_range(struct block_device *bdev, blk_mode_t mode, const struct blk_zone_range *zrange) { … } /* * BLKRESETZONE, BLKOPENZONE, BLKCLOSEZONE and BLKFINISHZONE ioctl processing. * Called from blkdev_ioctl. */ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode, unsigned int cmd, unsigned long arg) { … } static inline bool disk_zone_is_conv(struct gendisk *disk, sector_t sector) { … } static bool disk_zone_is_last(struct gendisk *disk, struct blk_zone *zone) { … } static bool disk_zone_is_full(struct gendisk *disk, unsigned int zno, unsigned int offset_in_zone) { … } static bool disk_zone_wplug_is_full(struct gendisk *disk, struct blk_zone_wplug *zwplug) { … } static bool disk_insert_zone_wplug(struct gendisk *disk, struct blk_zone_wplug *zwplug) { … } static struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk, sector_t sector) { … } static void disk_free_zone_wplug_rcu(struct rcu_head *rcu_head) { … } static inline void disk_put_zone_wplug(struct blk_zone_wplug *zwplug) { … } static inline bool disk_should_remove_zone_wplug(struct gendisk *disk, struct blk_zone_wplug *zwplug) { … } static void disk_remove_zone_wplug(struct gendisk *disk, struct blk_zone_wplug *zwplug) { … } static void blk_zone_wplug_bio_work(struct work_struct *work); /* * Get a reference on the write plug for the zone containing @sector. * If the plug does not exist, it is allocated and hashed. * Return a pointer to the zone write plug with the plug spinlock held. */ static struct blk_zone_wplug *disk_get_and_lock_zone_wplug(struct gendisk *disk, sector_t sector, gfp_t gfp_mask, unsigned long *flags) { … } static inline void blk_zone_wplug_bio_io_error(struct blk_zone_wplug *zwplug, struct bio *bio) { … } /* * Abort (fail) all plugged BIOs of a zone write plug. */ static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug) { … } /* * Abort (fail) all plugged BIOs of a zone write plug that are not aligned * with the assumed write pointer location of the zone when the BIO will * be unplugged. */ static void disk_zone_wplug_abort_unaligned(struct gendisk *disk, struct blk_zone_wplug *zwplug) { … } static inline void disk_zone_wplug_set_error(struct gendisk *disk, struct blk_zone_wplug *zwplug) { … } static inline void disk_zone_wplug_clear_error(struct gendisk *disk, struct blk_zone_wplug *zwplug) { … } /* * Set a zone write plug write pointer offset to either 0 (zone reset case) * or to the zone size (zone finish case). This aborts all plugged BIOs, which * is fine to do as doing a zone reset or zone finish while writes are in-flight * is a mistake from the user which will most likely cause all plugged BIOs to * fail anyway. */ static void disk_zone_wplug_set_wp_offset(struct gendisk *disk, struct blk_zone_wplug *zwplug, unsigned int wp_offset) { … } static bool blk_zone_wplug_handle_reset_or_finish(struct bio *bio, unsigned int wp_offset) { … } static bool blk_zone_wplug_handle_reset_all(struct bio *bio) { … } static inline void blk_zone_wplug_add_bio(struct blk_zone_wplug *zwplug, struct bio *bio, unsigned int nr_segs) { … } /* * Called from bio_attempt_back_merge() when a BIO was merged with a request. */ void blk_zone_write_plug_bio_merged(struct bio *bio) { … } /* * Attempt to merge plugged BIOs with a newly prepared request for a BIO that * already went through zone write plugging (either a new BIO or one that was * unplugged). */ void blk_zone_write_plug_init_request(struct request *req) { … } /* * Check and prepare a BIO for submission by incrementing the write pointer * offset of its zone write plug and changing zone append operations into * regular write when zone append emulation is needed. */ static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug, struct bio *bio) { … } static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs) { … } /** * blk_zone_plug_bio - Handle a zone write BIO with zone write plugging * @bio: The BIO being submitted * @nr_segs: The number of physical segments of @bio * * Handle write, write zeroes and zone append operations requiring emulation * using zone write plugging. * * Return true whenever @bio execution needs to be delayed through the zone * write plug. Otherwise, return false to let the submission path process * @bio normally. */ bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs) { … } EXPORT_SYMBOL_GPL(…); static void disk_zone_wplug_schedule_bio_work(struct gendisk *disk, struct blk_zone_wplug *zwplug) { … } static void disk_zone_wplug_unplug_bio(struct gendisk *disk, struct blk_zone_wplug *zwplug) { … } void blk_zone_write_plug_bio_endio(struct bio *bio) { … } void blk_zone_write_plug_finish_request(struct request *req) { … } static void blk_zone_wplug_bio_work(struct work_struct *work) { … } static unsigned int blk_zone_wp_offset(struct blk_zone *zone) { … } static int blk_zone_wplug_report_zone_cb(struct blk_zone *zone, unsigned int idx, void *data) { … } static void disk_zone_wplug_handle_error(struct gendisk *disk, struct blk_zone_wplug *zwplug) { … } static void disk_zone_wplugs_work(struct work_struct *work) { … } static inline unsigned int disk_zone_wplugs_hash_size(struct gendisk *disk) { … } void disk_init_zone_resources(struct gendisk *disk) { … } /* * For the size of a disk zone write plug hash table, use the size of the * zone write plug mempool, which is the maximum of the disk open zones and * active zones limits. But do not exceed 4KB (512 hlist head entries), that is, * 9 bits. For a disk that has no limits, mempool size defaults to 128. */ #define BLK_ZONE_WPLUG_MAX_HASH_BITS … #define BLK_ZONE_WPLUG_DEFAULT_POOL_SIZE … static int disk_alloc_zone_resources(struct gendisk *disk, unsigned int pool_size) { … } static void disk_destroy_zone_wplugs_hash_table(struct gendisk *disk) { … } void disk_free_zone_resources(struct gendisk *disk) { … } static inline bool disk_need_zone_resources(struct gendisk *disk) { … } static int disk_revalidate_zone_resources(struct gendisk *disk, unsigned int nr_zones) { … } struct blk_revalidate_zone_args { … }; /* * Update the disk zone resources information and device queue limits. * The disk queue is frozen when this is executed. */ static int disk_update_zone_resources(struct gendisk *disk, struct blk_revalidate_zone_args *args) { … } static int blk_revalidate_conv_zone(struct blk_zone *zone, unsigned int idx, struct blk_revalidate_zone_args *args) { … } static int blk_revalidate_seq_zone(struct blk_zone *zone, unsigned int idx, struct blk_revalidate_zone_args *args) { … } /* * Helper function to check the validity of zones of a zoned block device. */ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx, void *data) { … } /** * blk_revalidate_disk_zones - (re)allocate and initialize zone write plugs * @disk: Target disk * * Helper function for low-level device drivers to check, (re) allocate and * initialize resources used for managing zoned disks. This function should * normally be called by blk-mq based drivers when a zoned gendisk is probed * and when the zone configuration of the gendisk changes (e.g. after a format). * Before calling this function, the device driver must already have set the * device zone size (chunk_sector limit) and the max zone append limit. * BIO based drivers can also use this function as long as the device queue * can be safely frozen. */ int blk_revalidate_disk_zones(struct gendisk *disk) { … } EXPORT_SYMBOL_GPL(…); #ifdef CONFIG_BLK_DEBUG_FS int queue_zone_wplugs_show(void *data, struct seq_file *m) { … } #endif