linux/drivers/mtd/ubi/wl.c

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) International Business Machines Corp., 2006
 *
 * Authors: Artem Bityutskiy (Битюцкий Артём), Thomas Gleixner
 */

/*
 * UBI wear-leveling sub-system.
 *
 * This sub-system is responsible for wear-leveling. It works in terms of
 * physical eraseblocks and erase counters and knows nothing about logical
 * eraseblocks, volumes, etc. From this sub-system's perspective all physical
 * eraseblocks are of two types - used and free. Used physical eraseblocks are
 * those that were "get" by the 'ubi_wl_get_peb()' function, and free physical
 * eraseblocks are those that were put by the 'ubi_wl_put_peb()' function.
 *
 * Physical eraseblocks returned by 'ubi_wl_get_peb()' have only erase counter
 * header. The rest of the physical eraseblock contains only %0xFF bytes.
 *
 * When physical eraseblocks are returned to the WL sub-system by means of the
 * 'ubi_wl_put_peb()' function, they are scheduled for erasure. The erasure is
 * done asynchronously in context of the per-UBI device background thread,
 * which is also managed by the WL sub-system.
 *
 * The wear-leveling is ensured by means of moving the contents of used
 * physical eraseblocks with low erase counter to free physical eraseblocks
 * with high erase counter.
 *
 * If the WL sub-system fails to erase a physical eraseblock, it marks it as
 * bad.
 *
 * This sub-system is also responsible for scrubbing. If a bit-flip is detected
 * in a physical eraseblock, it has to be moved. Technically this is the same
 * as moving it for wear-leveling reasons.
 *
 * As it was said, for the UBI sub-system all physical eraseblocks are either
 * "free" or "used". Free eraseblock are kept in the @wl->free RB-tree, while
 * used eraseblocks are kept in @wl->used, @wl->erroneous, or @wl->scrub
 * RB-trees, as well as (temporarily) in the @wl->pq queue.
 *
 * When the WL sub-system returns a physical eraseblock, the physical
 * eraseblock is protected from being moved for some "time". For this reason,
 * the physical eraseblock is not directly moved from the @wl->free tree to the
 * @wl->used tree. There is a protection queue in between where this
 * physical eraseblock is temporarily stored (@wl->pq).
 *
 * All this protection stuff is needed because:
 *  o we don't want to move physical eraseblocks just after we have given them
 *    to the user; instead, we first want to let users fill them up with data;
 *
 *  o there is a chance that the user will put the physical eraseblock very
 *    soon, so it makes sense not to move it for some time, but wait.
 *
 * Physical eraseblocks stay protected only for limited time. But the "time" is
 * measured in erase cycles in this case. This is implemented with help of the
 * protection queue. Eraseblocks are put to the tail of this queue when they
 * are returned by the 'ubi_wl_get_peb()', and eraseblocks are removed from the
 * head of the queue on each erase operation (for any eraseblock). So the
 * length of the queue defines how may (global) erase cycles PEBs are protected.
 *
 * To put it differently, each physical eraseblock has 2 main states: free and
 * used. The former state corresponds to the @wl->free tree. The latter state
 * is split up on several sub-states:
 * o the WL movement is allowed (@wl->used tree);
 * o the WL movement is disallowed (@wl->erroneous) because the PEB is
 *   erroneous - e.g., there was a read error;
 * o the WL movement is temporarily prohibited (@wl->pq queue);
 * o scrubbing is needed (@wl->scrub tree).
 *
 * Depending on the sub-state, wear-leveling entries of the used physical
 * eraseblocks may be kept in one of those structures.
 *
 * Note, in this implementation, we keep a small in-RAM object for each physical
 * eraseblock. This is surely not a scalable solution. But it appears to be good
 * enough for moderately large flashes and it is simple. In future, one may
 * re-work this sub-system and make it more scalable.
 *
 * At the moment this sub-system does not utilize the sequence number, which
 * was introduced relatively recently. But it would be wise to do this because
 * the sequence number of a logical eraseblock characterizes how old is it. For
 * example, when we move a PEB with low erase counter, and we need to pick the
 * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we
 * pick target PEB with an average EC if our PEB is not very "old". This is a
 * room for future re-works of the WL sub-system.
 */

#include <linux/slab.h>
#include <linux/crc32.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include "ubi.h"
#include "wl.h"

/* Number of physical eraseblocks reserved for wear-leveling purposes */
#define WL_RESERVED_PEBS

/*
 * Maximum difference between two erase counters. If this threshold is
 * exceeded, the WL sub-system starts moving data from used physical
 * eraseblocks with low erase counter to free physical eraseblocks with high
 * erase counter.
 */
#define UBI_WL_THRESHOLD

/*
 * When a physical eraseblock is moved, the WL sub-system has to pick the target
 * physical eraseblock to move to. The simplest way would be just to pick the
 * one with the highest erase counter. But in certain workloads this could lead
 * to an unlimited wear of one or few physical eraseblock. Indeed, imagine a
 * situation when the picked physical eraseblock is constantly erased after the
 * data is written to it. So, we have a constant which limits the highest erase
 * counter of the free physical eraseblock to pick. Namely, the WL sub-system
 * does not pick eraseblocks with erase counter greater than the lowest erase
 * counter plus %WL_FREE_MAX_DIFF.
 */
#define WL_FREE_MAX_DIFF

/*
 * Maximum number of consecutive background thread failures which is enough to
 * switch to read-only mode.
 */
#define WL_MAX_FAILURES

static int self_check_ec(struct ubi_device *ubi, int pnum, int ec);
static int self_check_in_wl_tree(const struct ubi_device *ubi,
				 struct ubi_wl_entry *e, struct rb_root *root);
static int self_check_in_pq(const struct ubi_device *ubi,
			    struct ubi_wl_entry *e);

/**
 * wl_tree_add - add a wear-leveling entry to a WL RB-tree.
 * @e: the wear-leveling entry to add
 * @root: the root of the tree
 *
 * Note, we use (erase counter, physical eraseblock number) pairs as keys in
 * the @ubi->used and @ubi->free RB-trees.
 */
static void wl_tree_add(struct ubi_wl_entry *e, struct rb_root *root)
{}

/**
 * wl_entry_destroy - destroy a wear-leveling entry.
 * @ubi: UBI device description object
 * @e: the wear-leveling entry to add
 *
 * This function destroys a wear leveling entry and removes
 * the reference from the lookup table.
 */
static void wl_entry_destroy(struct ubi_device *ubi, struct ubi_wl_entry *e)
{}

/**
 * do_work - do one pending work.
 * @ubi: UBI device description object
 * @executed: whether there is one work is executed
 *
 * This function returns zero in case of success and a negative error code in
 * case of failure. If @executed is not NULL and there is one work executed,
 * @executed is set as %1, otherwise @executed is set as %0.
 */
static int do_work(struct ubi_device *ubi, int *executed)
{}

/**
 * in_wl_tree - check if wear-leveling entry is present in a WL RB-tree.
 * @e: the wear-leveling entry to check
 * @root: the root of the tree
 *
 * This function returns non-zero if @e is in the @root RB-tree and zero if it
 * is not.
 */
static int in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root)
{}

/**
 * in_pq - check if a wear-leveling entry is present in the protection queue.
 * @ubi: UBI device description object
 * @e: the wear-leveling entry to check
 *
 * This function returns non-zero if @e is in the protection queue and zero
 * if it is not.
 */
static inline int in_pq(const struct ubi_device *ubi, struct ubi_wl_entry *e)
{}

/**
 * prot_queue_add - add physical eraseblock to the protection queue.
 * @ubi: UBI device description object
 * @e: the physical eraseblock to add
 *
 * This function adds @e to the tail of the protection queue @ubi->pq, where
 * @e will stay for %UBI_PROT_QUEUE_LEN erase operations and will be
 * temporarily protected from the wear-leveling worker. Note, @wl->lock has to
 * be locked.
 */
static void prot_queue_add(struct ubi_device *ubi, struct ubi_wl_entry *e)
{}

/**
 * find_wl_entry - find wear-leveling entry closest to certain erase counter.
 * @ubi: UBI device description object
 * @root: the RB-tree where to look for
 * @diff: maximum possible difference from the smallest erase counter
 * @pick_max: pick PEB even its erase counter beyonds 'min_ec + @diff'
 *
 * This function looks for a wear leveling entry with erase counter closest to
 * min + @diff, where min is the smallest erase counter.
 */
static struct ubi_wl_entry *find_wl_entry(struct ubi_device *ubi,
					  struct rb_root *root, int diff,
					  int pick_max)
{}

/**
 * find_mean_wl_entry - find wear-leveling entry with medium erase counter.
 * @ubi: UBI device description object
 * @root: the RB-tree where to look for
 *
 * This function looks for a wear leveling entry with medium erase counter,
 * but not greater or equivalent than the lowest erase counter plus
 * %WL_FREE_MAX_DIFF/2.
 */
static struct ubi_wl_entry *find_mean_wl_entry(struct ubi_device *ubi,
					       struct rb_root *root)
{}

/**
 * wl_get_wle - get a mean wl entry to be used by ubi_wl_get_peb() or
 * refill_wl_user_pool().
 * @ubi: UBI device description object
 *
 * This function returns a wear leveling entry in case of success and
 * NULL in case of failure.
 */
static struct ubi_wl_entry *wl_get_wle(struct ubi_device *ubi)
{}

/**
 * prot_queue_del - remove a physical eraseblock from the protection queue.
 * @ubi: UBI device description object
 * @pnum: the physical eraseblock to remove
 *
 * This function deletes PEB @pnum from the protection queue and returns zero
 * in case of success and %-ENODEV if the PEB was not found.
 */
static int prot_queue_del(struct ubi_device *ubi, int pnum)
{}

/**
 * ubi_sync_erase - synchronously erase a physical eraseblock.
 * @ubi: UBI device description object
 * @e: the physical eraseblock to erase
 * @torture: if the physical eraseblock has to be tortured
 *
 * This function returns zero in case of success and a negative error code in
 * case of failure.
 */
int ubi_sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, int torture)
{}

/**
 * serve_prot_queue - check if it is time to stop protecting PEBs.
 * @ubi: UBI device description object
 *
 * This function is called after each erase operation and removes PEBs from the
 * tail of the protection queue. These PEBs have been protected for long enough
 * and should be moved to the used tree.
 */
static void serve_prot_queue(struct ubi_device *ubi)
{}

/**
 * __schedule_ubi_work - schedule a work.
 * @ubi: UBI device description object
 * @wrk: the work to schedule
 *
 * This function adds a work defined by @wrk to the tail of the pending works
 * list. Can only be used if ubi->work_sem is already held in read mode!
 */
static void __schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk)
{}

/**
 * schedule_ubi_work - schedule a work.
 * @ubi: UBI device description object
 * @wrk: the work to schedule
 *
 * This function adds a work defined by @wrk to the tail of the pending works
 * list.
 */
static void schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk)
{}

static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
			int shutdown);

/**
 * schedule_erase - schedule an erase work.
 * @ubi: UBI device description object
 * @e: the WL entry of the physical eraseblock to erase
 * @vol_id: the volume ID that last used this PEB
 * @lnum: the last used logical eraseblock number for the PEB
 * @torture: if the physical eraseblock has to be tortured
 * @nested: denotes whether the work_sem is already held
 *
 * This function returns zero in case of success and a %-ENOMEM in case of
 * failure.
 */
static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
			  int vol_id, int lnum, int torture, bool nested)
{}

static int __erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk);
/**
 * do_sync_erase - run the erase worker synchronously.
 * @ubi: UBI device description object
 * @e: the WL entry of the physical eraseblock to erase
 * @vol_id: the volume ID that last used this PEB
 * @lnum: the last used logical eraseblock number for the PEB
 * @torture: if the physical eraseblock has to be tortured
 *
 */
static int do_sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
			 int vol_id, int lnum, int torture)
{}

static int ensure_wear_leveling(struct ubi_device *ubi, int nested);
/**
 * wear_leveling_worker - wear-leveling worker function.
 * @ubi: UBI device description object
 * @wrk: the work object
 * @shutdown: non-zero if the worker has to free memory and exit
 * because the WL-subsystem is shutting down
 *
 * This function copies a more worn out physical eraseblock to a less worn out
 * one. Returns zero in case of success and a negative error code in case of
 * failure.
 */
static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
				int shutdown)
{}

/**
 * ensure_wear_leveling - schedule wear-leveling if it is needed.
 * @ubi: UBI device description object
 * @nested: set to non-zero if this function is called from UBI worker
 *
 * This function checks if it is time to start wear-leveling and schedules it
 * if yes. This function returns zero in case of success and a negative error
 * code in case of failure.
 */
static int ensure_wear_leveling(struct ubi_device *ubi, int nested)
{}

/**
 * __erase_worker - physical eraseblock erase worker function.
 * @ubi: UBI device description object
 * @wl_wrk: the work object
 *
 * This function erases a physical eraseblock and perform torture testing if
 * needed. It also takes care about marking the physical eraseblock bad if
 * needed. Returns zero in case of success and a negative error code in case of
 * failure.
 */
static int __erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk)
{}

static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
			  int shutdown)
{}

/**
 * ubi_wl_put_peb - return a PEB to the wear-leveling sub-system.
 * @ubi: UBI device description object
 * @vol_id: the volume ID that last used this PEB
 * @lnum: the last used logical eraseblock number for the PEB
 * @pnum: physical eraseblock to return
 * @torture: if this physical eraseblock has to be tortured
 *
 * This function is called to return physical eraseblock @pnum to the pool of
 * free physical eraseblocks. The @torture flag has to be set if an I/O error
 * occurred to this @pnum and it has to be tested. This function returns zero
 * in case of success, and a negative error code in case of failure.
 */
int ubi_wl_put_peb(struct ubi_device *ubi, int vol_id, int lnum,
		   int pnum, int torture)
{}

/**
 * ubi_wl_scrub_peb - schedule a physical eraseblock for scrubbing.
 * @ubi: UBI device description object
 * @pnum: the physical eraseblock to schedule
 *
 * If a bit-flip in a physical eraseblock is detected, this physical eraseblock
 * needs scrubbing. This function schedules a physical eraseblock for
 * scrubbing which is done in background. This function returns zero in case of
 * success and a negative error code in case of failure.
 */
int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum)
{}

/**
 * ubi_wl_flush - flush all pending works.
 * @ubi: UBI device description object
 * @vol_id: the volume id to flush for
 * @lnum: the logical eraseblock number to flush for
 *
 * This function executes all pending works for a particular volume id /
 * logical eraseblock number pair. If either value is set to %UBI_ALL, then it
 * acts as a wildcard for all of the corresponding volume numbers or logical
 * eraseblock numbers. It returns zero in case of success and a negative error
 * code in case of failure.
 */
int ubi_wl_flush(struct ubi_device *ubi, int vol_id, int lnum)
{}

static bool scrub_possible(struct ubi_device *ubi, struct ubi_wl_entry *e)
{}

/**
 * ubi_bitflip_check - Check an eraseblock for bitflips and scrub it if needed.
 * @ubi: UBI device description object
 * @pnum: the physical eraseblock to schedule
 * @force: don't read the block, assume bitflips happened and take action.
 *
 * This function reads the given eraseblock and checks if bitflips occured.
 * In case of bitflips, the eraseblock is scheduled for scrubbing.
 * If scrubbing is forced with @force, the eraseblock is not read,
 * but scheduled for scrubbing right away.
 *
 * Returns:
 * %EINVAL, PEB is out of range
 * %ENOENT, PEB is no longer used by UBI
 * %EBUSY, PEB cannot be checked now or a check is currently running on it
 * %EAGAIN, bit flips happened but scrubbing is currently not possible
 * %EUCLEAN, bit flips happened and PEB is scheduled for scrubbing
 * %0, no bit flips detected
 */
int ubi_bitflip_check(struct ubi_device *ubi, int pnum, int force)
{}

/**
 * tree_destroy - destroy an RB-tree.
 * @ubi: UBI device description object
 * @root: the root of the tree to destroy
 */
static void tree_destroy(struct ubi_device *ubi, struct rb_root *root)
{}

/**
 * ubi_thread - UBI background thread.
 * @u: the UBI device description object pointer
 */
int ubi_thread(void *u)
{}

/**
 * shutdown_work - shutdown all pending works.
 * @ubi: UBI device description object
 */
static void shutdown_work(struct ubi_device *ubi)
{}

/**
 * erase_aeb - erase a PEB given in UBI attach info PEB
 * @ubi: UBI device description object
 * @aeb: UBI attach info PEB
 * @sync: If true, erase synchronously. Otherwise schedule for erasure
 */
static int erase_aeb(struct ubi_device *ubi, struct ubi_ainf_peb *aeb, bool sync)
{}

/**
 * ubi_wl_init - initialize the WL sub-system using attaching information.
 * @ubi: UBI device description object
 * @ai: attaching information
 *
 * This function returns zero in case of success, and a negative error code in
 * case of failure.
 */
int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai)
{}

/**
 * protection_queue_destroy - destroy the protection queue.
 * @ubi: UBI device description object
 */
static void protection_queue_destroy(struct ubi_device *ubi)
{}

/**
 * ubi_wl_close - close the wear-leveling sub-system.
 * @ubi: UBI device description object
 */
void ubi_wl_close(struct ubi_device *ubi)
{}

/**
 * self_check_ec - make sure that the erase counter of a PEB is correct.
 * @ubi: UBI device description object
 * @pnum: the physical eraseblock number to check
 * @ec: the erase counter to check
 *
 * This function returns zero if the erase counter of physical eraseblock @pnum
 * is equivalent to @ec, and a negative error code if not or if an error
 * occurred.
 */
static int self_check_ec(struct ubi_device *ubi, int pnum, int ec)
{}

/**
 * self_check_in_wl_tree - check that wear-leveling entry is in WL RB-tree.
 * @ubi: UBI device description object
 * @e: the wear-leveling entry to check
 * @root: the root of the tree
 *
 * This function returns zero if @e is in the @root RB-tree and %-EINVAL if it
 * is not.
 */
static int self_check_in_wl_tree(const struct ubi_device *ubi,
				 struct ubi_wl_entry *e, struct rb_root *root)
{}

/**
 * self_check_in_pq - check if wear-leveling entry is in the protection
 *                        queue.
 * @ubi: UBI device description object
 * @e: the wear-leveling entry to check
 *
 * This function returns zero if @e is in @ubi->pq and %-EINVAL if it is not.
 */
static int self_check_in_pq(const struct ubi_device *ubi,
			    struct ubi_wl_entry *e)
{}
#ifndef CONFIG_MTD_UBI_FASTMAP
static struct ubi_wl_entry *get_peb_for_wl(struct ubi_device *ubi)
{
	struct ubi_wl_entry *e;

	e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF, 0);
	self_check_in_wl_tree(ubi, e, &ubi->free);
	ubi->free_count--;
	ubi_assert(ubi->free_count >= 0);
	rb_erase(&e->u.rb, &ubi->free);

	return e;
}

/**
 * produce_free_peb - produce a free physical eraseblock.
 * @ubi: UBI device description object
 *
 * This function tries to make a free PEB by means of synchronous execution of
 * pending works. This may be needed if, for example the background thread is
 * disabled. Returns zero in case of success and a negative error code in case
 * of failure.
 */
static int produce_free_peb(struct ubi_device *ubi)
{
	int err;

	while (!ubi->free.rb_node && ubi->works_count) {
		spin_unlock(&ubi->wl_lock);

		dbg_wl("do one work synchronously");
		err = do_work(ubi, NULL);

		spin_lock(&ubi->wl_lock);
		if (err)
			return err;
	}

	return 0;
}

/**
 * ubi_wl_get_peb - get a physical eraseblock.
 * @ubi: UBI device description object
 *
 * This function returns a physical eraseblock in case of success and a
 * negative error code in case of failure.
 * Returns with ubi->fm_eba_sem held in read mode!
 */
int ubi_wl_get_peb(struct ubi_device *ubi)
{
	int err;
	struct ubi_wl_entry *e;

retry:
	down_read(&ubi->fm_eba_sem);
	spin_lock(&ubi->wl_lock);
	if (!ubi->free.rb_node) {
		if (ubi->works_count == 0) {
			ubi_err(ubi, "no free eraseblocks");
			ubi_assert(list_empty(&ubi->works));
			spin_unlock(&ubi->wl_lock);
			return -ENOSPC;
		}

		err = produce_free_peb(ubi);
		if (err < 0) {
			spin_unlock(&ubi->wl_lock);
			return err;
		}
		spin_unlock(&ubi->wl_lock);
		up_read(&ubi->fm_eba_sem);
		goto retry;

	}
	e = wl_get_wle(ubi);
	prot_queue_add(ubi, e);
	spin_unlock(&ubi->wl_lock);

	err = ubi_self_check_all_ff(ubi, e->pnum, ubi->vid_hdr_aloffset,
				    ubi->peb_size - ubi->vid_hdr_aloffset);
	if (err) {
		ubi_err(ubi, "new PEB %d does not contain all 0xFF bytes", e->pnum);
		return err;
	}

	return e->pnum;
}
#else
#include "fastmap-wl.c"
#endif