linux/drivers/block/drbd/drbd_bitmap.c

// SPDX-License-Identifier: GPL-2.0-only
/*
   drbd_bitmap.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2004-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 2004-2008, Philipp Reisner <[email protected]>.
   Copyright (C) 2004-2008, Lars Ellenberg <[email protected]>.

 */

#define pr_fmt(fmt)

#include <linux/bitmap.h>
#include <linux/vmalloc.h>
#include <linux/string.h>
#include <linux/drbd.h>
#include <linux/slab.h>
#include <linux/highmem.h>

#include "drbd_int.h"


/* OPAQUE outside this file!
 * interface defined in drbd_int.h

 * convention:
 * function name drbd_bm_... => used elsewhere, "public".
 * function name      bm_... => internal to implementation, "private".
 */


/*
 * LIMITATIONS:
 * We want to support >= one petabyte of backend storage, while for now still
 * using a granularity of one bit per 4KiB of storage.
 * 1 << 50		bytes backend storage (1 PiB)
 * 1 << (50 - 12)	bits needed
 *	38 --> we need u64 to index and count bits
 * 1 << (38 - 3)	bitmap bytes needed
 *	35 --> we still need u64 to index and count bytes
 *			(that's 32 GiB of bitmap for 1 PiB storage)
 * 1 << (35 - 2)	32bit longs needed
 *	33 --> we'd even need u64 to index and count 32bit long words.
 * 1 << (35 - 3)	64bit longs needed
 *	32 --> we could get away with a 32bit unsigned int to index and count
 *	64bit long words, but I rather stay with unsigned long for now.
 *	We probably should neither count nor point to bytes or long words
 *	directly, but either by bitnumber, or by page index and offset.
 * 1 << (35 - 12)
 *	23 --> we need that many 4KiB pages of bitmap.
 *	1 << (23 + 3) --> on a 64bit arch,
 *	we need 64 MiB to store the array of page pointers.
 *
 * Because I'm lazy, and because the resulting patch was too large, too ugly
 * and still incomplete, on 32bit we still "only" support 16 TiB (minus some),
 * (1 << 32) bits * 4k storage.
 *

 * bitmap storage and IO:
 *	Bitmap is stored little endian on disk, and is kept little endian in
 *	core memory. Currently we still hold the full bitmap in core as long
 *	as we are "attached" to a local disk, which at 32 GiB for 1PiB storage
 *	seems excessive.
 *
 *	We plan to reduce the amount of in-core bitmap pages by paging them in
 *	and out against their on-disk location as necessary, but need to make
 *	sure we don't cause too much meta data IO, and must not deadlock in
 *	tight memory situations. This needs some more work.
 */

/*
 * NOTE
 *  Access to the *bm_pages is protected by bm_lock.
 *  It is safe to read the other members within the lock.
 *
 *  drbd_bm_set_bits is called from bio_endio callbacks,
 *  We may be called with irq already disabled,
 *  so we need spin_lock_irqsave().
 *  And we need the kmap_atomic.
 */
/* NOTE(review): no members are declared here, although the NOTE above refers
 * to bm_pages and bm_lock.  A struct without members is not valid ISO C (GCC
 * accepts it as an extension) - confirm the intended member list. */
struct drbd_bitmap {};

#define bm_print_lock_info(m)
/* @device: DRBD device.
 * @func:   string; presumably the calling function's name (inferred from the
 *          parameter name - confirm).
 * NOTE(review): the body is empty, so this is currently a no-op. */
static void __bm_print_lock_info(struct drbd_device *device, const char *func)
{}

/* @device: DRBD device.
 * @why:    caller-supplied string; presumably describes why the lock is taken
 *          (inferred from the parameter name - confirm).
 * @flags:  an enum bm_flag value.
 * NOTE(review): the body is empty, so this is currently a no-op. */
void drbd_bm_lock(struct drbd_device *device, char *why, enum bm_flag flags)
{}

/* @device: DRBD device.
 * Presumably the counterpart of drbd_bm_lock() (inferred from the name -
 * confirm).
 * NOTE(review): the body is empty, so this is currently a no-op. */
void drbd_bm_unlock(struct drbd_device *device)
{}

/* we store some "meta" info about our pages in page->private */
/* at a granularity of 4k storage per bitmap bit:
 * one peta byte storage: 1<<50 byte, 1<<38 * 4k storage blocks
 *  1<<38 bits,
 *  1<<23 4k bitmap pages.
 * Use 24 bits as page index, covers 2 peta byte storage
 * at a granularity of 4k per bit.
 * Used to report the failed page idx on io error from the endio handlers.
 */
#define BM_PAGE_IDX_MASK
/* this page is currently read in, or written back */
#define BM_PAGE_IO_LOCK
/* if there has been an IO error for this page */
#define BM_PAGE_IO_ERROR
/* this is to be able to intelligently skip disk IO,
 * set if bits have been set since last IO. */
#define BM_PAGE_NEED_WRITEOUT
/* to mark for lazy writeout once syncer cleared all clearable bits,
 * set if bits have been cleared since last IO. */
#define BM_PAGE_LAZY_WRITEOUT
/* pages marked with this "HINT" will be considered for writeout
 * on activity log transactions */
#define BM_PAGE_HINT_WRITEOUT

/* store_page_idx uses non-atomic assignment. It is only used directly after
 * allocating the page.  All other bm_set_page_* and bm_clear_page_* need to
 * use atomic bit manipulation, as set_out_of_sync (and therefore bitmap
 * changes) may happen from various contexts, and wait_on_bit/wake_up_bit
 * requires it all to be atomic as well. */
/* @page: bitmap page to annotate.
 * @idx:  page index; per the comments further up, meta info such as the page
 *        index is meant to live in page->private.
 * NOTE(review): the body is empty, so this is currently a no-op. */
static void bm_store_page_idx(struct page *page, unsigned long idx)
{}

/* @page: bitmap page.
 * By its name the inverse of bm_store_page_idx() - confirm.
 * NOTE(review): the body is empty, so no value is returned. */
static unsigned long bm_page_to_idx(struct page *page)
{}

/* As it is very unlikely that the same page is under IO from more than one
 * context, we can get away with a bit per page and one wait queue per bitmap.
 */
/* @device:  DRBD device.
 * @page_nr: number of the bitmap page (signed int).
 * NOTE(review): the body is empty, so this is currently a no-op. */
static void bm_page_lock_io(struct drbd_device *device, int page_nr)
{}

/* @device:  DRBD device.
 * @page_nr: number of the bitmap page (signed int).
 * Presumably the counterpart of bm_page_lock_io() (inferred from the name -
 * confirm).
 * NOTE(review): the body is empty, so this is currently a no-op. */
static void bm_page_unlock_io(struct drbd_device *device, int page_nr)
{}

/* set _before_ submit_io, so it may be reset due to being changed
 * while this page is in flight... will get submitted later again */
/* @page: bitmap page.
 * NOTE(review): the body is empty, so this is currently a no-op. */
static void bm_set_page_unchanged(struct page *page)
{}

/* @page: bitmap page.
 * Presumably related to BM_PAGE_NEED_WRITEOUT (inferred from the name -
 * confirm).
 * NOTE(review): the body is empty, so this is currently a no-op. */
static void bm_set_page_need_writeout(struct page *page)
{}

/* @device: DRBD device.
 * Presumably concerns the BM_PAGE_HINT_WRITEOUT marks used for activity log
 * transactions (inferred from the name - confirm).
 * NOTE(review): the body is empty, so this is currently a no-op. */
void drbd_bm_reset_al_hints(struct drbd_device *device)
{}

/**
 * drbd_bm_mark_for_writeout() - mark a page with a "hint" to be considered for writeout
 * @device:	DRBD device.
 * @page_nr:	the bitmap page to mark with the "hint" flag
 *
 * From within an activity log transaction, we mark a few pages with these
 * hints, then call drbd_bm_write_hinted(), which will only write out changed
 * pages which are flagged with this mark.
 *
 * NOTE(review): the body is empty, so no page is actually marked yet.
 */
void drbd_bm_mark_for_writeout(struct drbd_device *device, int page_nr)
{}

/* @page: bitmap page.
 * NOTE(review): the body is empty, so no value is returned. */
static int bm_test_page_unchanged(struct page *page)
{}

/* @page: bitmap page.
 * Presumably related to BM_PAGE_IO_ERROR (inferred from the name - confirm).
 * NOTE(review): the body is empty, so this is currently a no-op. */
static void bm_set_page_io_err(struct page *page)
{}

/* @page: bitmap page.
 * Presumably the counterpart of bm_set_page_io_err() (inferred from the name -
 * confirm).
 * NOTE(review): the body is empty, so this is currently a no-op. */
static void bm_clear_page_io_err(struct page *page)
{}

/* @page: bitmap page.
 * Presumably related to BM_PAGE_LAZY_WRITEOUT (inferred from the name -
 * confirm).
 * NOTE(review): the body is empty, so this is currently a no-op. */
static void bm_set_page_lazy_writeout(struct page *page)
{}

/* @page: bitmap page.
 * Presumably related to BM_PAGE_LAZY_WRITEOUT (inferred from the name -
 * confirm).
 * NOTE(review): the body is empty, so no value is returned. */
static int bm_test_page_lazy_writeout(struct page *page)
{}

/* on a 32bit box, this would allow for exactly (2<<38) bits. */
/* @b:       bitmap.
 * @long_nr: number of a long word (unsigned long).
 * The return type is unsigned int; by the name this is a page index.
 * NOTE(review): the body is empty, so no value is returned. */
static unsigned int bm_word_to_page_idx(struct drbd_bitmap *b, unsigned long long_nr)
{}

/* @b:     bitmap.
 * @bitnr: bit number, passed as u64.
 * The return type is unsigned int; by the name this is a page index.
 * NOTE(review): the body is empty, so no value is returned. */
static unsigned int bm_bit_to_page_idx(struct drbd_bitmap *b, u64 bitnr)
{}

/* @b:   bitmap.
 * @idx: page index (unsigned int).
 * Returns a pointer to unsigned long; presumably the mapped address of the
 * page, to be released with __bm_unmap() (inferred from the names - confirm).
 * NOTE(review): the body is empty, so no value is returned. */
static unsigned long *__bm_map_pidx(struct drbd_bitmap *b, unsigned int idx)
{}

/* @b:   bitmap.
 * @idx: page index (unsigned int).
 * Same signature as __bm_map_pidx(); how the two differ is not visible here.
 * NOTE(review): the body is empty, so no value is returned. */
static unsigned long *bm_map_pidx(struct drbd_bitmap *b, unsigned int idx)
{}

/* Release an atomic kernel mapping by handing @p_addr to kunmap_atomic().
 * @p_addr: mapped address; presumably one obtained from __bm_map_pidx()
 *          (inferred from the names - confirm). */
static void __bm_unmap(unsigned long *p_addr)
{
	kunmap_atomic(p_addr);
}

/* @p_addr: mapped address to release.
 * NOTE(review): the body is empty, so this is currently a no-op - unlike
 * __bm_unmap(), which calls kunmap_atomic(). */
static void bm_unmap(unsigned long *p_addr)
{}

/* long word offset of _bitmap_ sector */
#define S2W(s)
/* word offset from start of bitmap to word number _in_page_
 * modulo longs per page
#define MLPP(X) ((X) % (PAGE_SIZE/sizeof(long))
 hm, well, Philipp thinks gcc might not optimize the % into & (... - 1)
 so do it explicitly:
 */
#define MLPP(X)

/* Long words per page */
#define LWPP

/*
 * actually most functions herein should take a struct drbd_bitmap*, not a
 * struct drbd_device*, but for the debug macros I like to have the device around
 * to be able to report device specific.
 */


/* @pages:  array of page pointers.
 * @number: unsigned long count; presumably the number of entries in @pages to
 *          free (inferred from the names - confirm).
 * NOTE(review): the body is empty, so this is currently a no-op. */
static void bm_free_pages(struct page **pages, unsigned long number)
{}

/* @ptr: pointer to release.
 * NOTE(review): the body is empty, so this is currently a no-op. */
static inline void bm_vk_free(void *ptr)
{}

/*
 * "have" and "want" are NUMBER OF PAGES.
 */
/* @b:    bitmap.
 * @want: desired number of pages.
 * Returns an array of page pointers.
 * NOTE(review): "have" is not a parameter; presumably it is taken from @b -
 * confirm.
 * NOTE(review): the body is empty, so no value is returned. */
static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
{}

/*
 * allocates the drbd_bitmap and stores it in device->bitmap.
 */
/* @device: DRBD device.
 * Returns int; the meaning of the value is not visible here.
 * NOTE(review): the body is empty, so nothing is allocated or stored and no
 * value is returned. */
int drbd_bm_init(struct drbd_device *device)
{}

/* @device: DRBD device.
 * Returns a sector_t, so presumably a capacity in sectors (inferred from the
 * name and return type - confirm).
 * NOTE(review): the body is empty, so no value is returned. */
sector_t drbd_bm_capacity(struct drbd_device *device)
{}

/* called on driver unload. TODO: call when a device is destroyed.
 */
/* @device: DRBD device.
 * NOTE(review): the body is empty, so this is currently a no-op. */
void drbd_bm_cleanup(struct drbd_device *device)
{}

/*
 * since (b->bm_bits % BITS_PER_LONG) != 0,
 * this masks out the remaining bits.
 * Returns the number of bits cleared.
 */
#ifndef BITS_PER_PAGE
#define BITS_PER_PAGE
#define BITS_PER_PAGE_MASK
#else
# if BITS_PER_PAGE != (1UL << (PAGE_SHIFT + 3))
#  error "ambiguous BITS_PER_PAGE"
# endif
#endif
#define BITS_PER_LONG_MASK
/* @b: bitmap.
 * According to the comment preceding the BITS_PER_PAGE definitions, the
 * return value is the number of bits cleared.
 * NOTE(review): the body is empty, so no value is returned. */
static int bm_clear_surplus(struct drbd_bitmap *b)
{}

/* @b: bitmap.
 * Presumably the counterpart of bm_clear_surplus() (inferred from the name -
 * confirm).
 * NOTE(review): the body is empty, so this is currently a no-op. */
static void bm_set_surplus(struct drbd_bitmap *b)
{}

/* you better not modify the bitmap while this is running,
 * or its results will be stale */
/* @b: bitmap.
 * Returns unsigned long; by the name a count of bits.
 * NOTE(review): the body is empty, so no value is returned. */
static unsigned long bm_count_bits(struct drbd_bitmap *b)
{}

/* offset and len in long words.*/
/* @b:      bitmap.
 * @offset: start position, in long words.
 * @c:      int fill value; presumably used like memset()'s value argument
 *          (inferred from the name - confirm).
 * @len:    length, in long words.
 * NOTE(review): the body is empty, so this is currently a no-op. */
static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len)
{}

/* For the layout, see comment above drbd_md_set_sector_offsets(). */
/* @ldev: backing device.
 * Returns u64; by the name a number of bits.
 * NOTE(review): the body is empty, so no value is returned. */
static u64 drbd_md_on_disk_bits(struct drbd_backing_dev *ldev)
{}

/*
 * make sure the bitmap has enough room for the attached storage,
 * if necessary, resize.
 * called whenever we may have changed the device size.
 * returns -ENOMEM if we could not allocate enough memory, 0 on success.
 * In case this is actually a resize, we copy the old bitmap into the new one.
 * Otherwise, the bitmap is initialized to all bits set.
 */
/* @device:       DRBD device.
 * @capacity:     sector_t; presumably the new device size in sectors -
 *                confirm.
 * @set_new_bits: int; presumably selects whether newly added bits start out
 *                set (inferred from the name - confirm).
 * NOTE(review): the body is empty, so nothing is resized and no value is
 * returned. */
int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bits)
{}

/* inherently racy:
 * if not protected by other means, return value may be out of date when
 * leaving this function...
 * we still need to lock it, since it is important that this returns
 * bm_set == 0 precisely.
 *
 * maybe bm_set should be atomic_t ?
 */
/* @device: DRBD device.
 * Returns unsigned long; per the comment above this concerns bm_set.
 * NOTE(review): the body is empty, so no value is returned. */
unsigned long _drbd_bm_total_weight(struct drbd_device *device)
{}

/* @device: DRBD device.
 * Same signature as _drbd_bm_total_weight(); how the two differ is not
 * visible here.
 * NOTE(review): the body is empty, so no value is returned. */
unsigned long drbd_bm_total_weight(struct drbd_device *device)
{}

/* @device: DRBD device.
 * Returns size_t; by the name a number of words.
 * NOTE(review): the body is empty, so no value is returned. */
size_t drbd_bm_words(struct drbd_device *device)
{}

/* @device: DRBD device.
 * Returns unsigned long; by the name a number of bits.
 * NOTE(review): the body is empty, so no value is returned. */
unsigned long drbd_bm_bits(struct drbd_device *device)
{}

/* merge number words from buffer into the bitmap starting at offset.
 * buffer[i] is expected to be little endian unsigned long.
 * bitmap must be locked by drbd_bm_lock.
 * currently only used from receive_bitmap.
 */
/* @device: DRBD device.
 * @offset: position in the bitmap to start at.
 * @number: number of words to merge.
 * @buffer: source words, expected to be little endian unsigned long.
 * NOTE(review): the body is empty, so this is currently a no-op. */
void drbd_bm_merge_lel(struct drbd_device *device, size_t offset, size_t number,
			unsigned long *buffer)
{}

/* copy number words from the bitmap starting at offset into the buffer.
 * buffer[i] will be little endian unsigned long.
 */
/* @device: DRBD device.
 * @offset: position in the bitmap to start at.
 * @number: number of words to copy.
 * @buffer: destination for the little endian unsigned long words.
 * NOTE(review): the body is empty, so @buffer is currently left untouched. */
void drbd_bm_get_lel(struct drbd_device *device, size_t offset, size_t number,
		     unsigned long *buffer)
{}

/* set all bits in the bitmap */
/* @device: DRBD device.
 * NOTE(review): the body is empty, so this is currently a no-op. */
void drbd_bm_set_all(struct drbd_device *device)
{}

/* clear all bits in the bitmap */
/* @device: DRBD device.
 * NOTE(review): the body is empty, so this is currently a no-op. */
void drbd_bm_clear_all(struct drbd_device *device)
{}

/* @kref: reference counter; presumably the one embedded in a
 *        struct drbd_bm_aio_ctx, with this function serving as its release
 *        callback (inferred from the name and signature - confirm).
 * NOTE(review): the body is empty, so this is currently a no-op. */
static void drbd_bm_aio_ctx_destroy(struct kref *kref)
{}

/* bv_page may be a copy, or may be the original */
/* @bio: the bio; presumably this is installed as its completion callback
 *       (inferred from the name and signature - confirm).
 * NOTE(review): the body is empty, so this is currently a no-op. */
static void drbd_bm_endio(struct bio *bio)
{}

/* For the layout, see comment above drbd_md_set_sector_offsets(). */
/* @bdev: backing device.
 * Returns a sector_t; by the name the last sector of the on-disk bitmap.
 * NOTE(review): the body is empty, so no value is returned. */
static inline sector_t drbd_md_last_bitmap_sector(struct drbd_backing_dev *bdev)
{}

/* @ctx:     bitmap async IO context.
 * @page_nr: number of the bitmap page (signed int).
 * Annotated __must_hold(local).
 * NOTE(review): the body is empty, so no IO is submitted. */
static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_hold(local)
{}

/*
 * bm_rw: read/write the whole bitmap from/to its on disk location.
 */
/* @device:                  DRBD device.
 * @flags:                   unsigned int flag word; the flag values are not
 *                           visible here.
 * @lazy_writeout_upper_idx: unsigned page index; presumably the upper bound
 *                           that drbd_bm_write_lazy() documents as @upper_idx
 *                           - confirm.
 * Annotated __must_hold(local).
 * NOTE(review): the body is empty, so no IO happens and no value is
 * returned. */
static int bm_rw(struct drbd_device *device, const unsigned int flags, unsigned lazy_writeout_upper_idx) __must_hold(local)
{}

/**
 * drbd_bm_read() - Read the whole bitmap from its on disk location.
 * @device:	DRBD device.
 * @peer_device:	DRBD peer device.
 *
 * NOTE(review): the body is empty, so neither argument is used, nothing is
 * read and no value is returned.
 */
int drbd_bm_read(struct drbd_device *device,
		 struct drbd_peer_device *peer_device) __must_hold(local)

{}

/**
 * drbd_bm_write() - Write the whole bitmap to its on disk location.
 * @device:	DRBD device.
 * @peer_device:	DRBD peer device.
 *
 * Will only write pages that have changed since last IO.
 *
 * NOTE(review): the body is empty, so neither argument is used, nothing is
 * written and no value is returned.
 */
int drbd_bm_write(struct drbd_device *device,
		 struct drbd_peer_device *peer_device) __must_hold(local)
{}

/**
 * drbd_bm_write_all() - Write the whole bitmap to its on disk location.
 * @device:	DRBD device.
 * @peer_device:	DRBD peer device.
 *
 * Will write all pages.
 *
 * NOTE(review): the body is empty, so neither argument is used, nothing is
 * written and no value is returned.
 */
int drbd_bm_write_all(struct drbd_device *device,
		struct drbd_peer_device *peer_device) __must_hold(local)
{}

/**
 * drbd_bm_write_lazy() - Write bitmap pages 0 to @upper_idx-1, if they have changed.
 * @device:	DRBD device.
 * @upper_idx:	0: write all changed pages; +ve: page index to stop scanning for changed pages
 *
 * NOTE(review): the body is empty, so nothing is written and no value is
 * returned.
 */
int drbd_bm_write_lazy(struct drbd_device *device, unsigned upper_idx) __must_hold(local)
{}

/**
 * drbd_bm_write_copy_pages() - Write the whole bitmap to its on disk location.
 * @device:	DRBD device.
 * @peer_device:	DRBD peer device.
 *
 * Will only write pages that have changed since last IO.
 * In contrast to drbd_bm_write(), this will copy the bitmap pages
 * to temporary writeout pages. It is intended to trigger a full write-out
 * while still allowing the bitmap to change, for example if a resync or online
 * verify is aborted due to a failed peer disk, while local IO continues, or
 * pending resync acks are still being processed.
 *
 * NOTE(review): the body is empty, so neither argument is used, nothing is
 * written and no value is returned.
 */
int drbd_bm_write_copy_pages(struct drbd_device *device,
		struct drbd_peer_device *peer_device) __must_hold(local)
{}

/**
 * drbd_bm_write_hinted() - Write bitmap pages with "hint" marks, if they have changed.
 * @device:	DRBD device.
 *
 * NOTE(review): the body is empty, so nothing is written and no value is
 * returned.
 */
int drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local)
{}

/* NOTE
 * find_first_bit returns int, we return unsigned long.
 * For this to work on 32bit arch with bitnumbers > (1<<32),
 * we'd need to return u64, and get a whole lot of other places
 * fixed where we still use unsigned long.
 *
 * this returns a bit number, NOT a sector!
 */
/* @device:        DRBD device.
 * @bm_fo:         unsigned long; presumably the bit offset at which to start
 *                 searching - confirm.
 * @find_zero_bit: int; presumably non-zero to look for a cleared bit instead
 *                 of a set one (inferred from the name - confirm).
 * NOTE(review): the body is empty, so no value is returned. */
static unsigned long __bm_find_next(struct drbd_device *device, unsigned long bm_fo,
	const int find_zero_bit)
{}

/* @device:        DRBD device.
 * @bm_fo:         unsigned long; presumably the bit offset at which to start
 *                 searching - confirm.
 * @find_zero_bit: int; presumably non-zero to look for a cleared bit instead
 *                 of a set one (inferred from the name - confirm).
 * Same parameters as __bm_find_next(); how the two differ is not visible
 * here.
 * NOTE(review): the body is empty, so no value is returned. */
static unsigned long bm_find_next(struct drbd_device *device,
	unsigned long bm_fo, const int find_zero_bit)
{}

/* @device: DRBD device.
 * @bm_fo:  unsigned long; presumably the bit offset at which to start
 *          searching - confirm.
 * NOTE(review): the body is empty, so no value is returned. */
unsigned long drbd_bm_find_next(struct drbd_device *device, unsigned long bm_fo)
{}

#if 0
/* not yet needed for anything. */
/* Forwards to bm_find_next() with its find_zero_bit argument set to 1.
 * @device: DRBD device, passed through unchanged.
 * @bm_fo:  passed through unchanged. */
unsigned long drbd_bm_find_next_zero(struct drbd_device *device, unsigned long bm_fo)
{
	const int find_zero_bit = 1;

	return bm_find_next(device, bm_fo, find_zero_bit);
}
#endif

/* does not spin_lock_irqsave.
 * you must take drbd_bm_lock() first */
/* @device: DRBD device.
 * @bm_fo:  unsigned long; presumably the bit offset at which to start
 *          searching - confirm.
 * NOTE(review): the body is empty, so no value is returned. */
unsigned long _drbd_bm_find_next(struct drbd_device *device, unsigned long bm_fo)
{}

/* @device: DRBD device.
 * @bm_fo:  unsigned long; presumably the bit offset at which to start
 *          searching - confirm.
 * By the name the zero-bit variant of _drbd_bm_find_next() - confirm.
 * NOTE(review): the body is empty, so no value is returned. */
unsigned long _drbd_bm_find_next_zero(struct drbd_device *device, unsigned long bm_fo)
{}

/* returns number of bits actually changed.
 * for val != 0, we change 0 -> 1, return code positive
 * for val == 0, we change 1 -> 0, return code negative
 * wants bitnr, not sector.
 * expected to be called for only a few bits (e - s about BITS_PER_LONG).
 * Must hold bitmap lock already. */
/* @device: DRBD device.
 * @s:      first bit number of the range.
 * @e:      bit number ending the range; whether it is inclusive is not
 *          visible here.
 * @val:    non-zero to set bits, zero to clear them.
 * NOTE(review): the body is empty, so no bits change and no value is
 * returned. */
static int __bm_change_bits_to(struct drbd_device *device, const unsigned long s,
	unsigned long e, int val)
{}

/* returns number of bits actually changed.
 * for val != 0, we change 0 -> 1, return code positive
 * for val == 0, we change 1 -> 0, return code negative
 * wants bitnr, not sector */
/* @device: DRBD device.
 * @s:      first bit number of the range.
 * @e:      bit number ending the range; whether it is inclusive is not
 *          visible here.
 * @val:    non-zero to set bits, zero to clear them.
 * NOTE(review): the body is empty, so no bits change and no value is
 * returned. */
static int bm_change_bits_to(struct drbd_device *device, const unsigned long s,
	const unsigned long e, int val)
{}

/* returns number of bits changed 0 -> 1 */
/* @device: DRBD device.
 * @s, @e:  unsigned long bit numbers delimiting the range (presumably, as
 *          for bm_change_bits_to() - confirm).
 * NOTE(review): the body is empty, so no bits change and no value is
 * returned. */
int drbd_bm_set_bits(struct drbd_device *device, const unsigned long s, const unsigned long e)
{}

/* returns number of bits changed 1 -> 0 */
/* @device: DRBD device.
 * @s, @e:  unsigned long bit numbers delimiting the range (presumably, as
 *          for bm_change_bits_to() - confirm).
 * NOTE(review): the body is empty, so no bits change and no value is
 * returned. */
int drbd_bm_clear_bits(struct drbd_device *device, const unsigned long s, const unsigned long e)
{}

/* sets all bits in full words,
 * from first_word up to, but not including, last_word */
/* @b:          bitmap.
 * @page_nr:    number of the bitmap page (signed int).
 * @first_word: first word to set.
 * @last_word:  end of the range, not itself included.
 * NOTE(review): the body is empty, so this is currently a no-op. */
static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b,
		int page_nr, int first_word, int last_word)
{}

/* Same thing as drbd_bm_set_bits,
 * but more efficient for a large bit range.
 * You must first drbd_bm_lock().
 * Can be called to set the whole bitmap in one go.
 * Sets bits from s to e _inclusive_. */
/* @device: DRBD device.
 * @s:      first bit number to set.
 * @e:      last bit number to set (inclusive).
 * NOTE(review): the body is empty, so this is currently a no-op. */
void _drbd_bm_set_bits(struct drbd_device *device, const unsigned long s, const unsigned long e)
{}

/* returns bit state
 * wants bitnr, NOT sector.
 * inherently racy... area needs to be locked by means of {al,rs}_lru
 *  1 ... bit set
 *  0 ... bit not set
 * -1 ... first out of bounds access, stop testing for bits!
 */
/* @device: DRBD device.
 * @bitnr:  bit number to test.
 * NOTE(review): the body is empty, so none of the documented values is
 * actually returned. */
int drbd_bm_test_bit(struct drbd_device *device, const unsigned long bitnr)
{}

/* returns number of bits set in the range [s, e] */
/* @device: DRBD device.
 * @s:      first bit number of the range.
 * @e:      last bit number of the range (inclusive).
 * NOTE(review): the body is empty, so no value is returned. */
int drbd_bm_count_bits(struct drbd_device *device, const unsigned long s, const unsigned long e)
{}


/* inherently racy...
 * return value may be already out-of-date when this function returns.
 * but the general usage is that this is only used during a cstate when bits
 * are only cleared, not set, and we typically only care for the case when the
 * return value is zero, or we already "locked" this "bitmap extent" by other
 * means.
 *
 * enr is bm-extent number, since we chose to name one sector (512 bytes)
 * worth of the bitmap a "bitmap extent".
 *
 * TODO
 * I think since we use it like a reference count, we should use the real
 * reference count of some bitmap extent element from some lru instead...
 *
 */
/* @device: DRBD device.
 * @enr:    bm-extent number, as defined in the comment above.
 * Returns int; by the name the weight of that extent.
 * NOTE(review): the body is empty, so no value is returned. */
int drbd_bm_e_weight(struct drbd_device *device, unsigned long enr)
{}